1 files changed, 246 insertions, 545 deletions
diff --git a/src/libffmpeg/libavcodec/vp3.c b/src/libffmpeg/libavcodec/vp3.c
index a7a9e8bac..b5cfbb02c 100644
--- a/src/libffmpeg/libavcodec/vp3.c
+++ b/src/libffmpeg/libavcodec/vp3.c
@@ -229,6 +229,8 @@ typedef struct Vp3DecodeContext {
     DSPContext dsp;
     int flipped_image;
 
+    int qis[3];
+    int nqis;
     int quality_index;
     int last_quality_index;
 
@@ -254,17 +256,17 @@ typedef struct Vp3DecodeContext {
     Vp3Fragment *all_fragments;
     Coeff *coeffs;
     Coeff *next_coeff;
-    int u_fragment_start;
-    int v_fragment_start;
+    int fragment_start[3];
 
     ScanTable scantable;
 
     /* tables */
     uint16_t coded_dc_scale_factor[64];
     uint32_t coded_ac_scale_factor[64];
-    uint16_t coded_intra_y_dequant[64];
-    uint16_t coded_intra_c_dequant[64];
-    uint16_t coded_inter_dequant[64];
+    uint8_t base_matrix[384][64];
+    uint8_t qr_count[2][3];
+    uint8_t qr_size [2][3][64];
+    uint16_t qr_base[2][3][64];
 
     /* this is a list of indices into the all_fragments array indicating
      * which of the fragments are coded */
@@ -285,9 +287,7 @@ typedef struct Vp3DecodeContext {
 
     /* these arrays need to be on 16-byte boundaries since SSE2 operations
      * index into them */
-    DECLARE_ALIGNED_16(int16_t, intra_y_dequant[64]);
-    DECLARE_ALIGNED_16(int16_t, intra_c_dequant[64]);
-    DECLARE_ALIGNED_16(int16_t, inter_dequant[64]);
+    DECLARE_ALIGNED_16(int16_t, qmat[2][4][64]);        //<qmat[is_inter][plane]
 
     /* This table contains superblock_count * 16 entries. Each set of 16
      * numbers corresponds to the fragment indices 0..15 of the superblock.
@@ -328,8 +328,7 @@ typedef struct Vp3DecodeContext {
     int bounding_values_array[256];
 } Vp3DecodeContext;
 
-static int theora_decode_comments(AVCodecContext *avctx, GetBitContext gb);
-static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb);
+static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb);
 
 /************************************************************************
  * VP3 specific functions
@@ -345,8 +344,6 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb);
 static int init_block_mapping(Vp3DecodeContext *s)
 {
     int i, j;
-    signed int hilbert_walk_y[16];
-    signed int hilbert_walk_c[16];
     signed int hilbert_walk_mb[4];
 
     int current_fragment = 0;
@@ -385,41 +382,6 @@ static int init_block_mapping(Vp3DecodeContext *s)
 
     debug_vp3("  vp3: initialize block mapping tables\n");
 
-    /* figure out hilbert pattern per these frame dimensions */
-    hilbert_walk_y[0]  = 1;
-    hilbert_walk_y[1]  = 1;
-    hilbert_walk_y[2]  = s->fragment_width;
-    hilbert_walk_y[3]  = -1;
-    hilbert_walk_y[4]  = s->fragment_width;
-    hilbert_walk_y[5]  = s->fragment_width;
-    hilbert_walk_y[6]  = 1;
-    hilbert_walk_y[7]  = -s->fragment_width;
-    hilbert_walk_y[8]  = 1;
-    hilbert_walk_y[9]  = s->fragment_width;
-    hilbert_walk_y[10]  = 1;
-    hilbert_walk_y[11] = -s->fragment_width;
-    hilbert_walk_y[12] = -s->fragment_width;
-    hilbert_walk_y[13] = -1;
-    hilbert_walk_y[14] = -s->fragment_width;
-    hilbert_walk_y[15] = 1;
-
-    hilbert_walk_c[0]  = 1;
-    hilbert_walk_c[1]  = 1;
-    hilbert_walk_c[2]  = s->fragment_width / 2;
-    hilbert_walk_c[3]  = -1;
-    hilbert_walk_c[4]  = s->fragment_width / 2;
-    hilbert_walk_c[5]  = s->fragment_width / 2;
-    hilbert_walk_c[6]  = 1;
-    hilbert_walk_c[7]  = -s->fragment_width / 2;
-    hilbert_walk_c[8]  = 1;
-    hilbert_walk_c[9]  = s->fragment_width / 2;
-    hilbert_walk_c[10]  = 1;
-    hilbert_walk_c[11] = -s->fragment_width / 2;
-    hilbert_walk_c[12] = -s->fragment_width / 2;
-    hilbert_walk_c[13] = -1;
-    hilbert_walk_c[14] = -s->fragment_width / 2;
-    hilbert_walk_c[15] = 1;
-
     hilbert_walk_mb[0] = 1;
     hilbert_walk_mb[1] = s->macroblock_width;
     hilbert_walk_mb[2] = 1;
@@ -440,7 +402,6 @@ static int init_block_mapping(Vp3DecodeContext *s)
             current_height = 0;
             superblock_row_inc = 3 * s->fragment_width -
                 (s->y_superblock_width * 4 - s->fragment_width);
-            hilbert = hilbert_walk_y;
 
             /* the first operation for this variable is to advance by 1 */
             current_fragment = -1;
@@ -454,10 +415,9 @@ static int init_block_mapping(Vp3DecodeContext *s)
             current_height = 0;
             superblock_row_inc = 3 * (s->fragment_width / 2) -
                 (s->c_superblock_width * 4 - s->fragment_width / 2);
-            hilbert = hilbert_walk_c;
 
             /* the first operation for this variable is to advance by 1 */
-            current_fragment = s->u_fragment_start - 1;
+            current_fragment = s->fragment_start[1] - 1;
 
         } else if (i == s->v_superblock_start) {
 
@@ -468,10 +428,9 @@ static int init_block_mapping(Vp3DecodeContext *s)
             current_height = 0;
             superblock_row_inc = 3 * (s->fragment_width / 2) -
                 (s->c_superblock_width * 4 - s->fragment_width / 2);
-            hilbert = hilbert_walk_c;
 
             /* the first operation for this variable is to advance by 1 */
-            current_fragment = s->v_fragment_start - 1;
+            current_fragment = s->fragment_start[2] - 1;
 
         }
 
@@ -486,7 +445,7 @@ static int init_block_mapping(Vp3DecodeContext *s)
 
         /* iterate through all 16 fragments in a superblock */
         for (j = 0; j < 16; j++) {
-            current_fragment += hilbert[j];
+            current_fragment += travel_width[j] + right_edge * travel_height[j];
             current_width += travel_width[j];
             current_height += travel_height[j];
 
@@ -593,13 +552,13 @@ static int init_block_mapping(Vp3DecodeContext *s)
                 s->macroblock_fragments[mapping_index++] = -1;
 
             /* C planes */
-            c_fragment = s->u_fragment_start +
+            c_fragment = s->fragment_start[1] +
                 (i * s->fragment_width / 4) + (j / 2);
             s->all_fragments[c_fragment].macroblock = s->macroblock_count;
             s->macroblock_fragments[mapping_index++] = c_fragment;
             debug_init("%d ", c_fragment);
 
-            c_fragment = s->v_fragment_start +
+            c_fragment = s->fragment_start[2] +
                 (i * s->fragment_width / 4) + (j / 2);
             s->all_fragments[c_fragment].macroblock = s->macroblock_count;
             s->macroblock_fragments[mapping_index++] = c_fragment;
@@ -646,94 +605,38 @@ static void init_frame(Vp3DecodeContext *s, GetBitContext *gb)
  */
 static void init_dequantizer(Vp3DecodeContext *s)
 {
-
     int ac_scale_factor = s->coded_ac_scale_factor[s->quality_index];
     int dc_scale_factor = s->coded_dc_scale_factor[s->quality_index];
-    int i, j;
+    int i, j, plane, inter, qri, bmi, bmj, qistart;
 
     debug_vp3("  vp3: initializing dequantization tables\n");
 
-    /*
-     * Scale dequantizers:
-     *
-     *   quantizer * sf
-     *   --------------
-     *        100
-     *
-     * where sf = dc_scale_factor for DC quantizer
-     *         or ac_scale_factor for AC quantizer
-     *
-     * Then, saturate the result to a lower limit of MIN_DEQUANT_VAL.
-     */
-#define SCALER 4
-
-    /* scale DC quantizers */
-    s->intra_y_dequant[0] = s->coded_intra_y_dequant[0] * dc_scale_factor / 100;
-    if (s->intra_y_dequant[0] < MIN_DEQUANT_VAL * 2)
-        s->intra_y_dequant[0] = MIN_DEQUANT_VAL * 2;
-    s->intra_y_dequant[0] *= SCALER;
-
-    s->intra_c_dequant[0] = s->coded_intra_c_dequant[0] * dc_scale_factor / 100;
-    if (s->intra_c_dequant[0] < MIN_DEQUANT_VAL * 2)
-        s->intra_c_dequant[0] = MIN_DEQUANT_VAL * 2;
-    s->intra_c_dequant[0] *= SCALER;
-
-    s->inter_dequant[0] = s->coded_inter_dequant[0] * dc_scale_factor / 100;
-    if (s->inter_dequant[0] < MIN_DEQUANT_VAL * 4)
-        s->inter_dequant[0] = MIN_DEQUANT_VAL * 4;
-    s->inter_dequant[0] *= SCALER;
-
-    /* scale AC quantizers, zigzag at the same time in preparation for
-     * the dequantization phase */
-    for (i = 1; i < 64; i++) {
-        int k= s->scantable.scantable[i];
-        j = s->scantable.permutated[i];
-
-        s->intra_y_dequant[j] = s->coded_intra_y_dequant[k] * ac_scale_factor / 100;
-        if (s->intra_y_dequant[j] < MIN_DEQUANT_VAL)
-            s->intra_y_dequant[j] = MIN_DEQUANT_VAL;
-        s->intra_y_dequant[j] *= SCALER;
-
-        s->intra_c_dequant[j] = s->coded_intra_c_dequant[k] * ac_scale_factor / 100;
-        if (s->intra_c_dequant[j] < MIN_DEQUANT_VAL)
-            s->intra_c_dequant[j] = MIN_DEQUANT_VAL;
-        s->intra_c_dequant[j] *= SCALER;
-
-        s->inter_dequant[j] = s->coded_inter_dequant[k] * ac_scale_factor / 100;
-        if (s->inter_dequant[j] < MIN_DEQUANT_VAL * 2)
-            s->inter_dequant[j] = MIN_DEQUANT_VAL * 2;
-        s->inter_dequant[j] *= SCALER;
+    for(inter=0; inter<2; inter++){
+        for(plane=0; plane<3; plane++){
+            int sum=0;
+            for(qri=0; qri<s->qr_count[inter][plane]; qri++){
+                sum+= s->qr_size[inter][plane][qri];
+                if(s->quality_index <= sum)
+                    break;
+            }
+            qistart= sum - s->qr_size[inter][plane][qri];
+            bmi= s->qr_base[inter][plane][qri  ];
+            bmj= s->qr_base[inter][plane][qri+1];
+            for(i=0; i<64; i++){
+                int coeff= (  2*(sum    -s->quality_index)*s->base_matrix[bmi][i]
+                            - 2*(qistart-s->quality_index)*s->base_matrix[bmj][i]
+                            + s->qr_size[inter][plane][qri])
+                           / (2*s->qr_size[inter][plane][qri]);
+
+                int qmin= 8<<(inter + !i);
+                int qscale= i ? ac_scale_factor : dc_scale_factor;
+
+                s->qmat[inter][plane][i]= clip((qscale * coeff)/100 * 4, qmin, 4096);
+            }
+        }
     }
 
-    memset(s->qscale_table, (FFMAX(s->intra_y_dequant[1], s->intra_c_dequant[1])+8)/16, 512); //FIXME finetune
-
-    /* print debug information as requested */
-    debug_dequantizers("intra Y dequantizers:\n");
-    for (i = 0; i < 8; i++) {
-      for (j = i * 8; j < i * 8 + 8; j++) {
-        debug_dequantizers(" %4d,", s->intra_y_dequant[j]);
-      }
-      debug_dequantizers("\n");
-    }
-    debug_dequantizers("\n");
-
-    debug_dequantizers("intra C dequantizers:\n");
-    for (i = 0; i < 8; i++) {
-      for (j = i * 8; j < i * 8 + 8; j++) {
-        debug_dequantizers(" %4d,", s->intra_c_dequant[j]);
-      }
-      debug_dequantizers("\n");
-    }
-    debug_dequantizers("\n");
-
-    debug_dequantizers("interframe dequantizers:\n");
-    for (i = 0; i < 8; i++) {
-      for (j = i * 8; j < i * 8 + 8; j++) {
-        debug_dequantizers(" %4d,", s->inter_dequant[j]);
-      }
-      debug_dequantizers("\n");
-    }
-    debug_dequantizers("\n");
+    memset(s->qscale_table, (FFMAX(s->qmat[0][0][1], s->qmat[0][1][1])+8)/16, 512); //FIXME finetune
 }
 
 /*
@@ -903,7 +806,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
                         s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment;
                         s->coded_fragment_list[s->coded_fragment_list_index] =
                             current_fragment;
-                        if ((current_fragment >= s->u_fragment_start) &&
+                        if ((current_fragment >= s->fragment_start[1]) &&
                             (s->last_coded_y_fragment == -1) &&
                             (!first_c_fragment_seen)) {
                             s->first_coded_c_fragment = s->coded_fragment_list_index;
@@ -931,7 +834,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
                     s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment;
                     s->coded_fragment_list[s->coded_fragment_list_index] =
                         current_fragment;
-                    if ((current_fragment >= s->u_fragment_start) &&
+                    if ((current_fragment >= s->fragment_start[1]) &&
                         (s->last_coded_y_fragment == -1) &&
                         (!first_c_fragment_seen)) {
                         s->first_coded_c_fragment = s->coded_fragment_list_index;
@@ -1146,17 +1049,10 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
                         motion_y[4] += motion_y[k];
                     }
 
-                    if (motion_x[4] >= 0)
-                        motion_x[4] = (motion_x[4] + 2) / 4;
-                    else
-                        motion_x[4] = (motion_x[4] - 2) / 4;
-                    motion_x[5] = motion_x[4];
-
-                    if (motion_y[4] >= 0)
-                        motion_y[4] = (motion_y[4] + 2) / 4;
-                    else
-                        motion_y[4] = (motion_y[4] - 2) / 4;
-                    motion_y[5] = motion_y[4];
+                    motion_x[5]=
+                    motion_x[4]= RSHIFT(motion_x[4], 2);
+                    motion_y[5]=
+                    motion_y[4]= RSHIFT(motion_y[4], 2);
 
                     /* vector maintenance; vector[3] is treated as the
                      * last vector in this case */
@@ -1416,7 +1312,6 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
   (compatible_frame[s->all_fragments[x].coding_method] == current_frame_type)
 #define FRAME_CODED(x) (s->all_fragments[x].coding_method != MODE_COPY)
 #define DC_COEFF(u) (s->coeffs[u].index ? 0 : s->coeffs[u].coeff) //FIXME do somethin to simplify this
-static inline int iabs (int x) { return ((x < 0) ? -x : x); }
 
 static void reverse_dc_prediction(Vp3DecodeContext *s,
                                   int first_fragment,
@@ -1432,24 +1327,8 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
     int x, y;
     int i = first_fragment;
 
-    /*
-     * Fragment prediction groups:
-     *
-     * 32222222226
-     * 10000000004
-     * 10000000004
-     * 10000000004
-     * 10000000004
-     *
-     * Note: Groups 5 and 7 do not exist as it would mean that the
-     * fragment's x coordinate is both 0 and (width - 1) at the same time.
-     */
-    int predictor_group;
     short predicted_dc;
 
-    /* validity flags for the left, up-left, up, and up-right fragments */
-    int fl, ful, fu, fur;
-
     /* DC values for the left, up-left, up, and up-right fragments */
     int vl, vul, vu, vur;
 
@@ -1462,26 +1341,24 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
      *   1: up multiplier
      *   2: up-right multiplier
      *   3: left multiplier
-     *   4: mask
-     *   5: right bit shift divisor (e.g., 7 means >>=7, a.k.a. div by 128)
      */
-    int predictor_transform[16][6] = {
-        {  0,  0,  0,  0,   0,  0 },
-        {  0,  0,  0,  1,   0,  0 },        // PL
-        {  0,  0,  1,  0,   0,  0 },        // PUR
-        {  0,  0, 53, 75, 127,  7 },        // PUR|PL
-        {  0,  1,  0,  0,   0,  0 },        // PU
-        {  0,  1,  0,  1,   1,  1 },        // PU|PL
-        {  0,  1,  0,  0,   0,  0 },        // PU|PUR
-        {  0,  0, 53, 75, 127,  7 },        // PU|PUR|PL
-        {  1,  0,  0,  0,   0,  0 },        // PUL
-        {  0,  0,  0,  1,   0,  0 },        // PUL|PL
-        {  1,  0,  1,  0,   1,  1 },        // PUL|PUR
-        {  0,  0, 53, 75, 127,  7 },        // PUL|PUR|PL
-        {  0,  1,  0,  0,   0,  0 },        // PUL|PU
-        {-26, 29,  0, 29,  31,  5 },        // PUL|PU|PL
-        {  3, 10,  3,  0,  15,  4 },        // PUL|PU|PUR
-        {-26, 29,  0, 29,  31,  5 }         // PUL|PU|PUR|PL
+    int predictor_transform[16][4] = {
+        {  0,  0,  0,  0},
+        {  0,  0,  0,128},        // PL
+        {  0,  0,128,  0},        // PUR
+        {  0,  0, 53, 75},        // PUR|PL
+        {  0,128,  0,  0},        // PU
+        {  0, 64,  0, 64},        // PU|PL
+        {  0,128,  0,  0},        // PU|PUR
+        {  0,  0, 53, 75},        // PU|PUR|PL
+        {128,  0,  0,  0},        // PUL
+        {  0,  0,  0,128},        // PUL|PL
+        { 64,  0, 64,  0},        // PUL|PUR
+        {  0,  0, 53, 75},        // PUL|PUR|PL
+        {  0,128,  0,  0},        // PUL|PU
+       {-104,116,  0,116},        // PUL|PU|PL
+        { 24, 80, 24,  0},        // PUL|PU|PUR
+       {-104,116,  0,116}         // PUL|PU|PUR|PL
     };
 
     /* This table shows which types of blocks can use other blocks for
@@ -1523,113 +1400,33 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
 
                 current_frame_type =
                     compatible_frame[s->all_fragments[i].coding_method];
-                predictor_group = (x == 0) + ((y == 0) << 1) +
-                    ((x + 1 == fragment_width) << 2);
-                debug_dc_pred(" frag %d: group %d, orig DC = %d, ",
-                    i, predictor_group, DC_COEFF(i));
-
-                switch (predictor_group) {
-
-                case 0:
-                    /* main body of fragments; consider all 4 possible
-                     * fragments for prediction */
-
-                    /* calculate the indices of the predicting fragments */
-                    ul = i - fragment_width - 1;
-                    u = i - fragment_width;
-                    ur = i - fragment_width + 1;
-                    l = i - 1;
-
-                    /* fetch the DC values for the predicting fragments */
-                    vul = DC_COEFF(ul);
-                    vu = DC_COEFF(u);
-                    vur = DC_COEFF(ur);
-                    vl = DC_COEFF(l);
-
-                    /* figure out which fragments are valid */
-                    ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul);
-                    fu = FRAME_CODED(u) && COMPATIBLE_FRAME(u);
-                    fur = FRAME_CODED(ur) && COMPATIBLE_FRAME(ur);
-                    fl = FRAME_CODED(l) && COMPATIBLE_FRAME(l);
-
-                    /* decide which predictor transform to use */
-                    transform = (fl*PL) | (fu*PU) | (ful*PUL) | (fur*PUR);
-
-                    break;
-
-                case 1:
-                    /* left column of fragments, not including top corner;
-                     * only consider up and up-right fragments */
+                debug_dc_pred(" frag %d: orig DC = %d, ",
+                    i, DC_COEFF(i));
 
-                    /* calculate the indices of the predicting fragments */
-                    u = i - fragment_width;
-                    ur = i - fragment_width + 1;
-
-                    /* fetch the DC values for the predicting fragments */
-                    vu = DC_COEFF(u);
-                    vur = DC_COEFF(ur);
-
-                    /* figure out which fragments are valid */
-                    fur = FRAME_CODED(ur) && COMPATIBLE_FRAME(ur);
-                    fu = FRAME_CODED(u) && COMPATIBLE_FRAME(u);
-
-                    /* decide which predictor transform to use */
-                    transform = (fu*PU) | (fur*PUR);
-
-                    break;
-
-                case 2:
-                case 6:
-                    /* top row of fragments, not including top-left frag;
-                     * only consider the left fragment for prediction */
-
-                    /* calculate the indices of the predicting fragments */
-                    l = i - 1;
-
-                    /* fetch the DC values for the predicting fragments */
+                transform= 0;
+                if(x){
+                    l= i-1;
                     vl = DC_COEFF(l);
-
-                    /* figure out which fragments are valid */
-                    fl = FRAME_CODED(l) && COMPATIBLE_FRAME(l);
-
-                    /* decide which predictor transform to use */
-                    transform = (fl*PL);
-
-                    break;
-
-                case 3:
-                    /* top-left fragment */
-
-                    /* nothing to predict from in this case */
-                    transform = 0;
-
-                    break;
-
-                case 4:
-                    /* right column of fragments, not including top corner;
-                     * consider up-left, up, and left fragments for
-                     * prediction */
-
-                    /* calculate the indices of the predicting fragments */
-                    ul = i - fragment_width - 1;
-                    u = i - fragment_width;
-                    l = i - 1;
-
-                    /* fetch the DC values for the predicting fragments */
-                    vul = DC_COEFF(ul);
+                    if(FRAME_CODED(l) && COMPATIBLE_FRAME(l))
+                        transform |= PL;
+                }
+                if(y){
+                    u= i-fragment_width;
                     vu = DC_COEFF(u);
-                    vl = DC_COEFF(l);
-
-                    /* figure out which fragments are valid */
-                    ful = FRAME_CODED(ul) && COMPATIBLE_FRAME(ul);
-                    fu = FRAME_CODED(u) && COMPATIBLE_FRAME(u);
-                    fl = FRAME_CODED(l) && COMPATIBLE_FRAME(l);
-
-                    /* decide which predictor transform to use */
-                    transform = (fl*PL) | (fu*PU) | (ful*PUL);
-
-                    break;
-
+                    if(FRAME_CODED(u) && COMPATIBLE_FRAME(u))
+                        transform |= PU;
+                    if(x){
+                        ul= i-fragment_width-1;
+                        vul = DC_COEFF(ul);
+                        if(FRAME_CODED(ul) && COMPATIBLE_FRAME(ul))
+                            transform |= PUL;
+                    }
+                    if(x + 1 < fragment_width){
+                        ur= i-fragment_width+1;
+                        vur = DC_COEFF(ur);
+                        if(FRAME_CODED(ur) && COMPATIBLE_FRAME(ur))
+                            transform |= PUR;
+                    }
                 }
 
                 debug_dc_pred("transform = %d, ", transform);
@@ -1651,22 +1448,16 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
                         (predictor_transform[transform][2] * vur) +
                         (predictor_transform[transform][3] * vl);
 
-                    /* if there is a shift value in the transform, add
-                     * the sign bit before the shift */
-                    if (predictor_transform[transform][5] != 0) {
-                        predicted_dc += ((predicted_dc >> 15) &
-                            predictor_transform[transform][4]);
-                        predicted_dc >>= predictor_transform[transform][5];
-                    }
+                    predicted_dc /= 128;
 
                     /* check for outranging on the [ul u l] and
                      * [ul u ur l] predictors */
                     if ((transform == 13) || (transform == 15)) {
-                        if (iabs(predicted_dc - vu) > 128)
+                        if (ABS(predicted_dc - vu) > 128)
                             predicted_dc = vu;
-                        else if (iabs(predicted_dc - vl) > 128)
+                        else if (ABS(predicted_dc - vl) > 128)
                             predicted_dc = vl;
-                        else if (iabs(predicted_dc - vul) > 128)
+                        else if (ABS(predicted_dc - vul) > 128)
                             predicted_dc = vul;
                     }
 
@@ -1707,73 +1498,32 @@ static void vertical_filter(unsigned char *first_pixel, int stride,
  */
 static void render_slice(Vp3DecodeContext *s, int slice)
 {
-    int x, y;
+    int x;
     int m, n;
-    int i;  /* indicates current fragment */
     int16_t *dequantizer;
     DECLARE_ALIGNED_16(DCTELEM, block[64]);
-    unsigned char *output_plane;
-    unsigned char *last_plane;
-    unsigned char *golden_plane;
-    int stride;
     int motion_x = 0xdeadbeef, motion_y = 0xdeadbeef;
-    int upper_motion_limit, lower_motion_limit;
     int motion_halfpel_index;
     uint8_t *motion_source;
     int plane;
-    int plane_width;
-    int plane_height;
-    int slice_height;
     int current_macroblock_entry = slice * s->macroblock_width * 6;
-    int fragment_width;
 
     if (slice >= s->macroblock_height)
         return;
 
     for (plane = 0; plane < 3; plane++) {
+        uint8_t *output_plane = s->current_frame.data    [plane];
+        uint8_t *  last_plane = s->   last_frame.data    [plane];
+        uint8_t *golden_plane = s-> golden_frame.data    [plane];
+        int stride            = s->current_frame.linesize[plane];
+        int plane_width       = s->width  >> !!plane;
+        int plane_height      = s->height >> !!plane;
+        int y =        slice *  FRAGMENT_PIXELS << !plane ;
+        int slice_height = y + (FRAGMENT_PIXELS << !plane);
+        int i = s->macroblock_fragments[current_macroblock_entry + plane + 3*!!plane];
+
+        if (!s->flipped_image) stride = -stride;
 
-        /* set up plane-specific parameters */
-        if (plane == 0) {
-            output_plane = s->current_frame.data[0];
-            last_plane = s->last_frame.data[0];
-            golden_plane = s->golden_frame.data[0];
-            stride = s->current_frame.linesize[0];
-            if (!s->flipped_image) stride = -stride;
-            upper_motion_limit = 7 * s->current_frame.linesize[0];
-            lower_motion_limit = s->height * s->current_frame.linesize[0] + s->width - 8;
-            y = slice * FRAGMENT_PIXELS * 2;
-            plane_width = s->width;
-            plane_height = s->height;
-            slice_height = y + FRAGMENT_PIXELS * 2;
-            i = s->macroblock_fragments[current_macroblock_entry + 0];
-        } else if (plane == 1) {
-            output_plane = s->current_frame.data[1];
-            last_plane = s->last_frame.data[1];
-            golden_plane = s->golden_frame.data[1];
-            stride = s->current_frame.linesize[1];
-            if (!s->flipped_image) stride = -stride;
-            upper_motion_limit = 7 * s->current_frame.linesize[1];
-            lower_motion_limit = (s->height / 2) * s->current_frame.linesize[1] + (s->width / 2) - 8;
-            y = slice * FRAGMENT_PIXELS;
-            plane_width = s->width / 2;
-            plane_height = s->height / 2;
-            slice_height = y + FRAGMENT_PIXELS;
-            i = s->macroblock_fragments[current_macroblock_entry + 4];
-        } else {
-            output_plane = s->current_frame.data[2];
-            last_plane = s->last_frame.data[2];
-            golden_plane = s->golden_frame.data[2];
-            stride = s->current_frame.linesize[2];
-            if (!s->flipped_image) stride = -stride;
-            upper_motion_limit = 7 * s->current_frame.linesize[2];
-            lower_motion_limit = (s->height / 2) * s->current_frame.linesize[2] + (s->width / 2) - 8;
-            y = slice * FRAGMENT_PIXELS;
-            plane_width = s->width / 2;
-            plane_height = s->height / 2;
-            slice_height = y + FRAGMENT_PIXELS;
-            i = s->macroblock_fragments[current_macroblock_entry + 5];
-        }
-        fragment_width = plane_width / FRAGMENT_PIXELS;
 
         if(ABS(stride) > 2048)
             return; //various tables are fixed size
@@ -1855,12 +1605,9 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                                 motion_source + stride + 1 + d,
                                 stride, 8);
                         }
-                        dequantizer = s->inter_dequant;
+                        dequantizer = s->qmat[1][plane];
                     }else{
-                        if (plane == 0)
-                            dequantizer = s->intra_y_dequant;
-                        else
-                            dequantizer = s->intra_c_dequant;
+                        dequantizer = s->qmat[0][plane];
                     }
 
                     /* dequantize the DCT coefficients */
@@ -1935,7 +1682,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                       (s->all_fragments[i - 1].coding_method != MODE_COPY)) )) {
                     horizontal_filter(
                         output_plane + s->all_fragments[i].first_pixel + 7*stride,
-                        -stride, bounding_values);
+                        -stride, s->bounding_values_array + 127);
                 }
 
                 /* perform the top edge filter if:
@@ -1951,7 +1698,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                       (s->all_fragments[i - fragment_width].coding_method != MODE_COPY)) )) {
                     vertical_filter(
                         output_plane + s->all_fragments[i].first_pixel - stride,
-                        -stride, bounding_values);
+                        -stride, s->bounding_values_array + 127);
                 }
 #endif
             }
@@ -1975,7 +1722,7 @@ static void horizontal_filter(unsigned char *first_pixel, int stride,
     unsigned char *end;
     int filter_value;
 
-    for (end= first_pixel + 8*stride; first_pixel < end; first_pixel += stride) {
+    for (end= first_pixel + 8*stride; first_pixel != end; first_pixel += stride) {
         filter_value =
             (first_pixel[-2] - first_pixel[ 1])
          +3*(first_pixel[ 0] - first_pixel[-1]);
@@ -2004,11 +1751,8 @@ static void vertical_filter(unsigned char *first_pixel, int stride,
 
 static void apply_loop_filter(Vp3DecodeContext *s)
 {
-    int x, y, plane;
-    int width, height;
-    int fragment;
-    int stride;
-    unsigned char *plane_data;
+    int plane;
+    int x, y;
     int *bounding_values= s->bounding_values_array+127;
 
 #if 0
@@ -2033,29 +1777,12 @@ static void apply_loop_filter(Vp3DecodeContext *s)
 #endif
 
     for (plane = 0; plane < 3; plane++) {
-
-        if (plane == 0) {
-            /* Y plane parameters */
-            fragment = 0;
-            width = s->fragment_width;
-            height = s->fragment_height;
-            stride = s->current_frame.linesize[0];
-            plane_data = s->current_frame.data[0];
-        } else if (plane == 1) {
-            /* U plane parameters */
-            fragment = s->u_fragment_start;
-            width = s->fragment_width / 2;
-            height = s->fragment_height / 2;
-            stride = s->current_frame.linesize[1];
-            plane_data = s->current_frame.data[1];
-        } else {
-            /* V plane parameters */
-            fragment = s->v_fragment_start;
-            width = s->fragment_width / 2;
-            height = s->fragment_height / 2;
-            stride = s->current_frame.linesize[2];
-            plane_data = s->current_frame.data[2];
-        }
+        int width           = s->fragment_width  >> !!plane;
+        int height          = s->fragment_height >> !!plane;
+        int fragment        = s->fragment_start        [plane];
+        int stride          = s->current_frame.linesize[plane];
+        uint8_t *plane_data = s->current_frame.data    [plane];
+        if (!s->flipped_image) stride = -stride;
 
         for (y = 0; y < height; y++) {
 
@@ -2065,7 +1792,7 @@ START_TIMER
                 if ((x > 0) &&
                     (s->all_fragments[fragment].coding_method != MODE_COPY)) {
                     horizontal_filter(
-                        plane_data + s->all_fragments[fragment].first_pixel - 7*stride,
+                        plane_data + s->all_fragments[fragment].first_pixel,
                         stride, bounding_values);
                 }
 
@@ -2073,7 +1800,7 @@ START_TIMER
                 if ((y > 0) &&
                     (s->all_fragments[fragment].coding_method != MODE_COPY)) {
                     vertical_filter(
-                        plane_data + s->all_fragments[fragment].first_pixel + stride,
+                        plane_data + s->all_fragments[fragment].first_pixel,
                         stride, bounding_values);
                 }
 
@@ -2084,7 +1811,7 @@ START_TIMER
                     (s->all_fragments[fragment].coding_method != MODE_COPY) &&
                     (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) {
                     horizontal_filter(
-                        plane_data + s->all_fragments[fragment + 1].first_pixel - 7*stride,
+                        plane_data + s->all_fragments[fragment + 1].first_pixel,
                         stride, bounding_values);
                 }
 
@@ -2095,7 +1822,7 @@ START_TIMER
                     (s->all_fragments[fragment].coding_method != MODE_COPY) &&
                     (s->all_fragments[fragment + width].coding_method == MODE_COPY)) {
                     vertical_filter(
-                        plane_data + s->all_fragments[fragment + width].first_pixel + stride,
+                        plane_data + s->all_fragments[fragment + width].first_pixel,
                         stride, bounding_values);
                 }
 
@@ -2131,7 +1858,7 @@ static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s)
     }
 
     /* U plane */
-    i = s->u_fragment_start;
+    i = s->fragment_start[1];
     for (y = s->fragment_height / 2; y > 0; y--) {
         for (x = 0; x < s->fragment_width / 2; x++) {
             s->all_fragments[i++].first_pixel =
@@ -2144,7 +1871,7 @@ static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s)
     }
 
     /* V plane */
-    i = s->v_fragment_start;
+    i = s->fragment_start[2];
     for (y = s->fragment_height / 2; y > 0; y--) {
         for (x = 0; x < s->fragment_width / 2; x++) {
             s->all_fragments[i++].first_pixel =
@@ -2178,7 +1905,7 @@ static void theora_calculate_pixel_addresses(Vp3DecodeContext *s)
     }
 
     /* U plane */
-    i = s->u_fragment_start;
+    i = s->fragment_start[1];
     for (y = 1; y <= s->fragment_height / 2; y++) {
         for (x = 0; x < s->fragment_width / 2; x++) {
             s->all_fragments[i++].first_pixel =
@@ -2191,7 +1918,7 @@ static void theora_calculate_pixel_addresses(Vp3DecodeContext *s)
     }
 
     /* V plane */
-    i = s->v_fragment_start;
+    i = s->fragment_start[2];
     for (y = 1; y <= s->fragment_height / 2; y++) {
         for (x = 0; x < s->fragment_width / 2; x++) {
             s->all_fragments[i++].first_pixel =
@@ -2210,7 +1937,7 @@ static void theora_calculate_pixel_addresses(Vp3DecodeContext *s)
 static int vp3_decode_init(AVCodecContext *avctx)
 {
     Vp3DecodeContext *s = avctx->priv_data;
-    int i;
+    int i, inter, plane;
     int c_width;
     int c_height;
     int y_superblock_count;
@@ -2261,8 +1988,8 @@ static int vp3_decode_init(AVCodecContext *avctx)
 
     /* fragment count covers all 8x8 blocks for all 3 planes */
     s->fragment_count = s->fragment_width * s->fragment_height * 3 / 2;
-    s->u_fragment_start = s->fragment_width * s->fragment_height;
-    s->v_fragment_start = s->fragment_width * s->fragment_height * 5 / 4;
+    s->fragment_start[1] = s->fragment_width * s->fragment_height;
+    s->fragment_start[2] = s->fragment_width * s->fragment_height * 5 / 4;
 
     debug_init("  Y plane: %d x %d\n", s->width, s->height);
     debug_init("  C plane: %d x %d\n", c_width, c_height);
@@ -2278,8 +2005,8 @@ static int vp3_decode_init(AVCodecContext *avctx)
         s->fragment_count,
         s->fragment_width,
         s->fragment_height,
-        s->u_fragment_start,
-        s->v_fragment_start);
+        s->fragment_start[1],
+        s->fragment_start[2]);
 
     s->all_fragments = av_malloc(s->fragment_count * sizeof(Vp3Fragment));
     s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65);
@@ -2293,14 +2020,23 @@ static int vp3_decode_init(AVCodecContext *avctx)
         for (i = 0; i < 64; i++)
             s->coded_ac_scale_factor[i] = vp31_ac_scale_factor[i];
         for (i = 0; i < 64; i++)
-            s->coded_intra_y_dequant[i] = vp31_intra_y_dequant[i];
+            s->base_matrix[0][i] = vp31_intra_y_dequant[i];
         for (i = 0; i < 64; i++)
-            s->coded_intra_c_dequant[i] = vp31_intra_c_dequant[i];
+            s->base_matrix[1][i] = vp31_intra_c_dequant[i];
         for (i = 0; i < 64; i++)
-            s->coded_inter_dequant[i] = vp31_inter_dequant[i];
+            s->base_matrix[2][i] = vp31_inter_dequant[i];
         for (i = 0; i < 64; i++)
             s->filter_limit_values[i] = vp31_filter_limit_values[i];
 
+        for(inter=0; inter<2; inter++){
+            for(plane=0; plane<3; plane++){
+                s->qr_count[inter][plane]= 1;
+                s->qr_size [inter][plane][0]= 63;
+                s->qr_base [inter][plane][0]=
+                s->qr_base [inter][plane][1]= 2*inter + (!!plane)*!inter;
+            }
+        }
+
         /* init VLC tables */
         for (i = 0; i < 16; i++) {
 
@@ -2418,10 +2154,10 @@ static int vp3_decode_frame(AVCodecContext *avctx,
         switch(ptype)
         {
             case 1:
-                theora_decode_comments(avctx, gb);
+                theora_decode_comments(avctx, &gb);
                 break;
             case 2:
-                theora_decode_tables(avctx, gb);
+                theora_decode_tables(avctx, &gb);
                     init_dequantizer(s);
                 break;
             default:
@@ -2435,9 +2171,13 @@ static int vp3_decode_frame(AVCodecContext *avctx,
     if (!s->theora)
         skip_bits(&gb, 1);
     s->last_quality_index = s->quality_index;
-    s->quality_index = get_bits(&gb, 6);
-    if (s->theora >= 0x030200)
-        skip_bits1(&gb);
+
+    s->nqis=0;
+    do{
+        s->qis[s->nqis++]= get_bits(&gb, 6);
+    } while(s->theora >= 0x030200 && s->nqis<3 && get_bits1(&gb));
+
+    s->quality_index= s->qis[0];
 
     if (s->avctx->debug & FF_DEBUG_PICT_INFO)
         av_log(s->avctx, AV_LOG_INFO, " VP3 %sframe #%d: Q index = %d\n",
@@ -2486,7 +2226,7 @@ static int vp3_decode_frame(AVCodecContext *avctx,
         }
 
         /* golden frame is also the current frame */
-        memcpy(&s->current_frame, &s->golden_frame, sizeof(AVFrame));
+        s->current_frame= s->golden_frame;
 
         /* time to figure out pixel addresses? */
         if (!s->pixel_addresses_inited)
@@ -2495,10 +2235,15 @@ static int vp3_decode_frame(AVCodecContext *avctx,
                 vp3_calculate_pixel_addresses(s);
             else
                 theora_calculate_pixel_addresses(s);
+            s->pixel_addresses_inited = 1;
         }
     } else {
         /* allocate a new current frame */
         s->current_frame.reference = 3;
+        if (!s->pixel_addresses_inited) {
+            av_log(s->avctx, AV_LOG_ERROR, "vp3: first frame not a keyframe\n");
+            return -1;
+        }
         if(avctx->get_buffer(avctx, &s->current_frame) < 0) {
             av_log(s->avctx, AV_LOG_ERROR, "vp3: get_buffer() failed\n");
             return -1;
@@ -2553,9 +2298,9 @@ if (!s->keyframe) {
 
     reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height);
     if ((avctx->flags & CODEC_FLAG_GRAY) == 0) {
-        reverse_dc_prediction(s, s->u_fragment_start,
+        reverse_dc_prediction(s, s->fragment_start[1],
             s->fragment_width / 2, s->fragment_height / 2);
-        reverse_dc_prediction(s, s->v_fragment_start,
+        reverse_dc_prediction(s, s->fragment_start[2],
             s->fragment_width / 2, s->fragment_height / 2);
     }
     STOP_TIMER("reverse_dc_prediction")}
@@ -2582,7 +2327,7 @@ if (!s->keyframe) {
         avctx->release_buffer(avctx, &s->last_frame);
 
     /* shuffle frames (last = current) */
-    memcpy(&s->last_frame, &s->current_frame, sizeof(AVFrame));
+    s->last_frame= s->current_frame;
     s->current_frame.data[0]= NULL; /* ensure that we catch any access to this released frame */
 
     return buf_size;
@@ -2646,19 +2391,12 @@ static int read_huffman_tree(AVCodecContext *avctx, GetBitContext *gb)
     return 0;
 }
 
-static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb)
+static int theora_decode_header(AVCodecContext *avctx, GetBitContext *gb)
 {
     Vp3DecodeContext *s = avctx->priv_data;
-    int major, minor, micro;
 
-    major = get_bits(&gb, 8); /* version major */
-    minor = get_bits(&gb, 8); /* version minor */
-    micro = get_bits(&gb, 8); /* version micro */
-    av_log(avctx, AV_LOG_INFO, "Theora bitstream version %d.%d.%d\n",
-        major, minor, micro);
-
-    /* FIXME: endianess? */
-    s->theora = (major << 16) | (minor << 8) | micro;
+    s->theora = get_bits_long(gb, 24);
+    av_log(avctx, AV_LOG_INFO, "Theora bitstream version %X\n", s->theora);
 
     /* 3.2.0 aka alpha3 has the same frame orientation as original vp3 */
     /* but previous versions have the image flipped relative to vp3 */
@@ -2668,8 +2406,8 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb)
         av_log(avctx, AV_LOG_DEBUG, "Old (<alpha3) Theora bitstream, flipped image\n");
     }
 
-    s->width = get_bits(&gb, 16) << 4;
-    s->height = get_bits(&gb, 16) << 4;
+    s->width = get_bits(gb, 16) << 4;
+    s->height = get_bits(gb, 16) << 4;
 
     if(avcodec_check_dimensions(avctx, s->width, s->height)){
         av_log(avctx, AV_LOG_ERROR, "Invalid dimensions (%dx%d)\n", s->width, s->height);
@@ -2679,47 +2417,49 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb)
 
     if (s->theora >= 0x030400)
     {
-        skip_bits(&gb, 32); /* total number of superblocks in a frame */
+        skip_bits(gb, 32); /* total number of superblocks in a frame */
         // fixme, the next field is 36bits long
-        skip_bits(&gb, 32); /* total number of blocks in a frame */
-        skip_bits(&gb, 4); /* total number of blocks in a frame */
-        skip_bits(&gb, 32); /* total number of macroblocks in a frame */
+        skip_bits(gb, 32); /* total number of blocks in a frame */
+        skip_bits(gb, 4); /* total number of blocks in a frame */
+        skip_bits(gb, 32); /* total number of macroblocks in a frame */
 
-        skip_bits(&gb, 24); /* frame width */
-        skip_bits(&gb, 24); /* frame height */
+        skip_bits(gb, 24); /* frame width */
+        skip_bits(gb, 24); /* frame height */
     }
     else
     {
-        skip_bits(&gb, 24); /* frame width */
-        skip_bits(&gb, 24); /* frame height */
+        skip_bits(gb, 24); /* frame width */
+        skip_bits(gb, 24); /* frame height */
     }
 
-    skip_bits(&gb, 8); /* offset x */
-    skip_bits(&gb, 8); /* offset y */
+  if (s->theora >= 0x030200) {
+    skip_bits(gb, 8); /* offset x */
+    skip_bits(gb, 8); /* offset y */
+  }
 
-    skip_bits(&gb, 32); /* fps numerator */
-    skip_bits(&gb, 32); /* fps denumerator */
-    skip_bits(&gb, 24); /* aspect numerator */
-    skip_bits(&gb, 24); /* aspect denumerator */
+    skip_bits(gb, 32); /* fps numerator */
+    skip_bits(gb, 32); /* fps denominator */
+    skip_bits(gb, 24); /* aspect numerator */
+    skip_bits(gb, 24); /* aspect denominator */
 
     if (s->theora < 0x030200)
-        skip_bits(&gb, 5); /* keyframe frequency force */
-    skip_bits(&gb, 8); /* colorspace */
+        skip_bits(gb, 5); /* keyframe frequency force */
+    skip_bits(gb, 8); /* colorspace */
     if (s->theora >= 0x030400)
-        skip_bits(&gb, 2); /* pixel format: 420,res,422,444 */
-    skip_bits(&gb, 24); /* bitrate */
+        skip_bits(gb, 2); /* pixel format: 420,res,422,444 */
+    skip_bits(gb, 24); /* bitrate */
 
-    skip_bits(&gb, 6); /* quality hint */
+    skip_bits(gb, 6); /* quality hint */
 
     if (s->theora >= 0x030200)
     {
-        skip_bits(&gb, 5); /* keyframe frequency force */
+        skip_bits(gb, 5); /* keyframe frequency force */
 
         if (s->theora < 0x030400)
-            skip_bits(&gb, 5); /* spare bits */
+            skip_bits(gb, 5); /* spare bits */
     }
 
-//    align_get_bits(&gb);
+//    align_get_bits(gb);
 
     avctx->width = s->width;
     avctx->height = s->height;
@@ -2727,132 +2467,89 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb)
     return 0;
 }
 
-static inline int theora_get_32bit(GetBitContext gb)
-{
-    int ret = get_bits(&gb, 8);
-    ret += get_bits(&gb, 8) << 8;
-    ret += get_bits(&gb, 8) << 16;
-    ret += get_bits(&gb, 8) << 24;
-
-    return ret;
-}
-
-static int theora_decode_comments(AVCodecContext *avctx, GetBitContext gb)
-{
-    Vp3DecodeContext *s = avctx->priv_data;
-    int len;
-
-    if (s->theora <= 0x030200)
-    {
-        int i, comments;
-
-        // vendor string
-        len = get_bits_long(&gb, 32);
-        len = le2me_32(len);
-        while(len--)
-            skip_bits(&gb, 8);
-
-        // user comments
-        comments = get_bits_long(&gb, 32);
-        comments = le2me_32(comments);
-        for (i = 0; i < comments; i++)
-        {
-            len = get_bits_long(&gb, 32);
-            len = be2me_32(len);
-            while(len--)
-                skip_bits(&gb, 8);
-        }
-    }
-    else
-    {
-        do {
-            len = get_bits_long(&gb, 32);
-            len = le2me_32(len);
-            if (len <= 0)
-                break;
-            while (len--)
-                skip_bits(&gb, 8);
-        } while (1);
-    }
-    return 0;
-}
-
-static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb)
+static int theora_decode_tables(AVCodecContext *avctx, GetBitContext *gb)
 {
     Vp3DecodeContext *s = avctx->priv_data;
-    int i, n, matrices;
+    int i, n, matrices, inter, plane;
 
     if (s->theora >= 0x030200) {
-        n = get_bits(&gb, 3);
+        n = get_bits(gb, 3);
         /* loop filter limit values table */
         for (i = 0; i < 64; i++)
-            s->filter_limit_values[i] = get_bits(&gb, n);
+            s->filter_limit_values[i] = get_bits(gb, n);
     }
 
     if (s->theora >= 0x030200)
-        n = get_bits(&gb, 4) + 1;
+        n = get_bits(gb, 4) + 1;
     else
         n = 16;
     /* quality threshold table */
     for (i = 0; i < 64; i++)
-        s->coded_ac_scale_factor[i] = get_bits(&gb, n);
+        s->coded_ac_scale_factor[i] = get_bits(gb, n);
 
     if (s->theora >= 0x030200)
-        n = get_bits(&gb, 4) + 1;
+        n = get_bits(gb, 4) + 1;
     else
         n = 16;
     /* dc scale factor table */
     for (i = 0; i < 64; i++)
-        s->coded_dc_scale_factor[i] = get_bits(&gb, n);
+        s->coded_dc_scale_factor[i] = get_bits(gb, n);
 
     if (s->theora >= 0x030200)
-        matrices = get_bits(&gb, 9) + 1;
+        matrices = get_bits(gb, 9) + 1;
     else
         matrices = 3;
-    if (matrices != 3) {
-        av_log(avctx,AV_LOG_ERROR, "unsupported matrices: %d\n", matrices);
-//        return -1;
-    }
-    /* y coeffs */
-    for (i = 0; i < 64; i++)
-        s->coded_intra_y_dequant[i] = get_bits(&gb, 8);
-
-    /* uv coeffs */
-    for (i = 0; i < 64; i++)
-        s->coded_intra_c_dequant[i] = get_bits(&gb, 8);
 
-    /* inter coeffs */
-    for (i = 0; i < 64; i++)
-        s->coded_inter_dequant[i] = get_bits(&gb, 8);
+    if(matrices > 384){
+        av_log(avctx, AV_LOG_ERROR, "invalid number of base matrixes\n");
+        return -1;
+    }
 
-    /* skip unknown matrices */
-    n = matrices - 3;
-    while(n--)
+    for(n=0; n<matrices; n++){
         for (i = 0; i < 64; i++)
-            skip_bits(&gb, 8);
+            s->base_matrix[n][i]= get_bits(gb, 8);
+    }
 
-    for (i = 0; i <= 1; i++) {
-        for (n = 0; n <= 2; n++) {
-            int newqr;
-            if (i > 0 || n > 0)
-                newqr = get_bits(&gb, 1);
-            else
-                newqr = 1;
+    for (inter = 0; inter <= 1; inter++) {
+        for (plane = 0; plane <= 2; plane++) {
+            int newqr= 1;
+            if (inter || plane > 0)
+                newqr = get_bits(gb, 1);
             if (!newqr) {
-                if (i > 0)
-                    get_bits(&gb, 1);
-            }
-            else {
+                int qtj, plj;
+                if(inter && get_bits(gb, 1)){
+                    qtj = 0;
+                    plj = plane;
+                }else{
+                    qtj= (3*inter + plane - 1) / 3;
+                    plj= (plane + 2) % 3;
+                }
+                s->qr_count[inter][plane]= s->qr_count[qtj][plj];
+                memcpy(s->qr_size[inter][plane], s->qr_size[qtj][plj], sizeof(s->qr_size[0][0]));
+                memcpy(s->qr_base[inter][plane], s->qr_base[qtj][plj], sizeof(s->qr_base[0][0]));
+            } else {
+                int qri= 0;
                 int qi = 0;
-                skip_bits(&gb, av_log2(matrices-1)+1);
-                while (qi < 63) {
-                    qi += get_bits(&gb, av_log2(63-qi)+1) + 1;
-                    skip_bits(&gb, av_log2(matrices-1)+1);
+
+                for(;;){
+                    i= get_bits(gb, av_log2(matrices-1)+1);
+                    if(i>= matrices){
+                        av_log(avctx, AV_LOG_ERROR, "invalid base matrix index\n");
+                        return -1;
+                    }
+                    s->qr_base[inter][plane][qri]= i;
+                    if(qi >= 63)
+                        break;
+                    i = get_bits(gb, av_log2(63-qi)+1) + 1;
+                    s->qr_size[inter][plane][qri++]= i;
+                    qi += i;
                 }
+
                 if (qi > 63) {
                     av_log(avctx, AV_LOG_ERROR, "invalid qi %d > 63\n", qi);
                     return -1;
                 }
+                s->qr_count[inter][plane]= qri;
             }
         }
     }
@@ -2861,11 +2558,11 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb)
     for (s->hti = 0; s->hti < 80; s->hti++) {
         s->entries = 0;
         s->huff_code_size = 1;
-        if (!get_bits(&gb, 1)) {
+        if (!get_bits(gb, 1)) {
             s->hbits = 0;
-            read_huffman_tree(avctx, &gb);
+            read_huffman_tree(avctx, gb);
             s->hbits = 1;
-            read_huffman_tree(avctx, &gb);
+            read_huffman_tree(avctx, gb);
         }
     }
 
@@ -2903,7 +2600,7 @@ static int theora_decode_init(AVCodecContext *avctx)
      if (!(ptype & 0x80))
      {
         av_log(avctx, AV_LOG_ERROR, "Invalid extradata!\n");
-        return -1;
+//        return -1;
      }
 
     // FIXME: check for this aswell
@@ -2912,19 +2609,23 @@ static int theora_decode_init(AVCodecContext *avctx)
     switch(ptype)
     {
         case 0x80:
-            theora_decode_header(avctx, gb);
+            theora_decode_header(avctx, &gb);
                 break;
         case 0x81:
 // FIXME: is this needed? it breaks sometimes
 //            theora_decode_comments(avctx, gb);
             break;
         case 0x82:
-            theora_decode_tables(avctx, gb);
+            theora_decode_tables(avctx, &gb);
             break;
         default:
             av_log(avctx, AV_LOG_ERROR, "Unknown Theora config packet: %d\n", ptype&~0x80);
             break;
     }
+    if(8*op_bytes != get_bits_count(&gb))
+        av_log(avctx, AV_LOG_ERROR, "%d bits left in packet %X\n", 8*op_bytes - get_bits_count(&gb), ptype);
+    if (s->theora < 0x030200)
+        break;
   }
 
     vp3_decode_init(avctx);