178 files changed, 29250 insertions, 26153 deletions
diff --git a/src/libffmpeg/diff_to_ffmpeg_cvs.txt b/src/libffmpeg/diff_to_ffmpeg_cvs.txt
index 7a97c12a2..329714dda 100644
--- a/src/libffmpeg/diff_to_ffmpeg_cvs.txt
+++ b/src/libffmpeg/diff_to_ffmpeg_cvs.txt
@@ -1,13 +1,14 @@
+? diff_to_ffmpeg_cvs.txt
 Index: libavcodec/avcodec.h
 ===================================================================
 RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/avcodec.h,v
-retrieving revision 1.426
-diff -u -r1.426 avcodec.h
---- libavcodec/avcodec.h	20 Oct 2005 20:04:45 -0000	1.426
-+++ libavcodec/avcodec.h	23 Oct 2005 12:33:01 -0000
+retrieving revision 1.446
+diff -u -r1.446 avcodec.h
+--- libavcodec/avcodec.h	24 Jan 2006 21:57:26 -0000	1.446
++++ libavcodec/avcodec.h	5 Feb 2006 13:43:22 -0000
 @@ -31,6 +31,13 @@
- #define AV_TIME_BASE 1000000
- #define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
+ #define AV_TIME_BASE            1000000
+ #define AV_TIME_BASE_Q          (AVRational){1, AV_TIME_BASE}
  
 +/* FIXME: We cannot use ffmpeg's XvMC capabilities, since that would require
 + * linking the ffmpeg plugin against XvMC libraries, which is a bad thing,
@@ -17,9 +18,9 @@ diff -u -r1.426 avcodec.h
 +#undef HAVE_XVMC
 +
  enum CodecID {
-     CODEC_ID_NONE, 
+     CODEC_ID_NONE,
      CODEC_ID_MPEG1VIDEO,
-@@ -2418,6 +2425,13 @@
+@@ -2573,6 +2580,13 @@
  
  extern unsigned int av_xiphlacing(unsigned char *s, unsigned int v);
  
@@ -36,13 +37,13 @@ diff -u -r1.426 avcodec.h
 Index: libavcodec/dsputil.c
 ===================================================================
 RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/dsputil.c,v
-retrieving revision 1.125
-diff -u -r1.125 dsputil.c
---- libavcodec/dsputil.c	14 Aug 2005 15:42:39 -0000	1.125
-+++ libavcodec/dsputil.c	23 Oct 2005 12:33:20 -0000
+retrieving revision 1.133
+diff -u -r1.133 dsputil.c
+--- libavcodec/dsputil.c	5 Feb 2006 13:35:16 -0000	1.133
++++ libavcodec/dsputil.c	5 Feb 2006 13:43:47 -0000
 @@ -371,6 +371,8 @@
-     assert(s>=0); 
-     
+     assert(s>=0);
+ 
      return s>>2;
 +#else
 +    return 0;
@@ -52,15 +53,15 @@ diff -u -r1.125 dsputil.c
 Index: libavcodec/dsputil.h
 ===================================================================
 RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/dsputil.h,v
-retrieving revision 1.120
-diff -u -r1.120 dsputil.h
---- libavcodec/dsputil.h	19 Sep 2005 23:26:47 -0000	1.120
-+++ libavcodec/dsputil.h	23 Oct 2005 12:33:24 -0000
+retrieving revision 1.127
+diff -u -r1.127 dsputil.h
+--- libavcodec/dsputil.h	5 Feb 2006 13:35:16 -0000	1.127
++++ libavcodec/dsputil.h	5 Feb 2006 13:43:50 -0000
 @@ -31,6 +31,9 @@
  #include "common.h"
  #include "avcodec.h"
  
-+#if defined(ARCH_X86)
++#if defined(ARCH_X86) || defined(ARCH_X86_64)
 +#define HAVE_MMX 1
 +#endif
  
@@ -69,21 +70,21 @@ diff -u -r1.120 dsputil.h
 Index: libavcodec/motion_est.c
 ===================================================================
 RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/motion_est.c,v
-retrieving revision 1.110
-diff -u -r1.110 motion_est.c
---- libavcodec/motion_est.c	26 Aug 2005 19:05:44 -0000	1.110
-+++ libavcodec/motion_est.c	23 Oct 2005 12:33:36 -0000
-@@ -20,6 +20,9 @@
-  *
+retrieving revision 1.120
+diff -u -r1.120 motion_est.c
+--- libavcodec/motion_est.c	22 Jan 2006 20:54:52 -0000	1.120
++++ libavcodec/motion_est.c	5 Feb 2006 13:44:03 -0000
+@@ -21,6 +21,9 @@
   * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
   */
-+
+ 
 +/* motion estimation only needed for encoders */
 +#ifdef CONFIG_ENCODERS
-  
++
  /**
   * @file motion_est.c
-@@ -2038,3 +2041,5 @@
+  * Motion estimation.
+@@ -2111,3 +2114,5 @@
          }
      }
  }
@@ -92,10 +93,10 @@ diff -u -r1.110 motion_est.c
 Index: libavcodec/mpeg12.c
 ===================================================================
 RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/mpeg12.c,v
-retrieving revision 1.242
-diff -u -r1.242 mpeg12.c
---- libavcodec/mpeg12.c	14 Aug 2005 15:42:39 -0000	1.242
-+++ libavcodec/mpeg12.c	23 Oct 2005 12:34:08 -0000
+retrieving revision 1.248
+diff -u -r1.248 mpeg12.c
+--- libavcodec/mpeg12.c	4 Feb 2006 20:32:02 -0000	1.248
++++ libavcodec/mpeg12.c	5 Feb 2006 13:44:22 -0000
 @@ -34,6 +34,13 @@
  //#include <assert.h>
  
@@ -108,9 +109,9 @@ diff -u -r1.242 mpeg12.c
 +
 +
  /* Start codes. */
- #define SEQ_END_CODE		0x000001b7
- #define SEQ_START_CODE		0x000001b3
-@@ -2812,10 +2819,12 @@
+ #define SEQ_END_CODE            0x000001b7
+ #define SEQ_START_CODE          0x000001b3
+@@ -2786,10 +2793,12 @@
              s->chroma_intra_matrix[j] = v;
          }
  #ifdef DEBUG
@@ -123,7 +124,7 @@ diff -u -r1.242 mpeg12.c
  #endif
      } else {
          for(i=0;i<64;i++) {
-@@ -2837,10 +2846,12 @@
+@@ -2811,10 +2820,12 @@
              s->chroma_inter_matrix[j] = v;
          }
  #ifdef DEBUG
@@ -139,10 +140,10 @@ diff -u -r1.242 mpeg12.c
 Index: libavcodec/mpegvideo.c
 ===================================================================
 RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/mpegvideo.c,v
-retrieving revision 1.488
-diff -u -r1.488 mpegvideo.c
---- libavcodec/mpegvideo.c	14 Aug 2005 15:42:40 -0000	1.488
-+++ libavcodec/mpegvideo.c	23 Oct 2005 12:35:02 -0000
+retrieving revision 1.509
+diff -u -r1.509 mpegvideo.c
+--- libavcodec/mpegvideo.c	5 Feb 2006 13:35:16 -0000	1.509
++++ libavcodec/mpegvideo.c	5 Feb 2006 13:45:03 -0000
 @@ -38,6 +38,14 @@
  //#undef NDEBUG
  //#include <assert.h>
@@ -158,7 +159,7 @@ diff -u -r1.488 mpegvideo.c
  #ifdef CONFIG_ENCODERS
  static void encode_picture(MpegEncContext *s, int picture_number);
  #endif //CONFIG_ENCODERS
-@@ -1108,6 +1116,8 @@
+@@ -1135,6 +1143,8 @@
          s->low_delay= 0; //s->max_b_frames ? 0 : 1;
          avctx->delay= s->low_delay ? 0 : (s->max_b_frames + 1);
          break;
@@ -167,7 +168,7 @@ diff -u -r1.488 mpegvideo.c
      case CODEC_ID_MPEG2VIDEO:
          s->out_format = FMT_MPEG1;
          s->low_delay= 0; //s->max_b_frames ? 0 : 1;
-@@ -1242,6 +1252,7 @@
+@@ -1270,6 +1280,7 @@
          avctx->delay=0;
          s->low_delay=1;
          break;
@@ -175,16 +176,16 @@ diff -u -r1.488 mpegvideo.c
      default:
          return -1;
      }
-@@ -1263,6 +1274,8 @@
+@@ -1291,6 +1302,8 @@
      ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
      ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
-     
+ 
 +/* xine: do not need this for decode or MPEG-1 encoding modes */
 +#if 0
  #ifdef CONFIG_H261_ENCODER
      if (s->out_format == FMT_H261)
          ff_h261_encode_init(s);
-@@ -1271,6 +1284,8 @@
+@@ -1299,6 +1312,8 @@
          h263_encode_init(s);
      if(s->msmpeg4_version)
          ff_msmpeg4_encode_init(s);
@@ -193,7 +194,7 @@ diff -u -r1.488 mpegvideo.c
      if (s->out_format == FMT_MPEG1)
          ff_mpeg1_encode_init(s);
  
-@@ -1319,9 +1334,12 @@
+@@ -1347,9 +1362,12 @@
  
      ff_rate_control_uninit(s);
  
@@ -205,8 +206,8 @@ diff -u -r1.488 mpegvideo.c
 +#endif /* #if 0 */
  
      av_freep(&avctx->extradata);
-       
-@@ -2350,8 +2368,11 @@
+ 
+@@ -2494,8 +2512,11 @@
  
          MPV_frame_end(s);
  
@@ -215,10 +216,10 @@ diff -u -r1.488 mpegvideo.c
          if (s->out_format == FMT_MJPEG)
              mjpeg_picture_trailer(s);
 +#endif /* #if 0 */
-         
+ 
          if(s->flags&CODEC_FLAG_PASS1)
              ff_write_pass1_stats(s);
-@@ -4297,6 +4318,8 @@
+@@ -4442,6 +4463,8 @@
      case CODEC_ID_MPEG1VIDEO:
      case CODEC_ID_MPEG2VIDEO:
          mpeg1_encode_mb(s, s->block, motion_x, motion_y); break;
@@ -227,7 +228,7 @@ diff -u -r1.488 mpegvideo.c
      case CODEC_ID_MPEG4:
          mpeg4_encode_mb(s, s->block, motion_x, motion_y); break;
      case CODEC_ID_MSMPEG4V2:
-@@ -4317,6 +4340,7 @@
+@@ -4462,6 +4485,7 @@
          h263_encode_mb(s, s->block, motion_x, motion_y); break;
      case CODEC_ID_MJPEG:
          mjpeg_encode_mb(s, s->block); break;
@@ -235,7 +236,7 @@ diff -u -r1.488 mpegvideo.c
      default:
          assert(0);
      }
-@@ -4532,6 +4556,8 @@
+@@ -4677,6 +4701,8 @@
                 +sse(s, s->new_picture.data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,s->dest[2], w>>1, h>>1, s->uvlinesize);
  }
  
@@ -244,7 +245,7 @@ diff -u -r1.488 mpegvideo.c
  static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
      MpegEncContext *s= arg;
  
-@@ -4575,6 +4601,7 @@
+@@ -4720,6 +4746,7 @@
      }
      return 0;
  }
@@ -252,7 +253,7 @@ diff -u -r1.488 mpegvideo.c
  
  static int mb_var_thread(AVCodecContext *c, void *arg){
      MpegEncContext *s= arg;
-@@ -4599,6 +4626,8 @@
+@@ -4744,6 +4771,8 @@
  }
  
  static void write_slice_end(MpegEncContext *s){
@@ -261,7 +262,7 @@ diff -u -r1.488 mpegvideo.c
      if(s->codec_id==CODEC_ID_MPEG4){
          if(s->partitioned_frame){
              ff_mpeg4_merge_partitions(s);
-@@ -4608,6 +4637,7 @@
+@@ -4753,6 +4782,7 @@
      }else if(s->out_format == FMT_MJPEG){
          ff_mjpeg_stuffing(&s->pb);
      }
@@ -269,7 +270,7 @@ diff -u -r1.488 mpegvideo.c
  
      align_put_bits(&s->pb);
      flush_put_bits(&s->pb);
-@@ -4661,10 +4691,13 @@
+@@ -4806,10 +4836,13 @@
      case CODEC_ID_FLV1:
          s->gob_index = ff_h263_get_gob_height(s);
          break;
@@ -283,7 +284,7 @@ diff -u -r1.488 mpegvideo.c
      }
  
      s->resync_mb_x=0;
-@@ -4737,9 +4770,12 @@
+@@ -4882,9 +4915,12 @@
                      if(s->start_mb_y != mb_y || mb_x!=0){
                          write_slice_end(s);
  
@@ -294,11 +295,11 @@ diff -u -r1.488 mpegvideo.c
                          }
 +#endif /* #if 0 */
                      }
-                 
+ 
                      assert((put_bits_count(&s->pb)&7) == 0);
-@@ -4763,19 +4799,25 @@
+@@ -4908,19 +4944,25 @@
                      }
-                     
+ 
                      switch(s->codec_id){
 +/* xine: do not need this for decode or MPEG-1 encoding modes */
 +#if 0
@@ -316,24 +317,24 @@ diff -u -r1.488 mpegvideo.c
 +#if 0
                      case CODEC_ID_H263:
                      case CODEC_ID_H263P:
-                         h263_encode_gob_header(s, mb_y);                       
+                         h263_encode_gob_header(s, mb_y);
                      break;
 +#endif /* #if 0 */
                      }
  
                      if(s->flags&CODEC_FLAG_PASS1){
-@@ -4888,7 +4930,10 @@
-                     
+@@ -5033,7 +5075,10 @@
+ 
                      s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
                      s->mb_intra= 0;
 +/* xine: do not need this for decode or MPEG-1 encoding modes */
 +#if 0
                      ff_mpeg4_set_direct_mv(s, mx, my);
 +#endif /* #if 0 */
-                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, 
+                     encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
                                   &dmin, &next_block, mx, my);
                  }
-@@ -5074,7 +5119,10 @@
+@@ -5219,7 +5264,10 @@
                      s->mb_intra= 0;
                      motion_x=s->b_direct_mv_table[xy][0];
                      motion_y=s->b_direct_mv_table[xy][1];
@@ -344,7 +345,7 @@ diff -u -r1.488 mpegvideo.c
                      break;
                  case CANDIDATE_MB_TYPE_BIDIR:
                      s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
-@@ -5182,8 +5230,11 @@
+@@ -5327,8 +5375,11 @@
      }
  
      //not beautiful here but we must write it before flushing so it has to be here
@@ -356,7 +357,24 @@ diff -u -r1.488 mpegvideo.c
  
      write_slice_end(s);
  
-@@ -5246,10 +5297,13 @@
+@@ -5389,6 +5440,8 @@
+         s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
+ 
+     if(s->adaptive_quant){
++/* xine: do not need this for decode or MPEG-1 encoding modes */
++#if 0
+         switch(s->codec_id){
+         case CODEC_ID_MPEG4:
+             ff_clean_mpeg4_qscales(s);
+@@ -5399,6 +5452,7 @@
+             ff_clean_h263_qscales(s);
+             break;
+         }
++#endif /* #if 0 */
+ 
+         s->lambda= s->lambda_table[0];
+         //FIXME broken
+@@ -5419,10 +5473,13 @@
      s->me.mb_var_sum_temp    =
      s->me.mc_mb_var_sum_temp = 0;
  
@@ -367,10 +385,10 @@ diff -u -r1.488 mpegvideo.c
      if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
          ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
 +#endif /* #if 0 */
-         
+ 
      s->me.scene_change_score=0;
-     
-@@ -5268,6 +5322,8 @@
+ 
+@@ -5452,6 +5509,8 @@
          ff_update_duplicate_context(s->thread_context[i], s);
      }
  
@@ -379,7 +397,7 @@ diff -u -r1.488 mpegvideo.c
      ff_init_me(s);
  
      /* Estimate motion for every MB */
-@@ -5282,6 +5338,8 @@
+@@ -5466,6 +5525,8 @@
  
          s->avctx->execute(s->avctx, estimate_motion_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
      }else /* if(s->pict_type == I_TYPE) */{
@@ -388,7 +406,7 @@ diff -u -r1.488 mpegvideo.c
          /* I-Frame */
          for(i=0; i<s->mb_stride*s->mb_height; i++)
              s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
-@@ -5305,6 +5363,8 @@
+@@ -5489,6 +5550,8 @@
  //printf("Scene change detected, encoding as I Frame %d %d\n", s->current_picture.mb_var_sum, s->current_picture.mc_mb_var_sum);
      }
  
@@ -397,30 +415,15 @@ diff -u -r1.488 mpegvideo.c
      if(!s->umvplus){
          if(s->pict_type==P_TYPE || s->pict_type==S_TYPE) {
              s->f_code= ff_get_best_fcode(s, s->p_mv_table, CANDIDATE_MB_TYPE_INTER);
-@@ -5358,11 +5418,14 @@
+@@ -5542,6 +5605,7 @@
              }
          }
      }
 +#endif /* #if 0 */
  
-     if (!s->fixed_qscale) 
-         s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr
- 
-     if(s->adaptive_quant){
-+/* xine: do not need this for decode or MPEG-1 encoding modes */
-+#if 0
-         switch(s->codec_id){
-         case CODEC_ID_MPEG4:
-             ff_clean_mpeg4_qscales(s);
-@@ -5373,6 +5436,7 @@
-             ff_clean_h263_qscales(s);
-             break;
-         }
-+#endif /* #if 0 */
+     estimate_qp(s, 0);
  
-         s->lambda= s->lambda_table[0];
-         //FIXME broken
-@@ -5408,6 +5472,8 @@
+@@ -5572,6 +5636,8 @@
  
      s->last_bits= put_bits_count(&s->pb);
      switch(s->out_format) {
@@ -429,7 +432,7 @@ diff -u -r1.488 mpegvideo.c
      case FMT_MJPEG:
          mjpeg_picture_header(s);
          break;
-@@ -5436,11 +5502,15 @@
+@@ -5600,11 +5666,15 @@
          else
              h263_encode_picture_header(s, picture_number);
          break;
@@ -448,11 +451,11 @@ diff -u -r1.488 mpegvideo.c
 Index: libavcodec/snow.c
 ===================================================================
 RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/snow.c,v
-retrieving revision 1.63
-diff -u -r1.63 snow.c
---- libavcodec/snow.c	21 Sep 2005 23:09:16 -0000	1.63
-+++ libavcodec/snow.c	23 Oct 2005 12:35:34 -0000
-@@ -2037,6 +2037,7 @@
+retrieving revision 1.87
+diff -u -r1.87 snow.c
+--- libavcodec/snow.c	30 Jan 2006 23:33:18 -0000	1.87
++++ libavcodec/snow.c	5 Feb 2006 13:45:30 -0000
+@@ -2036,6 +2036,7 @@
  #define P_MV1 P[9]
  #define FLAG_QPEL   1 //must be 1
  
@@ -460,15 +463,15 @@ diff -u -r1.63 snow.c
  static int encode_q_branch(SnowContext *s, int level, int x, int y){
      uint8_t p_buffer[1024];
      uint8_t i_buffer[1024];
-@@ -2263,6 +2264,7 @@
+@@ -2243,6 +2244,7 @@
          return score;
      }
  }
 +#endif
  
- static void decode_q_branch(SnowContext *s, int level, int x, int y){
-     const int w= s->b_width << s->block_max_depth;
-@@ -2316,6 +2318,7 @@
+ static always_inline int same_block(BlockNode *a, BlockNode *b){
+     if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
+@@ -2347,6 +2349,7 @@
      }
  }
  
@@ -476,7 +479,7 @@ diff -u -r1.63 snow.c
  static void encode_blocks(SnowContext *s){
      int x, y;
      int w= s->b_width;
-@@ -2331,6 +2334,7 @@
+@@ -2368,6 +2371,7 @@
          }
      }
  }
@@ -484,7 +487,7 @@ diff -u -r1.63 snow.c
  
  static void decode_blocks(SnowContext *s){
      int x, y;
-@@ -3348,6 +3352,7 @@
+@@ -3935,6 +3939,7 @@
      }
  }
  
@@ -492,15 +495,15 @@ diff -u -r1.63 snow.c
  static int encode_init(AVCodecContext *avctx)
  {
      SnowContext *s = avctx->priv_data;
-@@ -3409,6 +3414,7 @@
-     s->chroma_v_shift= 1;
+@@ -4003,6 +4008,7 @@
+ 
      return 0;
  }
 +#endif
  
  static int frame_start(SnowContext *s){
     AVFrame tmp;
-@@ -3434,6 +3440,7 @@
+@@ -4028,6 +4034,7 @@
      return 0;
  }
  
@@ -508,15 +511,15 @@ diff -u -r1.63 snow.c
  static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
      SnowContext *s = avctx->priv_data;
      RangeCoder * const c= &s->c;
-@@ -3628,6 +3635,7 @@
-     
+@@ -4230,6 +4237,7 @@
+ 
      return ff_rac_terminate(c);
  }
 +#endif
  
  static void common_end(SnowContext *s){
      int plane_index, level, orientation;
-@@ -3651,6 +3659,7 @@
+@@ -4254,6 +4262,7 @@
      }
  }
  
@@ -524,7 +527,7 @@ diff -u -r1.63 snow.c
  static int encode_end(AVCodecContext *avctx)
  {
      SnowContext *s = avctx->priv_data;
-@@ -3660,6 +3669,7 @@
+@@ -4263,6 +4272,7 @@
  
      return 0;
  }
@@ -535,88 +538,31 @@ diff -u -r1.63 snow.c
 Index: libavcodec/utils.c
 ===================================================================
 RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/utils.c,v
-retrieving revision 1.160
-diff -u -r1.160 utils.c
---- libavcodec/utils.c	20 Sep 2005 21:43:45 -0000	1.160
-+++ libavcodec/utils.c	23 Oct 2005 12:35:47 -0000
-@@ -1241,11 +1241,11 @@
+retrieving revision 1.173
+diff -u -r1.173 utils.c
+--- libavcodec/utils.c	30 Jan 2006 23:33:18 -0000	1.173
++++ libavcodec/utils.c	5 Feb 2006 13:45:40 -0000
+@@ -1276,11 +1276,11 @@
      AVClass* avc= ptr ? *(AVClass**)ptr : NULL;
      if(level>av_log_level)
- 	return;
+         return;
 -#undef fprintf
 +/* #undef fprintf */
      if(print_prefix && avc) {
- 	    fprintf(stderr, "[%s @ %p]", avc->item_name(ptr), avc);
+             fprintf(stderr, "[%s @ %p]", avc->item_name(ptr), avc);
      }
 -#define fprintf please_use_av_log
 +/* #define fprintf please_use_av_log */
-         
-     print_prefix= strstr(fmt, "\n") != NULL;
-         
-Index: libavcodec/i386/dsputil_mmx.c
-===================================================================
-RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/i386/dsputil_mmx.c,v
-retrieving revision 1.104
-diff -u -r1.104 dsputil_mmx.c
---- libavcodec/i386/dsputil_mmx.c	9 Oct 2005 23:38:52 -0000	1.104
-+++ libavcodec/i386/dsputil_mmx.c	23 Oct 2005 12:36:21 -0000
-@@ -617,31 +617,32 @@
- }
  
- static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
-+    void *dst_reg = dst, *src_reg = src;
-+
-     asm volatile( //FIXME could save 1 instruction if done as 8x4 ...
--        "movd  %4, %%mm0		\n\t"
--        "movd  %5, %%mm1		\n\t"
--        "movd  %6, %%mm2		\n\t"
--        "movd  %7, %%mm3		\n\t"
-+        "movd  (%1), %%mm0		\n\t"
-+        "movd  (%1,%5), %%mm1		\n\t"
-+        "lea (%1, %5, 2), %1		\n\t"
-+        "movd  (%1), %%mm2		\n\t"
-+        "movd  (%1,%5), %%mm3		\n\t"
-         "punpcklbw %%mm1, %%mm0		\n\t"
-         "punpcklbw %%mm3, %%mm2		\n\t"
-         "movq %%mm0, %%mm1		\n\t"
-         "punpcklwd %%mm2, %%mm0		\n\t"
-         "punpckhwd %%mm2, %%mm1		\n\t"
--        "movd  %%mm0, %0		\n\t"
-+        "movd  %%mm0, (%0)		\n\t"
-         "punpckhdq %%mm0, %%mm0		\n\t"
--        "movd  %%mm0, %1		\n\t"
--        "movd  %%mm1, %2		\n\t"
-+        "movd  %%mm0, (%0,%4)		\n\t"
-+        "lea (%0, %4, 2), %0		\n\t"
-+        "movd  %%mm1, (%0)		\n\t"
-         "punpckhdq %%mm1, %%mm1		\n\t"
--        "movd  %%mm1, %3		\n\t"
--        
--        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
--          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
--          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
--          "=m" (*(uint32_t*)(dst + 3*dst_stride))
--        :  "m" (*(uint32_t*)(src + 0*src_stride)),
--           "m" (*(uint32_t*)(src + 1*src_stride)),
--           "m" (*(uint32_t*)(src + 2*src_stride)),
--           "m" (*(uint32_t*)(src + 3*src_stride))
-+        "movd  %%mm1, (%0,%4)		\n\t"
-+        : "=&r" (dst_reg),
-+          "=&r" (src_reg)
-+        : "0"   (dst_reg),
-+          "1"   (src_reg),
-+          "r"   (dst_stride),
-+          "r"   (src_stride)
-     );
- }
+     print_prefix= strstr(fmt, "\n") != NULL;
  
 Index: libavcodec/mlib/dsputil_mlib.c
 ===================================================================
 RCS file: /cvsroot/ffmpeg/ffmpeg/libavcodec/mlib/dsputil_mlib.c,v
-retrieving revision 1.15
-diff -u -r1.15 dsputil_mlib.c
---- libavcodec/mlib/dsputil_mlib.c	15 Mar 2004 01:21:01 -0000	1.15
-+++ libavcodec/mlib/dsputil_mlib.c	23 Oct 2005 12:36:24 -0000
+retrieving revision 1.18
+diff -u -r1.18 dsputil_mlib.c
+--- libavcodec/mlib/dsputil_mlib.c	12 Jan 2006 22:43:20 -0000	1.18
++++ libavcodec/mlib/dsputil_mlib.c	5 Feb 2006 13:45:43 -0000
 @@ -20,6 +20,8 @@
  #include "../dsputil.h"
  #include "../mpegvideo.h"
@@ -645,7 +591,7 @@ diff -u -r1.15 dsputil_mlib.c
  {
 +  if (xine_mm_accel() & MM_ACCEL_MLIB) {
      if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){
- 	s->dsp.fdct = ff_fdct_mlib;
+         s->dsp.fdct = ff_fdct_mlib;
      }
 @@ -459,4 +464,5 @@
          s->dsp.idct    = ff_idct_mlib;
@@ -656,10 +602,10 @@ diff -u -r1.15 dsputil_mlib.c
 Index: libavutil/common.h
 ===================================================================
 RCS file: /cvsroot/ffmpeg/ffmpeg/libavutil/common.h,v
-retrieving revision 1.153
-diff -u -r1.153 common.h
---- libavutil/common.h	19 Sep 2005 23:26:47 -0000	1.153
-+++ libavutil/common.h	23 Oct 2005 12:36:30 -0000
+retrieving revision 1.161
+diff -u -r1.161 common.h
+--- libavutil/common.h	30 Jan 2006 00:22:41 -0000	1.161
++++ libavutil/common.h	5 Feb 2006 13:45:46 -0000
 @@ -6,6 +6,12 @@
  #ifndef COMMON_H
  #define COMMON_H
@@ -673,7 +619,7 @@ diff -u -r1.153 common.h
  #if defined(WIN32) && !defined(__MINGW32__) && !defined(__CYGWIN__)
  #    define CONFIG_WIN32
  #endif
-@@ -185,8 +191,10 @@
+@@ -218,8 +224,10 @@
  
  #ifdef HAVE_AV_CONFIG_H
  
@@ -685,7 +631,7 @@ diff -u -r1.153 common.h
  #endif
  
  #include <float.h>
-@@ -205,10 +213,12 @@
+@@ -238,10 +246,12 @@
  
  #ifdef HAVE_AV_CONFIG_H
  
@@ -699,7 +645,7 @@ diff -u -r1.153 common.h
  
  #endif /* !CONFIG_WIN32 && !CONFIG_OS2 */
  
-@@ -235,7 +245,9 @@
+@@ -275,7 +285,9 @@
  /* debug stuff */
  
  #    ifndef DEBUG
@@ -709,7 +655,7 @@ diff -u -r1.153 common.h
  #    endif
  #    include <assert.h>
  
-@@ -530,8 +542,8 @@
+@@ -573,8 +585,8 @@
  #define sprintf sprintf_is_forbidden_due_to_security_issues_use_snprintf
  #define strcat strcat_is_forbidden_due_to_security_issues_use_pstrcat
  #if !(defined(LIBAVFORMAT_BUILD) || defined(_FRAMEHOOK_H))
@@ -720,7 +666,7 @@ diff -u -r1.153 common.h
  #endif
  
  #define CHECKED_ALLOCZ(p, size)\
-@@ -574,4 +586,16 @@
+@@ -617,4 +629,16 @@
  
  #endif /* HAVE_AV_CONFIG_H */
  
diff --git a/src/libffmpeg/libavcodec/4xm.c b/src/libffmpeg/libavcodec/4xm.c
index 0b4b72fac..3ca2338d2 100644
--- a/src/libffmpeg/libavcodec/4xm.c
+++ b/src/libffmpeg/libavcodec/4xm.c
@@ -14,14 +14,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file 4xm.c
  * 4XM codec.
  */
- 
+
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
@@ -121,9 +121,9 @@ typedef struct FourXContext{
     int mv[256];
     VLC pre_vlc;
     int last_dc;
-    DCTELEM __align8 block[6][64];
+    DECLARE_ALIGNED_8(DCTELEM, block[6][64]);
     uint8_t *bitstream_buffer;
-    int bitstream_buffer_size;
+    unsigned int bitstream_buffer_size;
     CFrameBuffer cfrm[CFRAME_BUFFER_COUNT];
 } FourXContext;
 
@@ -141,7 +141,7 @@ static void idct(DCTELEM block[64]){
     int z5, z10, z11, z12, z13;
     int i;
     int temp[64];
-    
+
     for(i=0; i<8; i++){
         tmp10 = block[8*0 + i] + block[8*4 + i];
         tmp11 = block[8*0 + i] - block[8*4 + i];
@@ -153,7 +153,7 @@ static void idct(DCTELEM block[64]){
         tmp3 = tmp10 - tmp13;
         tmp1 = tmp11 + tmp12;
         tmp2 = tmp11 - tmp12;
-        
+
         z13 = block[8*5 + i] + block[8*3 + i];
         z10 = block[8*5 + i] - block[8*3 + i];
         z11 = block[8*1 + i] + block[8*7 + i];
@@ -179,7 +179,7 @@ static void idct(DCTELEM block[64]){
         temp[8*4 + i] = tmp3 + tmp4;
         temp[8*3 + i] = tmp3 - tmp4;
     }
-  
+
     for(i=0; i<8*8; i+=8){
         tmp10 = temp[0 + i] + temp[4 + i];
         tmp11 = temp[0 + i] - temp[4 + i];
@@ -223,7 +223,7 @@ static void init_vlcs(FourXContext *f){
     int i;
 
     for(i=0; i<4; i++){
-        init_vlc(&block_type_vlc[i], BLOCK_TYPE_VLC_BITS, 7, 
+        init_vlc(&block_type_vlc[i], BLOCK_TYPE_VLC_BITS, 7,
                  &block_type_tab[i][0][1], 2, 1,
                  &block_type_tab[i][0][0], 2, 1, 1);
     }
@@ -282,7 +282,7 @@ static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src, int lo
     const int index= size2index[log2h][log2w];
     const int h= 1<<log2h;
     int code= get_vlc2(&f->gb, block_type_vlc[index].table, BLOCK_TYPE_VLC_BITS, 1);
-    
+
     assert(code>=0 && code<=6);
 
     if(code == 0){
@@ -326,41 +326,41 @@ static int decode_p_frame(FourXContext *f, uint8_t *buf, int length){
     const unsigned int bitstream_size= get32(buf+8);
     const unsigned int bytestream_size= get32(buf+16);
     const unsigned int wordstream_size= get32(buf+12);
-    
+
     if(bitstream_size+ bytestream_size+ wordstream_size + 20 != length
        || bitstream_size  > (1<<26)
        || bytestream_size > (1<<26)
        || wordstream_size > (1<<26)
        ){
-        av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size, 
+        av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size,
         bitstream_size+ bytestream_size+ wordstream_size - length);
         return -1;
     }
-    
+
     f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
     f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (uint32_t*)(buf + 20), bitstream_size/4);
     init_get_bits(&f->gb, f->bitstream_buffer, 8*bitstream_size);
 
     f->wordstream= (uint16_t*)(buf + 20 + bitstream_size);
     f->bytestream= buf + 20 + bitstream_size + wordstream_size;
-    
+
     init_mv(f);
-    
+
     for(y=0; y<height; y+=8){
         for(x=0; x<width; x+=8){
             decode_p_block(f, dst + x, src + x, 3, 3, stride);
         }
-        src += 8*stride; 
-        dst += 8*stride; 
+        src += 8*stride;
+        dst += 8*stride;
     }
-    
+
     if(bitstream_size != (get_bits_count(&f->gb)+31)/32*4)
-        av_log(f->avctx, AV_LOG_ERROR, " %d %d %d bytes left\n", 
-            bitstream_size - (get_bits_count(&f->gb)+31)/32*4, 
+        av_log(f->avctx, AV_LOG_ERROR, " %d %td %td bytes left\n",
+            bitstream_size - (get_bits_count(&f->gb)+31)/32*4,
             bytestream_size - (f->bytestream - (buf + 20 + bitstream_size + wordstream_size)),
             wordstream_size - (((uint8_t*)f->wordstream) - (buf + 20 + bitstream_size))
         );
-    
+
     return 0;
 }
 
@@ -387,7 +387,7 @@ static int decode_i_block(FourXContext *f, DCTELEM *block){
     i = 1;
     for(;;) {
         code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
-        
+
         /* EOB */
         if (code == 0)
             break;
@@ -417,7 +417,7 @@ static inline void idct_put(FourXContext *f, int x, int y){
     int stride= f->current_picture.linesize[0]>>1;
     int i;
     uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x;
-    
+
     for(i=0; i<4; i++){
         block[i][0] += 0x80*8*8;
         idct(block[i]);
@@ -431,7 +431,7 @@ static inline void idct_put(FourXContext *f, int x, int y){
 y= ( 1b + 4g + 2r)/14
 cb=( 3b - 2g - 1r)/14
 cr=(-1b - 4g + 5r)/14
-*/ 
+*/
     for(y=0; y<8; y++){
         for(x=0; x<8; x++){
             DCTELEM *temp= block[(x>>2) + 2*(y>>2)] + 2*(x&3) + 2*8*(y&3); //FIXME optimize
@@ -439,9 +439,9 @@ cr=(-1b - 4g + 5r)/14
             int cr= block[5][x + 8*y];
             int cg= (cb + cr)>>1;
             int y;
-            
+
             cb+=cb;
-            
+
             y = temp[0];
             dst[0       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
             y = temp[1];
@@ -458,14 +458,14 @@ cr=(-1b - 4g + 5r)/14
 
 static int decode_i_mb(FourXContext *f){
     int i;
-    
+
     f->dsp.clear_blocks(f->block[0]);
-    
+
     for(i=0; i<6; i++){
         if(decode_i_block(f, f->block[i]) < 0)
             return -1;
     }
-    
+
     return 0;
 }
 
@@ -478,7 +478,7 @@ static uint8_t *read_huffman_tables(FourXContext *f, uint8_t * const buf){
     int start, end;
     uint8_t *ptr= buf;
     int j;
-    
+
     memset(frequency, 0, sizeof(frequency));
     memset(up, -1, sizeof(up));
 
@@ -486,23 +486,23 @@ static uint8_t *read_huffman_tables(FourXContext *f, uint8_t * const buf){
     end= *ptr++;
     for(;;){
         int i;
-        
+
         for(i=start; i<=end; i++){
             frequency[i]= *ptr++;
 //            printf("%d %d %d\n", start, end, frequency[i]);
         }
         start= *ptr++;
         if(start==0) break;
-        
+
         end= *ptr++;
     }
     frequency[256]=1;
 
-    while((ptr - buf)&3) ptr++; // 4byte align 
+    while((ptr - buf)&3) ptr++; // 4byte align
 
 //    for(j=0; j<16; j++)
 //        printf("%2X", ptr[j]);
-    
+
     for(j=257; j<512; j++){
         int min_freq[2]= {256*256, 256*256};
         int smallest[2]= {0, 0};
@@ -519,11 +519,11 @@ static uint8_t *read_huffman_tables(FourXContext *f, uint8_t * const buf){
             }
         }
         if(min_freq[1] == 256*256) break;
-        
+
         frequency[j]= min_freq[0] + min_freq[1];
         flag[ smallest[0] ]= 0;
         flag[ smallest[1] ]= 1;
-        up[ smallest[0] ]= 
+        up[ smallest[0] ]=
         up[ smallest[1] ]= j;
         frequency[ smallest[0] ]= frequency[ smallest[1] ]= 0;
     }
@@ -538,15 +538,15 @@ static uint8_t *read_huffman_tables(FourXContext *f, uint8_t * const buf){
             len++;
             if(len > 31) av_log(f->avctx, AV_LOG_ERROR, "vlc length overflow\n"); //can this happen at all ?
         }
-        
+
         bits_tab[j]= bits;
         len_tab[j]= len;
     }
-    
-    init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257, 
+
+    init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257,
              len_tab , 1, 1,
              bits_tab, 4, 4, 0);
-             
+
     return ptr;
 }
 
@@ -560,14 +560,14 @@ static int decode_i_frame(FourXContext *f, uint8_t *buf, int length){
     const int token_count __attribute__((unused)) = get32(buf + bitstream_size + 8);
     unsigned int prestream_size= 4*get32(buf + bitstream_size + 4);
     uint8_t *prestream= buf + bitstream_size + 12;
-    
+
     if(prestream_size + bitstream_size + 12 != length
        || bitstream_size > (1<<26)
        || prestream_size > (1<<26)){
         av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n", prestream_size, bitstream_size, length);
         return -1;
     }
-   
+
     prestream= read_huffman_tables(f, prestream);
 
     init_get_bits(&f->gb, buf + 4, 8*bitstream_size);
@@ -579,7 +579,7 @@ static int decode_i_frame(FourXContext *f, uint8_t *buf, int length){
     init_get_bits(&f->pre_gb, f->bitstream_buffer, 8*prestream_size);
 
     f->last_dc= 0*128*8*8;
-    
+
     for(y=0; y<height; y+=16){
         for(x=0; x<width; x+=16){
             if(decode_i_mb(f) < 0)
@@ -587,16 +587,16 @@ static int decode_i_frame(FourXContext *f, uint8_t *buf, int length){
 
             idct_put(f, x, y);
         }
-        dst += 16*stride; 
+        dst += 16*stride;
     }
 
     if(get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
         av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");
-    
+
     return 0;
 }
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -621,7 +621,7 @@ static int decode_frame(AVCodecContext *avctx,
             if(f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
                 av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n", f->cfrm[i].id);
         }
-        
+
         for(i=0; i<CFRAME_BUFFER_COUNT; i++){
             if(f->cfrm[i].id   == id) break;
             if(f->cfrm[i].size == 0 ) free_index= i;
@@ -632,20 +632,20 @@ static int decode_frame(AVCodecContext *avctx,
             f->cfrm[i].id= id;
         }
         cfrm= &f->cfrm[i];
-        
+
         cfrm->data= av_fast_realloc(cfrm->data, &cfrm->allocated_size, cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
-        
+
         memcpy(cfrm->data + cfrm->size, buf+20, data_size);
         cfrm->size += data_size;
-        
+
         if(cfrm->size >= whole_size){
             buf= cfrm->data;
             frame_size= cfrm->size;
-            
+
             if(id != avctx->frame_number){
                 av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n", id, avctx->frame_number);
             }
-            
+
             cfrm->size= cfrm->id= 0;
             frame_4cc= ff_get_fourcc("pfrm");
         }else
@@ -653,7 +653,7 @@ static int decode_frame(AVCodecContext *avctx,
     }else{
         buf= buf + 12;
         frame_size= buf_size - 12;
-    }    
+    }
 
     temp= f->current_picture;
     f->current_picture= f->last_picture;
@@ -699,7 +699,7 @@ for(i=0; i<20; i++){
     *data_size = sizeof(AVPicture);
 
     emms_c();
-    
+
     return buf_size;
 }
 
@@ -714,7 +714,7 @@ static void common_init(AVCodecContext *avctx){
 
 static int decode_init(AVCodecContext *avctx){
     FourXContext * const f = avctx->priv_data;
- 
+
     common_init(avctx);
     init_vlcs(f);
 
@@ -735,7 +735,7 @@ static int decode_end(AVCodecContext *avctx){
         f->cfrm[i].allocated_size= 0;
     }
     free_vlc(&f->pre_vlc);
-    
+
     return 0;
 }
 
diff --git a/src/libffmpeg/libavcodec/8bps.c b/src/libffmpeg/libavcodec/8bps.c
index 4d5a64e5d..b16e3bb56 100644
--- a/src/libffmpeg/libavcodec/8bps.c
+++ b/src/libffmpeg/libavcodec/8bps.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -44,11 +44,11 @@ const enum PixelFormat pixfmt_rgb24[] = {PIX_FMT_BGR24, PIX_FMT_RGBA32, -1};
  */
 typedef struct EightBpsContext {
 
-	AVCodecContext *avctx;
-	AVFrame pic;
+        AVCodecContext *avctx;
+        AVFrame pic;
 
-	unsigned char planes;
-	unsigned char planemap[4];
+        unsigned char planes;
+        unsigned char planemap[4];
 } EightBpsContext;
 
 
@@ -59,87 +59,87 @@ typedef struct EightBpsContext {
  */
 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
 {
-	EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
-	unsigned char *encoded = (unsigned char *)buf;
-	unsigned char *pixptr, *pixptr_end;
-	unsigned int height = avctx->height; // Real image height
-	unsigned int dlen, p, row;
-	unsigned char *lp, *dp;
-	unsigned char count;
-	unsigned int px_inc;
-	unsigned int planes = c->planes;
-	unsigned char *planemap = c->planemap;
-  
-	if(c->pic.data[0])
-		avctx->release_buffer(avctx, &c->pic);
-
-	c->pic.reference = 0;
-	c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
-	if(avctx->get_buffer(avctx, &c->pic) < 0){
-		av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-		return -1;
-	}
-
-	/* Set data pointer after line lengths */
-	dp = encoded + planes * (height << 1);
-
-	/* Ignore alpha plane, don't know what to do with it */
-	if (planes == 4)
-		planes--;
-
-	px_inc = planes + (avctx->pix_fmt == PIX_FMT_RGBA32);
-
-	for (p = 0; p < planes; p++) {
-		/* Lines length pointer for this plane */
-		lp = encoded + p * (height << 1);
-
-		/* Decode a plane */
-		for(row = 0; row < height; row++) {
-			pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p];
-			pixptr_end = pixptr + c->pic.linesize[0];
-			dlen = be2me_16(*(unsigned short *)(lp+row*2));
-			/* Decode a row of this plane */
-			while(dlen > 0) {
-				if(dp + 1 >= buf+buf_size) return -1;
-				if ((count = *dp++) <= 127) {
-					count++;
-					dlen -= count + 1;
-					if (pixptr + count * px_inc > pixptr_end)
-					    break;
-					if(dp + count > buf+buf_size) return -1;
-					while(count--) {
-						*pixptr = *dp++;
-						pixptr += px_inc;
-					}
-				} else {
-					count = 257 - count;
-					if (pixptr + count * px_inc > pixptr_end)
-					    break;
-					while(count--) {
-						*pixptr = *dp;
-						pixptr += px_inc;
-					}
-					dp++;
-					dlen -= 2;
-				}
-			}
-		}
-	}
-
-	if (avctx->palctrl) {
-		memcpy (c->pic.data[1], avctx->palctrl->palette, AVPALETTE_SIZE);
-		if (avctx->palctrl->palette_changed) {
-			c->pic.palette_has_changed = 1;
-			avctx->palctrl->palette_changed = 0;
-		} else
-			c->pic.palette_has_changed = 0;
-	}
-
-	*data_size = sizeof(AVFrame);
-	*(AVFrame*)data = c->pic;
-
-	/* always report that the buffer was completely consumed */
-	return buf_size;
+        EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
+        unsigned char *encoded = (unsigned char *)buf;
+        unsigned char *pixptr, *pixptr_end;
+        unsigned int height = avctx->height; // Real image height
+        unsigned int dlen, p, row;
+        unsigned char *lp, *dp;
+        unsigned char count;
+        unsigned int px_inc;
+        unsigned int planes = c->planes;
+        unsigned char *planemap = c->planemap;
+
+        if(c->pic.data[0])
+                avctx->release_buffer(avctx, &c->pic);
+
+        c->pic.reference = 0;
+        c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
+        if(avctx->get_buffer(avctx, &c->pic) < 0){
+                av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+                return -1;
+        }
+
+        /* Set data pointer after line lengths */
+        dp = encoded + planes * (height << 1);
+
+        /* Ignore alpha plane, don't know what to do with it */
+        if (planes == 4)
+                planes--;
+
+        px_inc = planes + (avctx->pix_fmt == PIX_FMT_RGBA32);
+
+        for (p = 0; p < planes; p++) {
+                /* Lines length pointer for this plane */
+                lp = encoded + p * (height << 1);
+
+                /* Decode a plane */
+                for(row = 0; row < height; row++) {
+                        pixptr = c->pic.data[0] + row * c->pic.linesize[0] + planemap[p];
+                        pixptr_end = pixptr + c->pic.linesize[0];
+                        dlen = be2me_16(*(unsigned short *)(lp+row*2));
+                        /* Decode a row of this plane */
+                        while(dlen > 0) {
+                                if(dp + 1 >= buf+buf_size) return -1;
+                                if ((count = *dp++) <= 127) {
+                                        count++;
+                                        dlen -= count + 1;
+                                        if (pixptr + count * px_inc > pixptr_end)
+                                            break;
+                                        if(dp + count > buf+buf_size) return -1;
+                                        while(count--) {
+                                                *pixptr = *dp++;
+                                                pixptr += px_inc;
+                                        }
+                                } else {
+                                        count = 257 - count;
+                                        if (pixptr + count * px_inc > pixptr_end)
+                                            break;
+                                        while(count--) {
+                                                *pixptr = *dp;
+                                                pixptr += px_inc;
+                                        }
+                                        dp++;
+                                        dlen -= 2;
+                                }
+                        }
+                }
+        }
+
+        if (avctx->palctrl) {
+                memcpy (c->pic.data[1], avctx->palctrl->palette, AVPALETTE_SIZE);
+                if (avctx->palctrl->palette_changed) {
+                        c->pic.palette_has_changed = 1;
+                        avctx->palctrl->palette_changed = 0;
+                } else
+                        c->pic.palette_has_changed = 0;
+        }
+
+        *data_size = sizeof(AVFrame);
+        *(AVFrame*)data = c->pic;
+
+        /* always report that the buffer was completely consumed */
+        return buf_size;
 }
 
 
@@ -150,53 +150,53 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
  */
 static int decode_init(AVCodecContext *avctx)
 {
-	EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
+        EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
 
-	c->avctx = avctx;
-	avctx->has_b_frames = 0;
+        c->avctx = avctx;
+        avctx->has_b_frames = 0;
 
-	c->pic.data[0] = NULL;
+        c->pic.data[0] = NULL;
 
     if (avcodec_check_dimensions(avctx, avctx->width, avctx->height) < 0) {
         return 1;
     }
 
-	switch (avctx->bits_per_sample) {
-		case 8:
-			avctx->pix_fmt = PIX_FMT_PAL8;
-			c->planes = 1;
-			c->planemap[0] = 0; // 1st plane is palette indexes
-			if (avctx->palctrl == NULL) {
-				av_log(avctx, AV_LOG_ERROR, "Error: PAL8 format but no palette from demuxer.\n");
-				return -1;
-			}
-			break;
-		case 24:
-			avctx->pix_fmt = avctx->get_format(avctx, pixfmt_rgb24);
-			c->planes = 3;
-			c->planemap[0] = 2; // 1st plane is red
-			c->planemap[1] = 1; // 2nd plane is green
-			c->planemap[2] = 0; // 3rd plane is blue
-			break;
-		case 32:
-			avctx->pix_fmt = PIX_FMT_RGBA32;
-			c->planes = 4;
+        switch (avctx->bits_per_sample) {
+                case 8:
+                        avctx->pix_fmt = PIX_FMT_PAL8;
+                        c->planes = 1;
+                        c->planemap[0] = 0; // 1st plane is palette indexes
+                        if (avctx->palctrl == NULL) {
+                                av_log(avctx, AV_LOG_ERROR, "Error: PAL8 format but no palette from demuxer.\n");
+                                return -1;
+                        }
+                        break;
+                case 24:
+                        avctx->pix_fmt = avctx->get_format(avctx, pixfmt_rgb24);
+                        c->planes = 3;
+                        c->planemap[0] = 2; // 1st plane is red
+                        c->planemap[1] = 1; // 2nd plane is green
+                        c->planemap[2] = 0; // 3rd plane is blue
+                        break;
+                case 32:
+                        avctx->pix_fmt = PIX_FMT_RGBA32;
+                        c->planes = 4;
 #ifdef WORDS_BIGENDIAN
-			c->planemap[0] = 1; // 1st plane is red
-			c->planemap[1] = 2; // 2nd plane is green
-			c->planemap[2] = 3; // 3rd plane is blue
-			c->planemap[3] = 0; // 4th plane is alpha???
+                        c->planemap[0] = 1; // 1st plane is red
+                        c->planemap[1] = 2; // 2nd plane is green
+                        c->planemap[2] = 3; // 3rd plane is blue
+                        c->planemap[3] = 0; // 4th plane is alpha???
 #else
-			c->planemap[0] = 2; // 1st plane is red
-			c->planemap[1] = 1; // 2nd plane is green
-			c->planemap[2] = 0; // 3rd plane is blue
-			c->planemap[3] = 3; // 4th plane is alpha???
+                        c->planemap[0] = 2; // 1st plane is red
+                        c->planemap[1] = 1; // 2nd plane is green
+                        c->planemap[2] = 0; // 3rd plane is blue
+                        c->planemap[3] = 3; // 4th plane is alpha???
 #endif
-			break;
-		default:
-			av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n", avctx->bits_per_sample);
-			return -1;
-	}
+                        break;
+                default:
+                        av_log(avctx, AV_LOG_ERROR, "Error: Unsupported color depth: %u.\n", avctx->bits_per_sample);
+                        return -1;
+        }
 
   return 0;
 }
@@ -211,24 +211,24 @@ static int decode_init(AVCodecContext *avctx)
  */
 static int decode_end(AVCodecContext *avctx)
 {
-	EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
+        EightBpsContext * const c = (EightBpsContext *)avctx->priv_data;
 
-	if (c->pic.data[0])
-		avctx->release_buffer(avctx, &c->pic);
+        if (c->pic.data[0])
+                avctx->release_buffer(avctx, &c->pic);
 
-	return 0;
+        return 0;
 }
 
 
 
 AVCodec eightbps_decoder = {
-	"8bps",
-	CODEC_TYPE_VIDEO,
-	CODEC_ID_8BPS,
-	sizeof(EightBpsContext),
-	decode_init,
-	NULL,
-	decode_end,
-	decode_frame,
-	CODEC_CAP_DR1,
+        "8bps",
+        CODEC_TYPE_VIDEO,
+        CODEC_ID_8BPS,
+        sizeof(EightBpsContext),
+        decode_init,
+        NULL,
+        decode_end,
+        decode_frame,
+        CODEC_CAP_DR1,
 };
diff --git a/src/libffmpeg/libavcodec/Makefile.am b/src/libffmpeg/libavcodec/Makefile.am
index 650e8413d..34a6b522b 100644
--- a/src/libffmpeg/libavcodec/Makefile.am
+++ b/src/libffmpeg/libavcodec/Makefile.am
@@ -3,8 +3,8 @@ include $(top_srcdir)/misc/Makefile.common
 SUBDIRS = armv4l i386 mlib alpha ppc sparc libpostproc
 
 # some of ffmpeg's decoders are not used by xine yet
-EXTRA_DIST = motion_est_template.c imgresample.c \
-	adx.c cljr.c fdctref.c ffv1.c g726.c mdec.c raw.c svq3.c wmv2.c
+EXTRA_DIST = motion_est_template.c \
+	adx.c cljr.c fdctref.c ffv1.c g726.c jpeg_ls.c mdec.c raw.c svq3.c wmv2.c
 
 # we need to compile everything in debug mode, including the encoders,
 # otherwise we get unresolved symbols, because some unsatisfied function calls
@@ -45,6 +45,7 @@ libavcodec_la_SOURCES = \
 	huffyuv.c \
 	idcinvideo.c \
 	imgconvert.c \
+	imgresample.c \
 	indeo2.c \
 	indeo3.c \
 	interplayvideo.c \
@@ -74,6 +75,7 @@ libavcodec_la_SOURCES = \
 	ra288.c \
 	rangecoder.c \
 	ratecontrol.c \
+	resample2.c \
 	roqvideo.c \
 	rpza.c \
 	rv10.c \
diff --git a/src/libffmpeg/libavcodec/aasc.c b/src/libffmpeg/libavcodec/aasc.c
index d2419e98c..462282800 100644
--- a/src/libffmpeg/libavcodec/aasc.c
+++ b/src/libffmpeg/libavcodec/aasc.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
diff --git a/src/libffmpeg/libavcodec/adpcm.c b/src/libffmpeg/libavcodec/adpcm.c
index 3c67242f4..ed3106aa0 100644
--- a/src/libffmpeg/libavcodec/adpcm.c
+++ b/src/libffmpeg/libavcodec/adpcm.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avcodec.h"
 #include "bitstream.h"
@@ -59,7 +59,7 @@ static const int index_table[16] = {
     -1, -1, -1, -1, 2, 4, 6, 8,
 };
 
-/** 
+/**
  * This is the step table. Note that many programs use slight deviations from
  * this table, but such deviations are negligible:
  */
@@ -205,7 +205,7 @@ static inline unsigned char adpcm_ima_compress_sample(ADPCMChannelStatus *c, sho
 {
     int step_index;
     unsigned char nibble;
-    
+
     int sign = 0; /* sign bit of the nibble (MSB) */
     int delta, predicted_delta;
 
@@ -241,7 +241,7 @@ static inline unsigned char adpcm_ima_compress_sample(ADPCMChannelStatus *c, sho
     CLAMP_TO_SHORT(c->prev_sample);
 
 
-    nibble += sign << 3; /* sign * 8 */   
+    nibble += sign << 3; /* sign * 8 */
 
     /* save back */
     c->step_index = step_index;
@@ -254,14 +254,14 @@ static inline unsigned char adpcm_ms_compress_sample(ADPCMChannelStatus *c, shor
     int predictor, nibble, bias;
 
     predictor = (((c->sample1) * (c->coeff1)) + ((c->sample2) * (c->coeff2))) / 256;
-    
+
     nibble= sample - predictor;
     if(nibble>=0) bias= c->idelta/2;
     else          bias=-c->idelta/2;
-        
+
     nibble= (nibble + bias) / c->idelta;
     nibble= clip(nibble, -8, 7)&0x0F;
-    
+
     predictor += (signed)((nibble & 0x08)?(nibble - 0x10):(nibble)) * c->idelta;
     CLAMP_TO_SHORT(predictor);
 
@@ -300,7 +300,7 @@ static inline unsigned char adpcm_yamaha_compress_sample(ADPCMChannelStatus *c,
 }
 
 static int adpcm_encode_frame(AVCodecContext *avctx,
-			    unsigned char *frame, int buf_size, void *data)
+                            unsigned char *frame, int buf_size, void *data)
 {
     int n, i, st;
     short *samples;
@@ -333,7 +333,7 @@ static int adpcm_encode_frame(AVCodecContext *avctx,
                 *dst++ = 0;
                 samples++;
             }
-        
+
             /* stereo: 4 bytes (8 samples) for left, 4 bytes for right, 4 bytes left, ... */
             for (; n>0; n--) {
                 *dst = adpcm_ima_compress_sample(&c->status[0], samples[0]) & 0x0F;
@@ -375,9 +375,9 @@ static int adpcm_encode_frame(AVCodecContext *avctx,
             c->status[i].coeff2 = AdaptCoeff2[predictor];
         }
         for(i=0; i<avctx->channels; i++){
-            if (c->status[i].idelta < 16) 
+            if (c->status[i].idelta < 16)
                 c->status[i].idelta = 16;
-            
+
             *dst++ = c->status[i].idelta & 0xFF;
             *dst++ = c->status[i].idelta >> 8;
         }
@@ -431,8 +431,8 @@ static int adpcm_decode_init(AVCodecContext * avctx)
 
     switch(avctx->codec->id) {
     case CODEC_ID_ADPCM_CT:
-	c->status[0].step = c->status[1].step = 511;
-	break;
+        c->status[0].step = c->status[1].step = 511;
+        break;
     default:
         break;
     }
@@ -498,16 +498,16 @@ static inline short adpcm_ct_expand_nibble(ADPCMChannelStatus *c, char nibble)
     predictor = c->predictor;
     /* predictor update is not so trivial: predictor is multiplied on 254/256 before updating */
     if(sign)
-	predictor = ((predictor * 254) >> 8) - diff;
+        predictor = ((predictor * 254) >> 8) - diff;
     else
-    	predictor = ((predictor * 254) >> 8) + diff;
+            predictor = ((predictor * 254) >> 8) + diff;
     /* calculate new step and clamp it to range 511..32767 */
     new_step = (ct_adpcm_table[nibble & 7] * c->step) >> 8;
     c->step = new_step;
     if(c->step < 511)
-	c->step = 511;
+        c->step = 511;
     if(c->step > 32767)
-	c->step = 32767;
+        c->step = 32767;
 
     CLAMP_TO_SHORT(predictor);
     c->predictor = predictor;
@@ -528,7 +528,7 @@ static inline short adpcm_yamaha_expand_nibble(ADPCMChannelStatus *c, unsigned c
     return c->predictor;
 }
 
-static void xa_decode(short *out, const unsigned char *in, 
+static void xa_decode(short *out, const unsigned char *in,
     ADPCMChannelStatus *left, ADPCMChannelStatus *right, int inc)
 {
     int i, j;
@@ -612,8 +612,8 @@ static void xa_decode(short *out, const unsigned char *in,
     }
 
 static int adpcm_decode_frame(AVCodecContext *avctx,
-			    void *data, int *data_size,
-			    uint8_t *buf, int buf_size)
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
 {
     ADPCMContext *c = avctx->priv_data;
     ADPCMChannelStatus *cs;
@@ -701,7 +701,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
                 cs->predictor -= 0x10000;
             CLAMP_TO_SHORT(cs->predictor);
 
-	// XXX: is this correct ??: *samples++ = cs->predictor;
+        // XXX: is this correct ??: *samples++ = cs->predictor;
 
             cs->step_index = *src++;
             if (cs->step_index < 0) cs->step_index = 0;
@@ -710,19 +710,19 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
         }
 
         for(m=4; src < (buf + buf_size);) {
-	    *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[0] & 0x0F, 3);
+            *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[0] & 0x0F, 3);
             if (st)
                 *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[4] & 0x0F, 3);
             *samples++ = adpcm_ima_expand_nibble(&c->status[0], (src[0] >> 4) & 0x0F, 3);
-	    if (st) {
+            if (st) {
                 *samples++ = adpcm_ima_expand_nibble(&c->status[1], (src[4] >> 4) & 0x0F, 3);
-		if (!--m) {
-		    m=4;
-		    src+=4;
-		}
-	    }
-	    src++;
-	}
+                if (!--m) {
+                    m=4;
+                    src+=4;
+                }
+            }
+            src++;
+        }
         break;
     case CODEC_ID_ADPCM_4XM:
         cs = &(c->status[0]);
@@ -739,13 +739,13 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
 
         m= (buf_size - (src - buf))>>st;
         for(i=0; i<m; i++) {
-	    *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 0x0F, 4);
+            *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] & 0x0F, 4);
             if (st)
                 *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] & 0x0F, 4);
             *samples++ = adpcm_ima_expand_nibble(&c->status[0], src[i] >> 4, 4);
-	    if (st)
+            if (st)
                 *samples++ = adpcm_ima_expand_nibble(&c->status[1], src[i+m] >> 4, 4);
-	}
+        }
 
         src += m<<st;
 
@@ -770,7 +770,7 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
         c->status[0].coeff2 = AdaptCoeff2[block_predictor[0]];
         c->status[1].coeff1 = AdaptCoeff1[block_predictor[1]];
         c->status[1].coeff2 = AdaptCoeff2[block_predictor[1]];
-        
+
         c->status[0].sample1 = ((*src & 0xFF) | ((src[1] << 8) & 0xFF00));
         src+=2;
         if (st) c->status[1].sample1 = ((*src & 0xFF) | ((src[1] << 8) & 0xFF00));
@@ -807,16 +807,16 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
         while (src < buf + buf_size) {
 
             /* take care of the top nibble (always left or mono channel) */
-            *samples++ = adpcm_ima_expand_nibble(&c->status[0], 
+            *samples++ = adpcm_ima_expand_nibble(&c->status[0],
                 (src[0] >> 4) & 0x0F, 3);
 
             /* take care of the bottom nibble, which is right sample for
              * stereo, or another mono sample */
             if (st)
-                *samples++ = adpcm_ima_expand_nibble(&c->status[1], 
+                *samples++ = adpcm_ima_expand_nibble(&c->status[1],
                     src[0] & 0x0F, 3);
             else
-                *samples++ = adpcm_ima_expand_nibble(&c->status[0], 
+                *samples++ = adpcm_ima_expand_nibble(&c->status[0],
                     src[0] & 0x0F, 3);
 
             src++;
@@ -869,14 +869,14 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
         while (src < buf + buf_size) {
 
             if (st) {
-                *samples++ = adpcm_ima_expand_nibble(&c->status[0], 
+                *samples++ = adpcm_ima_expand_nibble(&c->status[0],
                     (src[0] >> 4) & 0x0F, 3);
-                *samples++ = adpcm_ima_expand_nibble(&c->status[1], 
+                *samples++ = adpcm_ima_expand_nibble(&c->status[1],
                     src[0] & 0x0F, 3);
             } else {
-                *samples++ = adpcm_ima_expand_nibble(&c->status[0], 
+                *samples++ = adpcm_ima_expand_nibble(&c->status[0],
                     (src[0] >> 4) & 0x0F, 3);
-                *samples++ = adpcm_ima_expand_nibble(&c->status[0], 
+                *samples++ = adpcm_ima_expand_nibble(&c->status[0],
                     src[0] & 0x0F, 3);
             }
 
@@ -884,10 +884,10 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
         }
         break;
     case CODEC_ID_ADPCM_XA:
-        c->status[0].sample1 = c->status[0].sample2 = 
+        c->status[0].sample1 = c->status[0].sample2 =
         c->status[1].sample1 = c->status[1].sample2 = 0;
         while (buf_size >= 128) {
-            xa_decode(samples, src, &c->status[0], &c->status[1], 
+            xa_decode(samples, src, &c->status[0], &c->status[1],
                 avctx->channels);
             src += 128;
             samples += 28 * 8;
@@ -926,11 +926,11 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
                 next_right_sample = (((*src & 0x0F) << 28) >> shift_right);
                 src++;
 
-                next_left_sample = (next_left_sample + 
-                    (current_left_sample * coeff1l) + 
+                next_left_sample = (next_left_sample +
+                    (current_left_sample * coeff1l) +
                     (previous_left_sample * coeff2l) + 0x80) >> 8;
-                next_right_sample = (next_right_sample + 
-                    (current_right_sample * coeff1r) + 
+                next_right_sample = (next_right_sample +
+                    (current_right_sample * coeff1r) +
                     (previous_right_sample * coeff2r) + 0x80) >> 8;
                 CLAMP_TO_SHORT(next_left_sample);
                 CLAMP_TO_SHORT(next_right_sample);
@@ -958,90 +958,90 @@ static int adpcm_decode_frame(AVCodecContext *avctx,
         }
         break;
     case CODEC_ID_ADPCM_CT:
-	while (src < buf + buf_size) {
+        while (src < buf + buf_size) {
             if (st) {
-                *samples++ = adpcm_ct_expand_nibble(&c->status[0], 
+                *samples++ = adpcm_ct_expand_nibble(&c->status[0],
                     (src[0] >> 4) & 0x0F);
-                *samples++ = adpcm_ct_expand_nibble(&c->status[1], 
+                *samples++ = adpcm_ct_expand_nibble(&c->status[1],
                     src[0] & 0x0F);
             } else {
-                *samples++ = adpcm_ct_expand_nibble(&c->status[0], 
+                *samples++ = adpcm_ct_expand_nibble(&c->status[0],
                     (src[0] >> 4) & 0x0F);
-                *samples++ = adpcm_ct_expand_nibble(&c->status[0], 
+                *samples++ = adpcm_ct_expand_nibble(&c->status[0],
                     src[0] & 0x0F);
             }
-	    src++;
+            src++;
         }
         break;
     case CODEC_ID_ADPCM_SWF:
     {
-	GetBitContext gb;
-	const int *table;
-	int k0, signmask;
-	int size = buf_size*8;
-	
-	init_get_bits(&gb, buf, size);
-
-	// first frame, read bits & inital values
-	if (!c->nb_bits)
-	{
-	    c->nb_bits = get_bits(&gb, 2)+2;
-//	    av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits);
-	}
-	
-	table = swf_index_tables[c->nb_bits-2];
-	k0 = 1 << (c->nb_bits-2);
-	signmask = 1 << (c->nb_bits-1);
-	
-	while (get_bits_count(&gb) <= size)
-	{
-	    int i;
-
-	    c->nb_samples++;
-	    // wrap around at every 4096 samples...
-	    if ((c->nb_samples & 0xfff) == 1)
-	    {
-		for (i = 0; i <= st; i++)
-		{
-		    *samples++ = c->status[i].predictor = get_sbits(&gb, 16);
-		    c->status[i].step_index = get_bits(&gb, 6);
-		}
-	    }
-
-	    // similar to IMA adpcm
-	    for (i = 0; i <= st; i++)
-	    {
-		int delta = get_bits(&gb, c->nb_bits);
-		int step = step_table[c->status[i].step_index];
-		long vpdiff = 0; // vpdiff = (delta+0.5)*step/4
-		int k = k0;
-		
-		do {
-		    if (delta & k)
-			vpdiff += step;
-		    step >>= 1;
-		    k >>= 1;
-		} while(k);
-		vpdiff += step;
-		
-		if (delta & signmask)
-		    c->status[i].predictor -= vpdiff;
-		else
-		    c->status[i].predictor += vpdiff;
-		
-		c->status[i].step_index += table[delta & (~signmask)];
-		
-		c->status[i].step_index = clip(c->status[i].step_index, 0, 88);
-		c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767);
-		
-		*samples++ = c->status[i].predictor;
-	    }
-	}
-	
-//	src += get_bits_count(&gb)*8;
-	src += size;
-	
-	break;
+        GetBitContext gb;
+        const int *table;
+        int k0, signmask;
+        int size = buf_size*8;
+
+        init_get_bits(&gb, buf, size);
+
+        // first frame, read bits & inital values
+        if (!c->nb_bits)
+        {
+            c->nb_bits = get_bits(&gb, 2)+2;
+//            av_log(NULL,AV_LOG_INFO,"nb_bits: %d\n", c->nb_bits);
+        }
+
+        table = swf_index_tables[c->nb_bits-2];
+        k0 = 1 << (c->nb_bits-2);
+        signmask = 1 << (c->nb_bits-1);
+
+        while (get_bits_count(&gb) <= size)
+        {
+            int i;
+
+            c->nb_samples++;
+            // wrap around at every 4096 samples...
+            if ((c->nb_samples & 0xfff) == 1)
+            {
+                for (i = 0; i <= st; i++)
+                {
+                    *samples++ = c->status[i].predictor = get_sbits(&gb, 16);
+                    c->status[i].step_index = get_bits(&gb, 6);
+                }
+            }
+
+            // similar to IMA adpcm
+            for (i = 0; i <= st; i++)
+            {
+                int delta = get_bits(&gb, c->nb_bits);
+                int step = step_table[c->status[i].step_index];
+                long vpdiff = 0; // vpdiff = (delta+0.5)*step/4
+                int k = k0;
+
+                do {
+                    if (delta & k)
+                        vpdiff += step;
+                    step >>= 1;
+                    k >>= 1;
+                } while(k);
+                vpdiff += step;
+
+                if (delta & signmask)
+                    c->status[i].predictor -= vpdiff;
+                else
+                    c->status[i].predictor += vpdiff;
+
+                c->status[i].step_index += table[delta & (~signmask)];
+
+                c->status[i].step_index = clip(c->status[i].step_index, 0, 88);
+                c->status[i].predictor = clip(c->status[i].predictor, -32768, 32767);
+
+                *samples++ = c->status[i].predictor;
+            }
+        }
+
+//        src += get_bits_count(&gb)*8;
+        src += size;
+
+        break;
     }
     case CODEC_ID_ADPCM_YAMAHA:
         while (src < buf + buf_size) {
diff --git a/src/libffmpeg/libavcodec/adx.c b/src/libffmpeg/libavcodec/adx.c
index a52575c13..c841e4eb8 100644
--- a/src/libffmpeg/libavcodec/adx.c
+++ b/src/libffmpeg/libavcodec/adx.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avcodec.h"
 
@@ -171,7 +171,7 @@ static int adx_encode_header(AVCodecContext *avctx,unsigned char *buf,size_t buf
         long loop_start_byte;
         long loop_end_sample;
         long loop_end_byte;
-        long 
+        long
     */
     } adxhdr; /* big endian */
     /* offset-6 "(c)CRI" */
diff --git a/src/libffmpeg/libavcodec/alac.c b/src/libffmpeg/libavcodec/alac.c
index 2943b4dc5..21457ab23 100644
--- a/src/libffmpeg/libavcodec/alac.c
+++ b/src/libffmpeg/libavcodec/alac.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
@@ -84,7 +84,7 @@ static void allocate_buffers(ALACContext *alac)
     alac->outputsamples_buffer_b = av_malloc(alac->setinfo_max_samples_per_frame * 4);
 }
 
-void alac_set_info(ALACContext *alac)
+static void alac_set_info(ALACContext *alac)
 {
     unsigned char *ptr = alac->avctx->extradata;
 
@@ -125,7 +125,7 @@ static int count_leading_zeros(int32_t input)
     return i;
 }
 
-void bastardized_rice_decompress(ALACContext *alac,
+static void bastardized_rice_decompress(ALACContext *alac,
                                  int32_t *output_buffer,
                                  int output_size,
                                  int readsamplesize, /* arg_10 */
@@ -169,9 +169,9 @@ void bastardized_rice_decompress(ALACContext *alac,
             /* read k, that is bits as is */
             k = 31 - rice_kmodifier - count_leading_zeros((history >> 9) + 3);
 
-            if (k < 0) 
+            if (k < 0)
                 k += rice_kmodifier;
-            else 
+            else
                 k = rice_kmodifier;
 
             if (k != 1) {
@@ -444,7 +444,7 @@ static int alac_decode_frame(AVCodecContext *avctx,
     /* initialize from the extradata */
     if (!alac->context_initialized) {
         if (alac->avctx->extradata_size != ALAC_EXTRADATA_SIZE) {
-            av_log(NULL, AV_LOG_ERROR, "alac: expected %d extradata bytes\n", 
+            av_log(NULL, AV_LOG_ERROR, "alac: expected %d extradata bytes\n",
                 ALAC_EXTRADATA_SIZE);
             return input_buffer_size;
         }
@@ -728,7 +728,7 @@ static int alac_decode_frame(AVCodecContext *avctx,
             } else {
                 av_log(NULL, AV_LOG_ERROR, "FIXME: unhandled prediction type: %i\n", prediction_type_b);
             }
-        } else { 
+        } else {
          /* not compressed, easy case */
             if (alac->setinfo_sample_size <= 16) {
                 int i;
diff --git a/src/libffmpeg/libavcodec/alpha/asm.h b/src/libffmpeg/libavcodec/alpha/asm.h
index 6dc997b37..056e043f3 100644
--- a/src/libffmpeg/libavcodec/alpha/asm.h
+++ b/src/libffmpeg/libavcodec/alpha/asm.h
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef LIBAVCODEC_ALPHA_ASM_H
@@ -84,24 +84,24 @@ static inline uint64_t WORD_VEC(uint64_t x)
         } *) (p))->__l) = l;                                            \
     } while (0)
 struct unaligned_long { uint64_t l; } __attribute__((packed));
-#define ldq_u(p)     (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
-#define uldq(a)	     (((const struct unaligned_long *) (a))->l)
+#define ldq_u(p)        (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
+#define uldq(a)         (((const struct unaligned_long *) (a))->l)
 
 #if GNUC_PREREQ(3,3)
 #define prefetch(p)     __builtin_prefetch((p), 0, 1)
 #define prefetch_en(p)  __builtin_prefetch((p), 0, 0)
 #define prefetch_m(p)   __builtin_prefetch((p), 1, 1)
 #define prefetch_men(p) __builtin_prefetch((p), 1, 0)
-#define cmpbge	__builtin_alpha_cmpbge
+#define cmpbge          __builtin_alpha_cmpbge
 /* Avoid warnings.  */
-#define extql(a, b)	__builtin_alpha_extql(a, (uint64_t) (b))
-#define extwl(a, b)	__builtin_alpha_extwl(a, (uint64_t) (b))
-#define extqh(a, b)	__builtin_alpha_extqh(a, (uint64_t) (b))
-#define zap	__builtin_alpha_zap
-#define zapnot	__builtin_alpha_zapnot
-#define amask	__builtin_alpha_amask
-#define implver	__builtin_alpha_implver
-#define rpcc	__builtin_alpha_rpcc
+#define extql(a, b)     __builtin_alpha_extql(a, (uint64_t) (b))
+#define extwl(a, b)     __builtin_alpha_extwl(a, (uint64_t) (b))
+#define extqh(a, b)     __builtin_alpha_extqh(a, (uint64_t) (b))
+#define zap             __builtin_alpha_zap
+#define zapnot          __builtin_alpha_zapnot
+#define amask           __builtin_alpha_amask
+#define implver         __builtin_alpha_implver
+#define rpcc            __builtin_alpha_rpcc
 #else
 #define prefetch(p)     asm volatile("ldl $31,%0"  : : "m"(*(const char *) (p)) : "memory")
 #define prefetch_en(p)  asm volatile("ldq $31,%0"  : : "m"(*(const char *) (p)) : "memory")
@@ -113,26 +113,26 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
 #define extqh(a, b)  ({ uint64_t __r; asm ("extqh   %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
 #define zap(a, b)    ({ uint64_t __r; asm ("zap     %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
 #define zapnot(a, b) ({ uint64_t __r; asm ("zapnot  %r1,%2,%0"  : "=r" (__r) : "rJ"  (a), "rI" (b)); __r; })
-#define amask(a)     ({ uint64_t __r; asm ("amask   %1,%0"      : "=r" (__r) : "rI"  (a));	     __r; })
-#define implver()    ({ uint64_t __r; asm ("implver %0"         : "=r" (__r));			     __r; })
-#define rpcc()	     ({ uint64_t __r; asm volatile ("rpcc %0"   : "=r" (__r));			     __r; })
+#define amask(a)     ({ uint64_t __r; asm ("amask   %1,%0"      : "=r" (__r) : "rI"  (a));           __r; })
+#define implver()    ({ uint64_t __r; asm ("implver %0"         : "=r" (__r));                       __r; })
+#define rpcc()       ({ uint64_t __r; asm volatile ("rpcc %0"   : "=r" (__r));                       __r; })
 #endif
 #define wh64(p) asm volatile("wh64 (%0)" : : "r"(p) : "memory")
 
 #if GNUC_PREREQ(3,3) && defined(__alpha_max__)
-#define minub8	__builtin_alpha_minub8
-#define minsb8	__builtin_alpha_minsb8
-#define minuw4	__builtin_alpha_minuw4
-#define minsw4	__builtin_alpha_minsw4
-#define maxub8	__builtin_alpha_maxub8
-#define maxsb8	__builtin_alpha_maxsb8
-#define maxuw4	__builtin_alpha_maxuw4	
-#define maxsw4	__builtin_alpha_maxsw4
-#define perr	__builtin_alpha_perr
-#define pklb	__builtin_alpha_pklb
-#define pkwb	__builtin_alpha_pkwb
-#define unpkbl	__builtin_alpha_unpkbl
-#define unpkbw	__builtin_alpha_unpkbw
+#define minub8  __builtin_alpha_minub8
+#define minsb8  __builtin_alpha_minsb8
+#define minuw4  __builtin_alpha_minuw4
+#define minsw4  __builtin_alpha_minsw4
+#define maxub8  __builtin_alpha_maxub8
+#define maxsb8  __builtin_alpha_maxsb8
+#define maxuw4  __builtin_alpha_maxuw4
+#define maxsw4  __builtin_alpha_maxsw4
+#define perr    __builtin_alpha_perr
+#define pklb    __builtin_alpha_pklb
+#define pkwb    __builtin_alpha_pkwb
+#define unpkbl  __builtin_alpha_unpkbl
+#define unpkbw  __builtin_alpha_unpkbw
 #else
 #define minub8(a, b) ({ uint64_t __r; asm (".arch ev6; minub8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
 #define minsb8(a, b) ({ uint64_t __r; asm (".arch ev6; minsb8  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
@@ -143,13 +143,13 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
 #define maxuw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxuw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
 #define maxsw4(a, b) ({ uint64_t __r; asm (".arch ev6; maxsw4  %r1,%2,%0"  : "=r" (__r) : "%rJ" (a), "rI" (b)); __r; })
 #define perr(a, b)   ({ uint64_t __r; asm (".arch ev6; perr    %r1,%r2,%0" : "=r" (__r) : "%rJ" (a), "rJ" (b)); __r; })
-#define pklb(a)      ({ uint64_t __r; asm (".arch ev6; pklb    %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
-#define pkwb(a)      ({ uint64_t __r; asm (".arch ev6; pkwb    %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
-#define unpkbl(a)    ({ uint64_t __r; asm (".arch ev6; unpkbl  %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
-#define unpkbw(a)    ({ uint64_t __r; asm (".arch ev6; unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));	     __r; })
+#define pklb(a)      ({ uint64_t __r; asm (".arch ev6; pklb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
+#define pkwb(a)      ({ uint64_t __r; asm (".arch ev6; pkwb    %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
+#define unpkbl(a)    ({ uint64_t __r; asm (".arch ev6; unpkbl  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
+#define unpkbw(a)    ({ uint64_t __r; asm (".arch ev6; unpkbw  %r1,%0"     : "=r" (__r) : "rJ"  (a));           __r; })
 #endif
 
-#elif defined(__DECC)		/* Digital/Compaq/hp "ccc" compiler */
+#elif defined(__DECC)           /* Digital/Compaq/hp "ccc" compiler */
 
 #include <c_asm.h>
 #define ldq(p) (*(const uint64_t *) (p))
@@ -157,7 +157,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
 #define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
 #define stl(l, p) do { *(int32_t *)  (p) = (l); } while (0)
 #define ldq_u(a)     asm ("ldq_u   %v0,0(%a0)", a)
-#define uldq(a)	     (*(const __unaligned uint64_t *) (a))
+#define uldq(a)      (*(const __unaligned uint64_t *) (a))
 #define cmpbge(a, b) asm ("cmpbge  %a0,%a1,%v0", a, b)
 #define extql(a, b)  asm ("extql   %a0,%a1,%v0", a, b)
 #define extwl(a, b)  asm ("extwl   %a0,%a1,%v0", a, b)
@@ -166,7 +166,7 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
 #define zapnot(a, b) asm ("zapnot  %a0,%a1,%v0", a, b)
 #define amask(a)     asm ("amask   %a0,%v0", a)
 #define implver()    asm ("implver %v0")
-#define rpcc()	     asm ("rpcc	   %v0")
+#define rpcc()       asm ("rpcc           %v0")
 #define minub8(a, b) asm ("minub8  %a0,%a1,%v0", a, b)
 #define minsb8(a, b) asm ("minsb8  %a0,%a1,%v0", a, b)
 #define minuw4(a, b) asm ("minuw4  %a0,%a1,%v0", a, b)
diff --git a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c
index 496f46120..299a25dc4 100644
--- a/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c
+++ b/src/libffmpeg/libavcodec/alpha/dsputil_alpha.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "asm.h"
@@ -28,11 +28,11 @@ void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
                         int line_size, int h);
 void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
                                 int line_size);
-void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, 
+void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
                                 int line_size);
 void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
                                  int line_size);
-void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels, 
+void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
                                  int line_size);
 
 void get_pixels_mvi(DCTELEM *restrict block,
@@ -48,7 +48,7 @@ int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i
 #if 0
 /* These functions were the base for the optimized assembler routines,
    and remain here for documentation purposes.  */
-static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, 
+static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
                                    int line_size)
 {
     int i = 8;
@@ -72,7 +72,7 @@ static void put_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
     } while (--i);
 }
 
-void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels, 
+void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
                             int line_size)
 {
     int h = 8;
@@ -97,7 +97,7 @@ void add_pixels_clamped_mvi(const DCTELEM *block, uint8_t *pixels,
         shorts0 ^= signs0;
         /* Clamp. */
         shorts0 = maxsw4(shorts0, 0);
-        shorts0 = minsw4(shorts0, clampmask);   
+        shorts0 = minsw4(shorts0, clampmask);
 
         /* Next 4.  */
         pix1    = unpkbw(ldl(pixels + 4));
@@ -142,7 +142,7 @@ static inline uint64_t avg2_no_rnd(uint64_t a, uint64_t b)
 
 static inline uint64_t avg2(uint64_t a, uint64_t b)
 {
-    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);    
+    return (a | b) - (((a ^ b) & BYTE_VEC(0xfe)) >> 1);
 }
 
 #if 0
@@ -353,7 +353,7 @@ void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
 
     put_pixels_clamped_axp_p = c->put_pixels_clamped;
     add_pixels_clamped_axp_p = c->add_pixels_clamped;
-    
+
     c->idct_put = simple_idct_put_axp;
     c->idct_add = simple_idct_add_axp;
     c->idct = simple_idct_axp;
diff --git a/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S b/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S
index 6519a9590..d555b874c 100644
--- a/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S
+++ b/src/libffmpeg/libavcodec/alpha/dsputil_alpha_asm.S
@@ -34,7 +34,7 @@
 #define tf a4
 #define tg a3
 #define th v0
-                
+
         .set noat
         .set noreorder
         .arch pca56
@@ -71,7 +71,7 @@ $unaligned:
         addq    a1, a2, a1
         nop
 
-	ldq_u   t4, 0(a1)
+        ldq_u   t4, 0(a1)
         ldq_u   t5, 8(a1)
         addq    a1, a2, a1
         nop
@@ -120,25 +120,25 @@ $aligned:
         addq    a1, a2, a1
         ldq     t3, 0(a1)
 
-	addq	a0, a2, t4
-	addq    a1, a2, a1
-	addq	t4, a2, t5
-	subq    a3, 4, a3
+        addq    a0, a2, t4
+        addq    a1, a2, a1
+        addq    t4, a2, t5
+        subq    a3, 4, a3
+
+        stq     t0, 0(a0)
+        addq    t5, a2, t6
+        stq     t1, 0(t4)
+        addq    t6, a2, a0
 
-	stq	t0, 0(a0)
-	addq	t5, a2, t6
-	stq	t1, 0(t4)
-	addq	t6, a2, a0
+        stq     t2, 0(t5)
+        stq     t3, 0(t6)
 
-	stq	t2, 0(t5)
-	stq	t3, 0(t6)
-	
-	bne     a3, $aligned
+        bne     a3, $aligned
         ret
         .end put_pixels_axp_asm
 
 /************************************************************************
- * void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, 
+ * void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
  *                                 int line_size)
  */
         .align 6
@@ -172,17 +172,17 @@ put_pixels_clamped_mvi_asm:
         addq    a1, a2, ta
         maxsw4  t3, zero, t3
         minsw4  t0, t8, t0
-        
+
         minsw4  t1, t8, t1
         minsw4  t2, t8, t2
         minsw4  t3, t8, t3
         pkwb    t0, t0
-        
+
         pkwb    t1, t1
         pkwb    t2, t2
         pkwb    t3, t3
         stl     t0, 0(a1)
-        
+
         stl     t1, 4(a1)
         addq    ta, a2, a1
         stl     t2, 0(ta)
@@ -193,7 +193,7 @@ put_pixels_clamped_mvi_asm:
         .end put_pixels_clamped_mvi_asm
 
 /************************************************************************
- * void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, 
+ * void add_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
  *                                 int line_size)
  */
         .align 6
@@ -236,18 +236,18 @@ add_pixels_clamped_mvi_asm:
         bic     t0, tg, t0      # 0 2
         unpkbw  t7, t7          # 2 0
         and     t3, tg, t5      # 1 1
-        addq    t0, t1, t0      # 0 3 
+        addq    t0, t1, t0      # 0 3
 
         xor     t0, t2, t0      # 0 4
         unpkbw  ta, ta          # 3 0
         and     t6, tg, t8      # 2 1
         maxsw4  t0, zero, t0    # 0 5
-        
+
         bic     t3, tg, t3      # 1 2
         bic     t6, tg, t6      # 2 2
         minsw4  t0, tf, t0      # 0 6
         addq    t3, t4, t3      # 1 3
-        
+
         pkwb    t0, t0          # 0 7
         xor     t3, t5, t3      # 1 4
         maxsw4  t3, zero, t3    # 1 5
@@ -260,14 +260,14 @@ add_pixels_clamped_mvi_asm:
 
         maxsw4  t6, zero, t6    # 2 5
         addq    t9, ta, t9      # 3 3
-        stl     t0, 0(a1)       # 0 8   
+        stl     t0, 0(a1)       # 0 8
         minsw4  t6, tf, t6      # 2 6
 
         xor     t9, tb, t9      # 3 4
         maxsw4  t9, zero, t9    # 3 5
         lda     a0, 32(a0)      # block += 16;
         pkwb    t3, t3          # 1 7
-        
+
         minsw4  t9, tf, t9      # 3 6
         subq    th, 2, th
         pkwb    t6, t6          # 2 7
@@ -279,5 +279,5 @@ add_pixels_clamped_mvi_asm:
         stl     t9, 4(te)       # 3 8
 
         bne     th, 1b
-        ret     
+        ret
         .end add_pixels_clamped_mvi_asm
diff --git a/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c b/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c
index 8b8a0a25c..ea8580be7 100644
--- a/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c
+++ b/src/libffmpeg/libavcodec/alpha/motion_est_alpha.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "asm.h"
@@ -30,7 +30,7 @@ void get_pixels_mvi(DCTELEM *restrict block,
 
         p = ldq(pixels);
         stq(unpkbw(p),       block);
-        stq(unpkbw(p >> 32), block + 4); 
+        stq(unpkbw(p >> 32), block + 4);
 
         pixels += line_size;
         block += 8;
@@ -116,7 +116,7 @@ int pix_abs8x8_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
     return result;
 }
 
-#if 0				/* now done in assembly */
+#if 0                           /* now done in assembly */
 int pix_abs16x16_mvi(uint8_t *pix1, uint8_t *pix2, int line_size)
 {
     int result = 0;
@@ -187,7 +187,7 @@ int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in
         /* |.......l|lllllllr|rrrrrrr*|
            This case is special because disalign1 would be 8, which
            gets treated as 0 by extqh.  At least it is a bit faster
-           that way :)  */   
+           that way :)  */
         do {
             uint64_t p1_l, p1_r, p2_l, p2_r;
             uint64_t l, m, r;
@@ -201,7 +201,7 @@ int pix_abs16x16_x2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in
             p2_r  = avg2(extql(m, disalign) | extqh(r, disalign), r);
             pix1 += line_size;
             pix2 += line_size;
-            
+
             result += perr(p1_l, p2_l)
                     + perr(p1_r, p2_r);
         } while (--h);
@@ -288,7 +288,7 @@ int pix_abs16x16_y2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in
 int pix_abs16x16_xy2_mvi(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
 {
     int result = 0;
-    
+
     uint64_t p1_l, p1_r;
     uint64_t p2_l, p2_r, p2_x;
 
diff --git a/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S b/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S
index 9e6b75f53..276d310ef 100644
--- a/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S
+++ b/src/libffmpeg/libavcodec/alpha/motion_est_mvi_asm.S
@@ -29,7 +29,7 @@
 #define tf a4
 #define tg a3
 #define th v0
-        
+
         .set noat
         .set noreorder
         .arch pca56
@@ -91,7 +91,7 @@ $unaligned:
         ldq     t4, 8(a0)       # ref right
         addq    a0, a2, a0      # pix1
         addq    a1, a2, a1      # pix2
-        /* load line 1 */        
+        /* load line 1 */
         ldq_u   t5, 0(a1)       # left_u
         ldq_u   t6, 8(a1)       # mid
         ldq_u   t7, 16(a1)      # right_u
diff --git a/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c b/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c
index f64fb7472..4c512451e 100644
--- a/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c
+++ b/src/libffmpeg/libavcodec/alpha/mpegvideo_alpha.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "asm.h"
@@ -28,22 +28,22 @@ static void dct_unquantize_h263_intra_axp(MpegEncContext *s, DCTELEM *block,
     uint64_t qmul, qadd;
     uint64_t correction;
     DCTELEM *orig_block = block;
-    DCTELEM block0;
+    DCTELEM block0;             /* assigned only when !s->h263_aic; presumably read only in that case too -- TODO confirm */
 
     qadd = WORD_VEC((qscale - 1) | 1);
     qmul = qscale << 1;
-    /* This mask kills spill from negative subwords to the next subword.  */ 
+    /* This mask kills spill from negative subwords to the next subword.  */
     correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */
 
     if (!s->h263_aic) {
-        if (n < 4) 
+        if (n < 4)
             block0 = block[0] * s->y_dc_scale;
         else
             block0 = block[0] * s->c_dc_scale;
     } else {
         qadd = 0;
     }
-    n_coeffs = 63; // does not always use zigzag table 
+    n_coeffs = 63; // does not always use zigzag table
 
     for(i = 0; i <= n_coeffs; block += 4, i += 4) {
         uint64_t levels, negmask, zeros, add;
@@ -92,12 +92,10 @@ static void dct_unquantize_h263_inter_axp(MpegEncContext *s, DCTELEM *block,
     int i, n_coeffs;
     uint64_t qmul, qadd;
     uint64_t correction;
-    DCTELEM *orig_block = block;
-    DCTELEM block0;
 
     qadd = WORD_VEC((qscale - 1) | 1);
     qmul = qscale << 1;
-    /* This mask kills spill from negative subwords to the next subword.  */ 
+    /* This mask kills spill from negative subwords to the next subword.  */
     correction = WORD_VEC((qmul - 1) + 1); /* multiplication / addition */
 
     n_coeffs = s->intra_scantable.raster_end[s->block_last_index[n]];
diff --git a/src/libffmpeg/libavcodec/alpha/simple_idct_alpha.c b/src/libffmpeg/libavcodec/alpha/simple_idct_alpha.c
index 293a2f970..3a5db009b 100644
--- a/src/libffmpeg/libavcodec/alpha/simple_idct_alpha.c
+++ b/src/libffmpeg/libavcodec/alpha/simple_idct_alpha.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * based upon some outcommented c code from mpeg2dec (idct_mmx.c
  * written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
@@ -29,7 +29,7 @@
 
 extern void (*put_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
                                         int line_size);
-extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels, 
+extern void (*add_pixels_clamped_axp_p)(const DCTELEM *block, uint8_t *pixels,
                                         int line_size);
 
 // cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
@@ -55,7 +55,7 @@ static inline int idct_row(DCTELEM *row)
 
     if (l == 0 && r == 0)
         return 0;
-    
+
     a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
 
     if (((l & ~0xffffUL) | r) == 0) {
@@ -63,7 +63,7 @@ static inline int idct_row(DCTELEM *row)
         t2 = (uint16_t) a0;
         t2 |= t2 << 16;
         t2 |= t2 << 32;
-        
+
         stq(t2, row);
         stq(t2, row + 4);
         return 1;
@@ -123,7 +123,7 @@ static inline int idct_row(DCTELEM *row)
         b3 -= W5 * t;
     }
 
-    
+
     t = extwl(r, 2);            /* row[5] */
     if (t) {
         t = sextw(t);
@@ -285,7 +285,7 @@ void simple_idct_axp(DCTELEM *block)
             stq(v, block + 1 * 4);
             stq(w, block + 2 * 4);
             stq(w, block + 3 * 4);
-	    block += 4 * 4;
+            block += 4 * 4;
         }
     } else {
         for (i = 0; i < 8; i++)
diff --git a/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c b/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c
index 0195c3ca6..cebd176b3 100644
--- a/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c
+++ b/src/libffmpeg/libavcodec/armv4l/dsputil_arm.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "../dsputil.h"
@@ -205,13 +205,13 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
 #endif
         c->idct_put= j_rev_dct_ARM_put;
         c->idct_add= j_rev_dct_ARM_add;
-	c->idct    = j_rev_dct_ARM;
+        c->idct    = j_rev_dct_ARM;
         c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
     } else if (idct_algo==FF_IDCT_SIMPLEARM){
-	c->idct_put= simple_idct_ARM_put;
-	c->idct_add= simple_idct_ARM_add;
-	c->idct    = simple_idct_ARM;
-	c->idct_permutation_type= FF_NO_IDCT_PERM;
+        c->idct_put= simple_idct_ARM_put;
+        c->idct_add= simple_idct_ARM_add;
+        c->idct    = simple_idct_ARM;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
 #ifdef HAVE_IPP
     } else if (idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_IPP){
 #else
diff --git a/src/libffmpeg/libavcodec/armv4l/jrevdct_arm.S b/src/libffmpeg/libavcodec/armv4l/jrevdct_arm.S
index 76eda57ea..294ea4750 100644
--- a/src/libffmpeg/libavcodec/armv4l/jrevdct_arm.S
+++ b/src/libffmpeg/libavcodec/armv4l/jrevdct_arm.S
@@ -1,6 +1,6 @@
-/* 
+/*
    C-like prototype :
-	void j_rev_dct_ARM(DCTBLOCK data)
+        void j_rev_dct_ARM(DCTBLOCK data)
 
    With DCTBLOCK being a pointer to an array of 64 'signed shorts'
 
@@ -22,7 +22,7 @@
    COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
    IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-	
+
 */
 #define FIX_0_298631336 2446
 #define FIX_0_541196100 4433
@@ -36,8 +36,8 @@
 #define FIX_M_1_847759065 -15137
 #define FIX_M_1_961570560 -16069
 #define FIX_M_2_562915447 -20995
-#define FIX_0xFFFF 0xFFFF	
-		
+#define FIX_0xFFFF 0xFFFF
+
 #define FIX_0_298631336_ID      0
 #define FIX_0_541196100_ID      4
 #define FIX_0_765366865_ID      8
@@ -51,336 +51,336 @@
 #define FIX_M_1_961570560_ID   40
 #define FIX_M_2_562915447_ID   44
 #define FIX_0xFFFF_ID          48
-	.text
-	.align
-	
-	.global j_rev_dct_ARM
+        .text
+        .align
+
+        .global j_rev_dct_ARM
 j_rev_dct_ARM:
-	stmdb   sp!, { r4 - r12, lr }   @ all callee saved regs
+        stmdb   sp!, { r4 - r12, lr }   @ all callee saved regs
 
-	sub sp, sp, #4                  @ reserve some space on the stack
-	str r0, [ sp ]                  @ save the DCT pointer to the stack
+        sub sp, sp, #4                  @ reserve some space on the stack
+        str r0, [ sp ]                  @ save the DCT pointer to the stack
 
-	mov lr, r0                      @ lr = pointer to the current row
-	mov r12, #8                     @ r12 = row-counter
-	add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array	
+        mov lr, r0                      @ lr = pointer to the current row
+        mov r12, #8                     @ r12 = row-counter
+        add r11, pc, #(const_array-.-8) @ r11 = base pointer to the constants array
 row_loop:
-	ldrsh r0, [lr, # 0]             @ r0 = 'd0'
-	ldrsh r1, [lr, # 8]             @ r1 = 'd1'
-
-	@ Optimization for row that have all items except the first set to 0
-	@ (this works as the DCTELEMS are always 4-byte aligned)
-	ldr r5, [lr, # 0]
-	ldr r2, [lr, # 4]
-	ldr r3, [lr, # 8]
-	ldr r4, [lr, #12]
-	orr r3, r3, r4
-	orr r3, r3, r2
-	orrs r5, r3, r5
-	beq end_of_row_loop             @ nothing to be done as ALL of them are '0'
-	orrs r2, r3, r1
-	beq empty_row
-	
-	ldrsh r2, [lr, # 2]             @ r2 = 'd2'
-	ldrsh r4, [lr, # 4]             @ r4 = 'd4'
-	ldrsh r6, [lr, # 6]             @ r6 = 'd6'
-	
-	ldr r3, [r11, #FIX_0_541196100_ID]
-	add r7, r2, r6
-	ldr r5, [r11, #FIX_M_1_847759065_ID]
-	mul r7, r3, r7                      @ r7 = z1
-	ldr r3, [r11, #FIX_0_765366865_ID]
-	mla r6, r5, r6, r7                  @ r6 = tmp2
-	add r5, r0, r4                      @ r5 = tmp0
-	mla r2, r3, r2, r7                  @ r2 = tmp3
-	sub r3, r0, r4                      @ r3 = tmp1
-
-	add r0, r2, r5, lsl #13             @ r0 = tmp10
-	rsb r2, r2, r5, lsl #13             @ r2 = tmp13
-	add r4, r6, r3, lsl #13             @ r4 = tmp11
-	rsb r3, r6, r3, lsl #13             @ r3 = tmp12
-
-	stmdb   sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
-	
-	ldrsh r3, [lr, #10]             @ r3 = 'd3'
-	ldrsh r5, [lr, #12]             @ r5 = 'd5'
-	ldrsh r7, [lr, #14]             @ r7 = 'd7'
-
-	add r0, r3, r5	                @ r0 = 'z2'
-	add r2, r1, r7                  @ r2 = 'z1'
-	add r4, r3, r7                  @ r4 = 'z3'
-	add r6, r1, r5                  @ r6 = 'z4'
-	ldr r9, [r11, #FIX_1_175875602_ID]
-	add r8, r4, r6                  @ r8 = z3 + z4
-	ldr r10, [r11, #FIX_M_0_899976223_ID]
-	mul r8, r9, r8                  @ r8 = 'z5'
-	ldr r9, [r11, #FIX_M_2_562915447_ID]
-	mul r2, r10, r2                 @ r2 = 'z1'
-	ldr r10, [r11, #FIX_M_1_961570560_ID]
-	mul r0, r9, r0                  @ r0 = 'z2'
-	ldr r9, [r11, #FIX_M_0_390180644_ID]
-	mla r4, r10, r4, r8             @ r4 = 'z3'
-	ldr r10, [r11, #FIX_0_298631336_ID]
-	mla r6, r9, r6, r8              @ r6 = 'z4'
-	ldr r9, [r11, #FIX_2_053119869_ID]
-	mla r7, r10, r7, r2             @ r7 = tmp0 + z1
-	ldr r10, [r11, #FIX_3_072711026_ID]
-	mla r5, r9, r5, r0              @ r5 = tmp1 + z2
-	ldr r9, [r11, #FIX_1_501321110_ID]
-	mla r3, r10, r3, r0             @ r3 = tmp2 + z2
-	add r7, r7, r4                  @ r7 = tmp0
-	mla r1, r9, r1, r2              @ r1 = tmp3 + z1
-	add r5,	r5, r6                  @ r5 = tmp1
-	add r3, r3, r4                  @ r3 = tmp2
-	add r1, r1, r6                  @ r1 = tmp3
-
-	ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
-	                              @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
-	
-	@ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
-	add r8, r0, r1
-	add r8, r8, #(1<<10)
-	mov r8, r8, asr #11
-	strh r8, [lr, # 0]
-	
-	@ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
-	sub r8, r0, r1
-	add r8, r8, #(1<<10)
-	mov r8, r8, asr #11
-	strh r8, [lr, #14]
-	
-	@ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
-	add r8, r6, r3
-	add r8, r8, #(1<<10)
-	mov r8, r8, asr #11
-	strh r8, [lr, # 2]
-	
-	@ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
-	sub r8, r6, r3
-	add r8, r8, #(1<<10)
-	mov r8, r8, asr #11
-	strh r8, [lr, #12]
-	
-	@ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
-	add r8, r4, r5
-	add r8, r8, #(1<<10)
-	mov r8, r8, asr #11
-	strh r8, [lr, # 4]
-	
-	@ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
-	sub r8, r4, r5
-	add r8, r8, #(1<<10)
-	mov r8, r8, asr #11
-	strh r8, [lr, #10]
-	
-	@ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
-	add r8, r2, r7
-	add r8, r8, #(1<<10)
-	mov r8, r8, asr #11
-	strh r8, [lr, # 6]
-	
-	@ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
-	sub r8, r2, r7
-	add r8, r8, #(1<<10)
-	mov r8, r8, asr #11
-	strh r8, [lr, # 8]
-
-	@ End of row loop
-	add lr, lr, #16
-	subs r12, r12, #1
-	bne row_loop
-	beq start_column_loop
-	
+        ldrsh r0, [lr, # 0]             @ r0 = 'd0'
+        ldrsh r1, [lr, # 8]             @ r1 = 'd1'
+
+        @ Optimization for rows that have all items except the first set to 0
+        @ (this works as the DCTELEMS are always 4-byte aligned)
+        ldr r5, [lr, # 0]
+        ldr r2, [lr, # 4]
+        ldr r3, [lr, # 8]
+        ldr r4, [lr, #12]
+        orr r3, r3, r4
+        orr r3, r3, r2
+        orrs r5, r3, r5
+        beq end_of_row_loop             @ nothing to be done as ALL of them are '0'
+        orrs r2, r3, r1
+        beq empty_row
+
+        ldrsh r2, [lr, # 2]             @ r2 = 'd2'
+        ldrsh r4, [lr, # 4]             @ r4 = 'd4'
+        ldrsh r6, [lr, # 6]             @ r6 = 'd6'
+
+        ldr r3, [r11, #FIX_0_541196100_ID]
+        add r7, r2, r6
+        ldr r5, [r11, #FIX_M_1_847759065_ID]
+        mul r7, r3, r7                      @ r7 = z1
+        ldr r3, [r11, #FIX_0_765366865_ID]
+        mla r6, r5, r6, r7                  @ r6 = tmp2
+        add r5, r0, r4                      @ r5 = tmp0
+        mla r2, r3, r2, r7                  @ r2 = tmp3
+        sub r3, r0, r4                      @ r3 = tmp1
+
+        add r0, r2, r5, lsl #13             @ r0 = tmp10
+        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
+        add r4, r6, r3, lsl #13             @ r4 = tmp11
+        rsb r3, r6, r3, lsl #13             @ r3 = tmp12
+
+        stmdb   sp!, { r0, r2, r3, r4 } @ save on the stack tmp10, tmp13, tmp12, tmp11
+
+        ldrsh r3, [lr, #10]             @ r3 = 'd3'
+        ldrsh r5, [lr, #12]             @ r5 = 'd5'
+        ldrsh r7, [lr, #14]             @ r7 = 'd7'
+
+        add r0, r3, r5                        @ r0 = 'z2'
+        add r2, r1, r7                  @ r2 = 'z1'
+        add r4, r3, r7                  @ r4 = 'z3'
+        add r6, r1, r5                  @ r6 = 'z4'
+        ldr r9, [r11, #FIX_1_175875602_ID]
+        add r8, r4, r6                  @ r8 = z3 + z4
+        ldr r10, [r11, #FIX_M_0_899976223_ID]
+        mul r8, r9, r8                  @ r8 = 'z5'
+        ldr r9, [r11, #FIX_M_2_562915447_ID]
+        mul r2, r10, r2                 @ r2 = 'z1'
+        ldr r10, [r11, #FIX_M_1_961570560_ID]
+        mul r0, r9, r0                  @ r0 = 'z2'
+        ldr r9, [r11, #FIX_M_0_390180644_ID]
+        mla r4, r10, r4, r8             @ r4 = 'z3'
+        ldr r10, [r11, #FIX_0_298631336_ID]
+        mla r6, r9, r6, r8              @ r6 = 'z4'
+        ldr r9, [r11, #FIX_2_053119869_ID]
+        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
+        ldr r10, [r11, #FIX_3_072711026_ID]
+        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
+        ldr r9, [r11, #FIX_1_501321110_ID]
+        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
+        add r7, r7, r4                  @ r7 = tmp0
+        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
+        add r5,        r5, r6                  @ r5 = tmp1
+        add r3, r3, r4                  @ r3 = tmp2
+        add r1, r1, r6                  @ r1 = tmp3
+
+        ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp12 / r6 = tmp11
+                                      @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
+
+        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS-PASS1_BITS)
+        add r8, r0, r1
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 0]
+
+        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS-PASS1_BITS)
+        sub r8, r0, r1
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, #14]
+
+        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS-PASS1_BITS)
+        add r8, r6, r3
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 2]
+
+        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS-PASS1_BITS)
+        sub r8, r6, r3
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, #12]
+
+        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS-PASS1_BITS)
+        add r8, r4, r5
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 4]
+
+        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS-PASS1_BITS)
+        sub r8, r4, r5
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, #10]
+
+        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS)
+        add r8, r2, r7
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 6]
+
+        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS)
+        sub r8, r2, r7
+        add r8, r8, #(1<<10)
+        mov r8, r8, asr #11
+        strh r8, [lr, # 8]
+
+        @ End of row loop
+        add lr, lr, #16
+        subs r12, r12, #1
+        bne row_loop
+        beq start_column_loop
+
 empty_row:
-	ldr r1, [r11, #FIX_0xFFFF_ID]
-	mov r0, r0, lsl #2
-	and r0, r0, r1
-	add r0, r0, r0, lsl #16
-	str r0, [lr, # 0]
-	str r0, [lr, # 4]
-	str r0, [lr, # 8]
-	str r0, [lr, #12]
+        ldr r1, [r11, #FIX_0xFFFF_ID]
+        mov r0, r0, lsl #2
+        and r0, r0, r1
+        add r0, r0, r0, lsl #16
+        str r0, [lr, # 0]
+        str r0, [lr, # 4]
+        str r0, [lr, # 8]
+        str r0, [lr, #12]
 
 end_of_row_loop:
-	@ End of loop
-	add lr, lr, #16
-	subs r12, r12, #1
-	bne row_loop
+        @ End of loop
+        add lr, lr, #16
+        subs r12, r12, #1
+        bne row_loop
 
 start_column_loop:
-	@ Start of column loop
-	ldr lr, [ sp ]
-	mov r12, #8
+        @ Start of column loop
+        ldr lr, [ sp ]
+        mov r12, #8
 column_loop:
-	ldrsh r0, [lr, #( 0*8)]             @ r0 = 'd0'
-	ldrsh r2, [lr, #( 4*8)]             @ r2 = 'd2'
-	ldrsh r4, [lr, #( 8*8)]             @ r4 = 'd4'
-	ldrsh r6, [lr, #(12*8)]             @ r6 = 'd6'
-
-	ldr r3, [r11, #FIX_0_541196100_ID]
-	add r1, r2, r6
-	ldr r5, [r11, #FIX_M_1_847759065_ID]
-	mul r1, r3, r1                      @ r1 = z1
-	ldr r3, [r11, #FIX_0_765366865_ID]
-	mla r6, r5, r6, r1                  @ r6 = tmp2
-	add r5, r0, r4                      @ r5 = tmp0
-	mla r2, r3, r2, r1                  @ r2 = tmp3
-	sub r3, r0, r4                      @ r3 = tmp1
-
-	add r0, r2, r5, lsl #13             @ r0 = tmp10
-	rsb r2, r2, r5, lsl #13             @ r2 = tmp13
-	add r4, r6, r3, lsl #13             @ r4 = tmp11
-	rsb r6, r6, r3, lsl #13             @ r6 = tmp12
-
-	ldrsh r1, [lr, #( 2*8)]             @ r1 = 'd1'
-	ldrsh r3, [lr, #( 6*8)]             @ r3 = 'd3'
-	ldrsh r5, [lr, #(10*8)]             @ r5 = 'd5'
-	ldrsh r7, [lr, #(14*8)]             @ r7 = 'd7'
-
-	@ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
-	orr r9, r1, r3
-	orr r10, r5, r7
-	orrs r10, r9, r10
-	beq empty_odd_column
-
-	stmdb   sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp12, tmp11
-		
-	add r0, r3, r5	                @ r0 = 'z2'
-	add r2, r1, r7                  @ r2 = 'z1'
-	add r4, r3, r7                  @ r4 = 'z3'
-	add r6, r1, r5                  @ r6 = 'z4'
-	ldr r9, [r11, #FIX_1_175875602_ID]
-	add r8, r4, r6
-	ldr r10, [r11, #FIX_M_0_899976223_ID]
-	mul r8, r9, r8                  @ r8 = 'z5'
-	ldr r9, [r11, #FIX_M_2_562915447_ID]
-	mul r2, r10, r2                 @ r2 = 'z1'
-	ldr r10, [r11, #FIX_M_1_961570560_ID]
-	mul r0, r9, r0                  @ r0 = 'z2'
-	ldr r9, [r11, #FIX_M_0_390180644_ID]
-	mla r4, r10, r4, r8             @ r4 = 'z3'
-	ldr r10, [r11, #FIX_0_298631336_ID]
-	mla r6, r9, r6, r8              @ r6 = 'z4'
-	ldr r9, [r11, #FIX_2_053119869_ID]
-	mla r7, r10, r7, r2             @ r7 = tmp0 + z1
-	ldr r10, [r11, #FIX_3_072711026_ID]
-	mla r5, r9, r5, r0              @ r5 = tmp1 + z2
-	ldr r9, [r11, #FIX_1_501321110_ID]
-	mla r3, r10, r3, r0             @ r3 = tmp2 + z2
-	add r7, r7, r4                  @ r7 = tmp0
-	mla r1, r9, r1, r2              @ r1 = tmp3 + z1
-	add r5,	r5, r6                  @ r5 = tmp1
-	add r3, r3, r4                  @ r3 = tmp2
-	add r1, r1, r6                  @ r1 = tmp3	
-	
-	ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
-	                              @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0	
-
-	@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
-	add r8, r0, r1
-	add r8, r8, #(1<<17)
-	mov r8, r8, asr #18
-	strh r8, [lr, #( 0*8)]
-	
-	@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
-	sub r8, r0, r1
-	add r8, r8, #(1<<17)
-	mov r8, r8, asr #18
-	strh r8, [lr, #(14*8)]
-	
-	@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
-	add r8, r4, r3
-	add r8, r8, #(1<<17)
-	mov r8, r8, asr #18
-	strh r8, [lr, #( 2*8)]
-	
-	@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
-	sub r8, r4, r3
-	add r8, r8, #(1<<17)
-	mov r8, r8, asr #18
-	strh r8, [lr, #(12*8)]
-	
-	@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
-	add r8, r6, r5
-	add r8, r8, #(1<<17)
-	mov r8, r8, asr #18
-	strh r8, [lr, #( 4*8)]
-	
-	@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
-	sub r8, r6, r5
-	add r8, r8, #(1<<17)
-	mov r8, r8, asr #18
-	strh r8, [lr, #(10*8)]
-	
-	@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
-	add r8, r2, r7
-	add r8, r8, #(1<<17)
-	mov r8, r8, asr #18
-	strh r8, [lr, #( 6*8)]
-	
-	@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
-	sub r8, r2, r7
-	add r8, r8, #(1<<17)
-	mov r8, r8, asr #18
-	strh r8, [lr, #( 8*8)]
-
-	@ End of row loop
-	add lr, lr, #2
-	subs r12, r12, #1
-	bne column_loop
-	beq the_end
-	
+        ldrsh r0, [lr, #( 0*8)]             @ r0 = 'd0'
+        ldrsh r2, [lr, #( 4*8)]             @ r2 = 'd2'
+        ldrsh r4, [lr, #( 8*8)]             @ r4 = 'd4'
+        ldrsh r6, [lr, #(12*8)]             @ r6 = 'd6'
+
+        ldr r3, [r11, #FIX_0_541196100_ID]
+        add r1, r2, r6
+        ldr r5, [r11, #FIX_M_1_847759065_ID]
+        mul r1, r3, r1                      @ r1 = z1
+        ldr r3, [r11, #FIX_0_765366865_ID]
+        mla r6, r5, r6, r1                  @ r6 = tmp2
+        add r5, r0, r4                      @ r5 = tmp0
+        mla r2, r3, r2, r1                  @ r2 = tmp3
+        sub r3, r0, r4                      @ r3 = tmp1
+
+        add r0, r2, r5, lsl #13             @ r0 = tmp10
+        rsb r2, r2, r5, lsl #13             @ r2 = tmp13
+        add r4, r6, r3, lsl #13             @ r4 = tmp11
+        rsb r6, r6, r3, lsl #13             @ r6 = tmp12
+
+        ldrsh r1, [lr, #( 2*8)]             @ r1 = 'd1'
+        ldrsh r3, [lr, #( 6*8)]             @ r3 = 'd3'
+        ldrsh r5, [lr, #(10*8)]             @ r5 = 'd5'
+        ldrsh r7, [lr, #(14*8)]             @ r7 = 'd7'
+
+        @ Check for empty odd column (happens about 20 to 25 % of the time according to my stats)
+        orr r9, r1, r3
+        orr r10, r5, r7
+        orrs r10, r9, r10
+        beq empty_odd_column
+
+        stmdb   sp!, { r0, r2, r4, r6 } @ save on the stack tmp10, tmp13, tmp11, tmp12
+
+        add r0, r3, r5                  @ r0 = 'z2'
+        add r2, r1, r7                  @ r2 = 'z1'
+        add r4, r3, r7                  @ r4 = 'z3'
+        add r6, r1, r5                  @ r6 = 'z4'
+        ldr r9, [r11, #FIX_1_175875602_ID]
+        add r8, r4, r6
+        ldr r10, [r11, #FIX_M_0_899976223_ID]
+        mul r8, r9, r8                  @ r8 = 'z5'
+        ldr r9, [r11, #FIX_M_2_562915447_ID]
+        mul r2, r10, r2                 @ r2 = 'z1'
+        ldr r10, [r11, #FIX_M_1_961570560_ID]
+        mul r0, r9, r0                  @ r0 = 'z2'
+        ldr r9, [r11, #FIX_M_0_390180644_ID]
+        mla r4, r10, r4, r8             @ r4 = 'z3'
+        ldr r10, [r11, #FIX_0_298631336_ID]
+        mla r6, r9, r6, r8              @ r6 = 'z4'
+        ldr r9, [r11, #FIX_2_053119869_ID]
+        mla r7, r10, r7, r2             @ r7 = tmp0 + z1
+        ldr r10, [r11, #FIX_3_072711026_ID]
+        mla r5, r9, r5, r0              @ r5 = tmp1 + z2
+        ldr r9, [r11, #FIX_1_501321110_ID]
+        mla r3, r10, r3, r0             @ r3 = tmp2 + z2
+        add r7, r7, r4                  @ r7 = tmp0
+        mla r1, r9, r1, r2              @ r1 = tmp3 + z1
+        add r5,        r5, r6                  @ r5 = tmp1
+        add r3, r3, r4                  @ r3 = tmp2
+        add r1, r1, r6                  @ r1 = tmp3
+
+        ldmia sp!, { r0, r2, r4, r6 } @ r0 = tmp10 / r2 = tmp13 / r4 = tmp11 / r6 = tmp12
+                                      @ r1 = tmp3  / r3 = tmp2  / r5 = tmp1  / r7 = tmp0
+
+        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
+        add r8, r0, r1
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 0*8)]
+
+        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
+        sub r8, r0, r1
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #(14*8)]
+
+        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
+        add r8, r4, r3
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 2*8)]
+
+        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
+        sub r8, r4, r3
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #(12*8)]
+
+        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
+        add r8, r6, r5
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 4*8)]
+
+        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
+        sub r8, r6, r5
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #(10*8)]
+
+        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
+        add r8, r2, r7
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 6*8)]
+
+        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
+        sub r8, r2, r7
+        add r8, r8, #(1<<17)
+        mov r8, r8, asr #18
+        strh r8, [lr, #( 8*8)]
+
+        @ End of column loop
+        add lr, lr, #2
+        subs r12, r12, #1
+        bne column_loop
+        beq the_end
+
 empty_odd_column:
-	@ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
-	@ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
-	add r0, r0, #(1<<17)
-	mov r0, r0, asr #18
-	strh r0, [lr, #( 0*8)]
-	strh r0, [lr, #(14*8)]
-	
-	@ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
-	@ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
-	add r4, r4, #(1<<17)
-	mov r4, r4, asr #18
-	strh r4, [lr, #( 2*8)]
-	strh r4, [lr, #(12*8)]
-	
-	@ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
-	@ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
-	add r6, r6, #(1<<17)
-	mov r6, r6, asr #18
-	strh r6, [lr, #( 4*8)]
-	strh r6, [lr, #(10*8)]
-	
-	@ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
-	@ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
-	add r2, r2, #(1<<17)
-	mov r2, r2, asr #18
-	strh r2, [lr, #( 6*8)]
-	strh r2, [lr, #( 8*8)]
-
-	@ End of row loop
-	add lr, lr, #2
-	subs r12, r12, #1
-	bne column_loop
-		
-the_end:	
-	@ The end....
-	add sp, sp, #4
-	ldmia   sp!, { r4 - r12, pc }   @ restore callee saved regs and return
+        @ Compute DESCALE(tmp10 + tmp3, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp10 - tmp3, CONST_BITS+PASS1_BITS+3)
+        add r0, r0, #(1<<17)
+        mov r0, r0, asr #18
+        strh r0, [lr, #( 0*8)]
+        strh r0, [lr, #(14*8)]
+
+        @ Compute DESCALE(tmp11 + tmp2, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp11 - tmp2, CONST_BITS+PASS1_BITS+3)
+        add r4, r4, #(1<<17)
+        mov r4, r4, asr #18
+        strh r4, [lr, #( 2*8)]
+        strh r4, [lr, #(12*8)]
+
+        @ Compute DESCALE(tmp12 + tmp1, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp12 - tmp1, CONST_BITS+PASS1_BITS+3)
+        add r6, r6, #(1<<17)
+        mov r6, r6, asr #18
+        strh r6, [lr, #( 4*8)]
+        strh r6, [lr, #(10*8)]
+
+        @ Compute DESCALE(tmp13 + tmp0, CONST_BITS+PASS1_BITS+3)
+        @ Compute DESCALE(tmp13 - tmp0, CONST_BITS+PASS1_BITS+3)
+        add r2, r2, #(1<<17)
+        mov r2, r2, asr #18
+        strh r2, [lr, #( 6*8)]
+        strh r2, [lr, #( 8*8)]
+
+        @ End of column loop
+        add lr, lr, #2
+        subs r12, r12, #1
+        bne column_loop
+
+the_end:
+        @ The end....
+        add sp, sp, #4
+        ldmia   sp!, { r4 - r12, pc }   @ restore callee saved regs and return
 
 const_array:
-	.align
-	.word FIX_0_298631336
-	.word FIX_0_541196100
-	.word FIX_0_765366865
-	.word FIX_1_175875602
-	.word FIX_1_501321110
-	.word FIX_2_053119869
-	.word FIX_3_072711026
-	.word FIX_M_0_390180644
-	.word FIX_M_0_899976223
-	.word FIX_M_1_847759065
-	.word FIX_M_1_961570560
-	.word FIX_M_2_562915447
-	.word FIX_0xFFFF
+        .align
+        .word FIX_0_298631336
+        .word FIX_0_541196100
+        .word FIX_0_765366865
+        .word FIX_1_175875602
+        .word FIX_1_501321110
+        .word FIX_2_053119869
+        .word FIX_3_072711026
+        .word FIX_M_0_390180644
+        .word FIX_M_0_899976223
+        .word FIX_M_1_847759065
+        .word FIX_M_1_961570560
+        .word FIX_M_2_562915447
+        .word FIX_0xFFFF
diff --git a/src/libffmpeg/libavcodec/armv4l/mpegvideo_arm.c b/src/libffmpeg/libavcodec/armv4l/mpegvideo_arm.c
index 6e4c9fb3c..263e3c5bc 100644
--- a/src/libffmpeg/libavcodec/armv4l/mpegvideo_arm.c
+++ b/src/libffmpeg/libavcodec/armv4l/mpegvideo_arm.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
diff --git a/src/libffmpeg/libavcodec/armv4l/simple_idct_arm.S b/src/libffmpeg/libavcodec/armv4l/simple_idct_arm.S
index 95ac0dee4..43751896d 100644
--- a/src/libffmpeg/libavcodec/armv4l/simple_idct_arm.S
+++ b/src/libffmpeg/libavcodec/armv4l/simple_idct_arm.S
@@ -1,4 +1,4 @@
-/* 
+/*
  * simple_idct_arm.S
  * Copyright (C) 2002 Frederic 'dilb' Boulay.
  * All Rights Reserved.
@@ -16,11 +16,11 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  *
  * The function defined in this file, is derived from the simple_idct function
- * from the libavcodec library part of the ffmpeg project. 
+ * from the libavcodec library part of the ffmpeg project.
  */
 
 /* useful constants for the algorithm, they are save in __constant_ptr__ at */
@@ -51,9 +51,9 @@
 #define COL_SHIFTED_1 524288 /* 1<< (COL_SHIFT-1) */
 
 
-	.text
-	.align
-	.global simple_idct_ARM
+        .text
+        .align
+        .global simple_idct_ARM
 
 simple_idct_ARM:
         @@ void simple_idct_ARM(int16_t *block)
@@ -120,8 +120,8 @@ __b_evaluation:
         ldr r11, [r12, #offW7]   @ R11=W7
         mul r5, r10, r7          @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
         mul r7, r11, r7          @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
-		teq r2, #0               @ if null avoid muls
-		mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+                teq r2, #0               @ if null avoid muls
+                mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
         rsbne r2, r2, #0         @ R2=-ROWr16[3]
         mlane r1, r11, r2, r1    @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
         mlane r5, r8, r2, r5     @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
@@ -147,7 +147,7 @@ __b_evaluation:
         @@ MAC16(b3, -W1, row[7]);
         @@ MAC16(b1, -W5, row[7]);
         mov r3, r3, asr #16      @ R3=ROWr16[5]
-		teq r3, #0               @ if null avoid muls
+                teq r3, #0               @ if null avoid muls
         mlane r0, r10, r3, r0    @ R0+=W5*ROWr16[5]=b0
         mov r4, r4, asr #16      @ R4=ROWr16[7]
         mlane r5, r11, r3, r5    @ R5+=W7*ROWr16[5]=b2
@@ -155,7 +155,7 @@ __b_evaluation:
         rsbne r3, r3, #0         @ R3=-ROWr16[5]
         mlane r1, r8, r3, r1     @ R7-=W1*ROWr16[5]=b1
         @@ R3 is free now
-		teq r4, #0               @ if null avoid muls
+                teq r4, #0               @ if null avoid muls
         mlane r0, r11, r4, r0    @ R0+=W7*ROWr16[7]=b0
         mlane r5, r9, r4, r5     @ R5+=W3*ROWr16[7]=b2
         rsbne r4, r4, #0         @ R4=-ROWr16[7]
@@ -187,7 +187,7 @@ __a_evaluation:
         teq r2, #0
         beq __end_bef_a_evaluation
 
-	add r2, r6, r11          @ R2=a0+W6*ROWr16[2] (a1)
+        add r2, r6, r11          @ R2=a0+W6*ROWr16[2] (a1)
         mul r11, r8, r4          @ R11=W2*ROWr16[2]
         sub r4, r6, r11          @ R4=a0-W2*ROWr16[2] (a3)
         add r6, r6, r11          @ R6=a0+W2*ROWr16[2] (a0)
@@ -203,7 +203,7 @@ __a_evaluation:
         @@ a2 -= W4*row[4]
         @@ a3 += W4*row[4]
         ldrsh r11, [r14, #8]     @ R11=ROWr16[4]
-		teq r11, #0              @ if null avoid muls
+                teq r11, #0              @ if null avoid muls
         mulne r11, r9, r11       @ R11=W4*ROWr16[4]
         @@ R9 is free now
         ldrsh r9, [r14, #12]     @ R9=ROWr16[6]
@@ -212,7 +212,7 @@ __a_evaluation:
         subne r3, r3, r11        @ R3-=W4*ROWr16[4] (a2)
         addne r4, r4, r11        @ R4+=W4*ROWr16[4] (a3)
         @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
-		teq r9, #0               @ if null avoid muls
+                teq r9, #0               @ if null avoid muls
         mulne r11, r10, r9       @ R11=W6*ROWr16[6]
         addne r6, r6, r11        @ R6+=W6*ROWr16[6] (a0)
         mulne r10, r8, r9        @ R10=W2*ROWr16[6]
@@ -294,165 +294,165 @@ __end_row_loop:
 
 
 
-	@@ at this point, R0=block, R1-R11 (free)
-	@@     R12=__const_ptr_, R14=&block[n]
-	add r14, r0, #14        @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
+        @@ at this point, R0=block, R1-R11 (free)
+        @@     R12=__const_ptr_, R14=&block[n]
+        add r14, r0, #14        @ R14=&block[7], better start from the last col, and decrease the value until col=0, i.e. R14=block.
 __col_loop:
 
 __b_evaluation2:
-	@@ at this point, R0=block (temp),  R1-R11 (free)
-	@@     R12=__const_ptr_, R14=&block[n]
-	@@ proceed with b0-b3 first, followed by a0-a3
-	@@ MUL16(b0, W1, col[8x1]);
-	@@ MUL16(b1, W3, col[8x1]);
-	@@ MUL16(b2, W5, col[8x1]);
-	@@ MUL16(b3, W7, col[8x1]);
-	@@ MAC16(b0, W3, col[8x3]);
-	@@ MAC16(b1, -W7, col[8x3]);
-	@@ MAC16(b2, -W1, col[8x3]);
-	@@ MAC16(b3, -W5, col[8x3]);
-	ldr r8, [r12, #offW1]    @ R8=W1
-	ldrsh r7, [r14, #16]
-	mul r0, r8, r7           @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
-	ldr r9, [r12, #offW3]    @ R9=W3
-	ldr r10, [r12, #offW5]   @ R10=W5
-	mul r1, r9, r7           @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
-	ldr r11, [r12, #offW7]   @ R11=W7
-	mul r5, r10, r7          @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
-	ldrsh r2, [r14, #48]
-	mul r7, r11, r7          @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
-	teq r2, #0               @ if 0, then avoid muls
-	mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
-	rsbne r2, r2, #0         @ R2=-ROWr16[3]
-	mlane r1, r11, r2, r1    @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
-	mlane r5, r8, r2, r5     @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
-	mlane r7, r10, r2, r7    @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
-
-	@@ at this point, R0=b0,  R1=b1, R2 (free), R3 (free), R4 (free),
-	@@     R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
-	@@     R12=__const_ptr_, R14=&block[n]
-	@@ MAC16(b0, W5, col[5x8]);
-	@@ MAC16(b2, W7, col[5x8]);
-	@@ MAC16(b3, W3, col[5x8]);
-	@@ MAC16(b1, -W1, col[5x8]);
-	@@ MAC16(b0, W7, col[7x8]);
-	@@ MAC16(b2, W3, col[7x8]);
-	@@ MAC16(b3, -W1, col[7x8]);
-	@@ MAC16(b1, -W5, col[7x8]);
-	ldrsh r3, [r14, #80]     @ R3=COLr16[5x8]
-	teq r3, #0               @ if 0 then avoid muls
-	mlane r0, r10, r3, r0    @ R0+=W5*ROWr16[5x8]=b0
-	mlane r5, r11, r3, r5    @ R5+=W7*ROWr16[5x8]=b2
-	mlane r7, r9, r3, r7     @ R7+=W3*ROWr16[5x8]=b3
-	rsbne r3, r3, #0         @ R3=-ROWr16[5x8]
-	ldrsh r4, [r14, #112]    @ R4=COLr16[7x8]
-	mlane r1, r8, r3, r1     @ R7-=W1*ROWr16[5x8]=b1
-	@@ R3 is free now
-	teq r4, #0               @ if 0 then avoid muls
-	mlane r0, r11, r4, r0    @ R0+=W7*ROWr16[7x8]=b0
-	mlane r5, r9, r4, r5     @ R5+=W3*ROWr16[7x8]=b2
-	rsbne r4, r4, #0         @ R4=-ROWr16[7x8]
-	mlane r7, r8, r4, r7     @ R7-=W1*ROWr16[7x8]=b3
-	mlane r1, r10, r4, r1    @ R1-=W5*ROWr16[7x8]=b1
-	@@ R4 is free now
+        @@ at this point, R0=block (temp),  R1-R11 (free)
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ proceed with b0-b3 first, followed by a0-a3
+        @@ MUL16(b0, W1, col[8x1]);
+        @@ MUL16(b1, W3, col[8x1]);
+        @@ MUL16(b2, W5, col[8x1]);
+        @@ MUL16(b3, W7, col[8x1]);
+        @@ MAC16(b0, W3, col[8x3]);
+        @@ MAC16(b1, -W7, col[8x3]);
+        @@ MAC16(b2, -W1, col[8x3]);
+        @@ MAC16(b3, -W5, col[8x3]);
+        ldr r8, [r12, #offW1]    @ R8=W1
+        ldrsh r7, [r14, #16]
+        mul r0, r8, r7           @ R0=W1*ROWr16[1]=b0 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldr r9, [r12, #offW3]    @ R9=W3
+        ldr r10, [r12, #offW5]   @ R10=W5
+        mul r1, r9, r7           @ R1=W3*ROWr16[1]=b1 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldr r11, [r12, #offW7]   @ R11=W7
+        mul r5, r10, r7          @ R5=W5*ROWr16[1]=b2 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        ldrsh r2, [r14, #48]
+        mul r7, r11, r7          @ R7=W7*ROWr16[1]=b3 (ROWr16[1] must be the second arg, to have the possibility to save 1 cycle)
+        teq r2, #0               @ if 0, then avoid muls
+        mlane r0, r9, r2, r0     @ R0+=W3*ROWr16[3]=b0 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        rsbne r2, r2, #0         @ R2=-ROWr16[3]
+        mlane r1, r11, r2, r1    @ R1-=W7*ROWr16[3]=b1 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        mlane r5, r8, r2, r5     @ R5-=W1*ROWr16[3]=b2 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+        mlane r7, r10, r2, r7    @ R7-=W5*ROWr16[3]=b3 (ROWr16[3] must be the second arg, to have the possibility to save 1 cycle)
+
+        @@ at this point, R0=b0,  R1=b1, R2 (free), R3 (free), R4 (free),
+        @@     R5=b2, R6 (free), R7=b3, R8=W1, R9=W3, R10=W5, R11=W7,
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ MAC16(b0, W5, col[5x8]);
+        @@ MAC16(b2, W7, col[5x8]);
+        @@ MAC16(b3, W3, col[5x8]);
+        @@ MAC16(b1, -W1, col[5x8]);
+        @@ MAC16(b0, W7, col[7x8]);
+        @@ MAC16(b2, W3, col[7x8]);
+        @@ MAC16(b3, -W1, col[7x8]);
+        @@ MAC16(b1, -W5, col[7x8]);
+        ldrsh r3, [r14, #80]     @ R3=COLr16[5x8]
+        teq r3, #0               @ if 0 then avoid muls
+        mlane r0, r10, r3, r0    @ R0+=W5*ROWr16[5x8]=b0
+        mlane r5, r11, r3, r5    @ R5+=W7*ROWr16[5x8]=b2
+        mlane r7, r9, r3, r7     @ R7+=W3*ROWr16[5x8]=b3
+        rsbne r3, r3, #0         @ R3=-ROWr16[5x8]
+        ldrsh r4, [r14, #112]    @ R4=COLr16[7x8]
+        mlane r1, r8, r3, r1     @ R1-=W1*ROWr16[5x8]=b1
+        @@ R3 is free now
+        teq r4, #0               @ if 0 then avoid muls
+        mlane r0, r11, r4, r0    @ R0+=W7*ROWr16[7x8]=b0
+        mlane r5, r9, r4, r5     @ R5+=W3*ROWr16[7x8]=b2
+        rsbne r4, r4, #0         @ R4=-ROWr16[7x8]
+        mlane r7, r8, r4, r7     @ R7-=W1*ROWr16[7x8]=b3
+        mlane r1, r10, r4, r1    @ R1-=W5*ROWr16[7x8]=b1
+        @@ R4 is free now
 __end_b_evaluation2:
-	@@ at this point, R0=b0,  R1=b1, R2 (free), R3 (free), R4 (free),
-	@@     R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
-	@@     R12=__const_ptr_, R14=&block[n]
+        @@ at this point, R0=b0,  R1=b1, R2 (free), R3 (free), R4 (free),
+        @@     R5=b2, R6 (free), R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
 
 __a_evaluation2:
-	@@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1));
-	@@ a1 = a0 + W6 * row[2];
-	@@ a2 = a0 - W6 * row[2];
-	@@ a3 = a0 - W2 * row[2];
-	@@ a0 = a0 + W2 * row[2];
-	ldrsh r6, [r14, #0]
-	ldr r9, [r12, #offW4]    @ R9=W4
-	mul r6, r9, r6           @ R6=W4*ROWr16[0]
-	ldr r10, [r12, #offW6]   @ R10=W6
-	ldrsh r4, [r14, #32]      @ R4=ROWr16[2] (a3 not defined yet)
-	add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0)
-	mul r11, r10, r4         @ R11=W6*ROWr16[2]
-	ldr r8, [r12, #offW2]    @ R8=W2
-	add r2, r6, r11          @ R2=a0+W6*ROWr16[2] (a1)
-	sub r3, r6, r11          @ R3=a0-W6*ROWr16[2] (a2)
-	mul r11, r8, r4          @ R11=W2*ROWr16[2]
-	sub r4, r6, r11          @ R4=a0-W2*ROWr16[2] (a3)
-	add r6, r6, r11          @ R6=a0+W2*ROWr16[2] (a0)
-
-	@@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
-	@@     R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
-	@@     R12=__const_ptr_, R14=&block[n]
-	@@ a0 += W4*row[4]
-	@@ a1 -= W4*row[4]
-	@@ a2 -= W4*row[4]
-	@@ a3 += W4*row[4]
-	ldrsh r11, [r14, #64]     @ R11=ROWr16[4]
-	teq r11, #0              @ if null avoid muls
-	mulne r11, r9, r11       @ R11=W4*ROWr16[4]
-	@@ R9 is free now
-	addne r6, r6, r11        @ R6+=W4*ROWr16[4] (a0)
-	subne r2, r2, r11        @ R2-=W4*ROWr16[4] (a1)
-	subne r3, r3, r11        @ R3-=W4*ROWr16[4] (a2)
-	ldrsh r9, [r14, #96]     @ R9=ROWr16[6]
-	addne r4, r4, r11        @ R4+=W4*ROWr16[4] (a3)
-	@@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
-	teq r9, #0               @ if null avoid muls
-	mulne r11, r10, r9       @ R11=W6*ROWr16[6]
-	addne r6, r6, r11        @ R6+=W6*ROWr16[6] (a0)
-	mulne r10, r8, r9        @ R10=W2*ROWr16[6]
-	@@ a0 += W6*row[6];
-	@@ a3 -= W6*row[6];
-	@@ a1 -= W2*row[6];
-	@@ a2 += W2*row[6];
-	subne r4, r4, r11        @ R4-=W6*ROWr16[6] (a3)
-	subne r2, r2, r10        @ R2-=W2*ROWr16[6] (a1)
-	addne r3, r3, r10        @ R3+=W2*ROWr16[6] (a2)
+        @@ a0 = (W4 * col[8x0]) + (1 << (COL_SHIFT - 1));
+        @@ a1 = a0 + W6 * row[2];
+        @@ a2 = a0 - W6 * row[2];
+        @@ a3 = a0 - W2 * row[2];
+        @@ a0 = a0 + W2 * row[2];
+        ldrsh r6, [r14, #0]
+        ldr r9, [r12, #offW4]    @ R9=W4
+        mul r6, r9, r6           @ R6=W4*ROWr16[0]
+        ldr r10, [r12, #offW6]   @ R10=W6
+        ldrsh r4, [r14, #32]     @ R4=ROWr16[2] (a3 not defined yet)
+        add r6, r6, #COL_SHIFTED_1 @ R6=W4*ROWr16[0] + 1<<(COL_SHIFT-1) (a0)
+        mul r11, r10, r4         @ R11=W6*ROWr16[2]
+        ldr r8, [r12, #offW2]    @ R8=W2
+        add r2, r6, r11          @ R2=a0+W6*ROWr16[2] (a1)
+        sub r3, r6, r11          @ R3=a0-W6*ROWr16[2] (a2)
+        mul r11, r8, r4          @ R11=W2*ROWr16[2]
+        sub r4, r6, r11          @ R4=a0-W2*ROWr16[2] (a3)
+        add r6, r6, r11          @ R6=a0+W2*ROWr16[2] (a0)
+
+        @@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
+        @@     R5=b2, R6=a0, R7=b3, R8=W2, R9=W4, R10=W6, R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ a0 += W4*row[4]
+        @@ a1 -= W4*row[4]
+        @@ a2 -= W4*row[4]
+        @@ a3 += W4*row[4]
+        ldrsh r11, [r14, #64]    @ R11=ROWr16[4]
+        teq r11, #0              @ if null avoid muls
+        mulne r11, r9, r11       @ R11=W4*ROWr16[4]
+        @@ R9 is free now
+        addne r6, r6, r11        @ R6+=W4*ROWr16[4] (a0)
+        subne r2, r2, r11        @ R2-=W4*ROWr16[4] (a1)
+        subne r3, r3, r11        @ R3-=W4*ROWr16[4] (a2)
+        ldrsh r9, [r14, #96]     @ R9=ROWr16[6]
+        addne r4, r4, r11        @ R4+=W4*ROWr16[4] (a3)
+        @@ W6 alone is no more useful, save W2*ROWr16[6] in it instead
+        teq r9, #0               @ if null avoid muls
+        mulne r11, r10, r9       @ R11=W6*ROWr16[6]
+        addne r6, r6, r11        @ R6+=W6*ROWr16[6] (a0)
+        mulne r10, r8, r9        @ R10=W2*ROWr16[6]
+        @@ a0 += W6*row[6];
+        @@ a3 -= W6*row[6];
+        @@ a1 -= W2*row[6];
+        @@ a2 += W2*row[6];
+        subne r4, r4, r11        @ R4-=W6*ROWr16[6] (a3)
+        subne r2, r2, r10        @ R2-=W2*ROWr16[6] (a1)
+        addne r3, r3, r10        @ R3+=W2*ROWr16[6] (a2)
 __end_a_evaluation2:
-	@@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
-	@@     R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
-	@@     R12=__const_ptr_, R14=&block[n]
-	@@ col[0 ] = ((a0 + b0) >> COL_SHIFT);
-	@@ col[8 ] = ((a1 + b1) >> COL_SHIFT);
-	@@ col[16] = ((a2 + b2) >> COL_SHIFT);
-	@@ col[24] = ((a3 + b3) >> COL_SHIFT);
-	@@ col[32] = ((a3 - b3) >> COL_SHIFT);
-	@@ col[40] = ((a2 - b2) >> COL_SHIFT);
-	@@ col[48] = ((a1 - b1) >> COL_SHIFT);
-	@@ col[56] = ((a0 - b0) >> COL_SHIFT);
-	@@@@@ no optimisation here @@@@@
-	add r8, r6, r0           @ R8=a0+b0
-	add r9, r2, r1           @ R9=a1+b1
-	mov r8, r8, asr #COL_SHIFT
-	mov r9, r9, asr #COL_SHIFT
-	strh r8, [r14, #0]
-	strh r9, [r14, #16]
-	add r8, r3, r5           @ R8=a2+b2
-	add r9, r4, r7           @ R9=a3+b3
-	mov r8, r8, asr #COL_SHIFT
-	mov r9, r9, asr #COL_SHIFT
-	strh r8, [r14, #32]
-	strh r9, [r14, #48]
-	sub r8, r4, r7           @ R8=a3-b3
-	sub r9, r3, r5           @ R9=a2-b2
-	mov r8, r8, asr #COL_SHIFT
-	mov r9, r9, asr #COL_SHIFT
-	strh r8, [r14, #64]
-	strh r9, [r14, #80]
-	sub r8, r2, r1           @ R8=a1-b1
-	sub r9, r6, r0           @ R9=a0-b0
-	mov r8, r8, asr #COL_SHIFT
-	mov r9, r9, asr #COL_SHIFT
-	strh r8, [r14, #96]
-	strh r9, [r14, #112]
+        @@ at this point, R0=b0,  R1=b1, R2=a1, R3=a2, R4=a3,
+        @@     R5=b2, R6=a0, R7=b3, R8 (free), R9 (free), R10 (free), R11 (free),
+        @@     R12=__const_ptr_, R14=&block[n]
+        @@ col[0 ] = ((a0 + b0) >> COL_SHIFT);
+        @@ col[8 ] = ((a1 + b1) >> COL_SHIFT);
+        @@ col[16] = ((a2 + b2) >> COL_SHIFT);
+        @@ col[24] = ((a3 + b3) >> COL_SHIFT);
+        @@ col[32] = ((a3 - b3) >> COL_SHIFT);
+        @@ col[40] = ((a2 - b2) >> COL_SHIFT);
+        @@ col[48] = ((a1 - b1) >> COL_SHIFT);
+        @@ col[56] = ((a0 - b0) >> COL_SHIFT);
+        @@@@@ no optimisation here @@@@@
+        add r8, r6, r0           @ R8=a0+b0
+        add r9, r2, r1           @ R9=a1+b1
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #0]
+        strh r9, [r14, #16]
+        add r8, r3, r5           @ R8=a2+b2
+        add r9, r4, r7           @ R9=a3+b3
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #32]
+        strh r9, [r14, #48]
+        sub r8, r4, r7           @ R8=a3-b3
+        sub r9, r3, r5           @ R9=a2-b2
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #64]
+        strh r9, [r14, #80]
+        sub r8, r2, r1           @ R8=a1-b1
+        sub r9, r6, r0           @ R9=a0-b0
+        mov r8, r8, asr #COL_SHIFT
+        mov r9, r9, asr #COL_SHIFT
+        strh r8, [r14, #96]
+        strh r9, [r14, #112]
 
 __end_col_loop:
-	@@ at this point, R0-R11 (free)
-	@@     R12=__const_ptr_, R14=&block[n]
-	ldr r0, [sp, #0]         @ R0=block
-	teq r0, r14              @ compare current &block[n] to block, when block is reached, the loop is finished.
-	sub r14, r14, #2
-	bne __col_loop
+        @@ at this point, R0-R11 (free)
+        @@     R12=__const_ptr_, R14=&block[n]
+        ldr r0, [sp, #0]         @ R0=block
+        teq r0, r14              @ compare current &block[n] to block, when block is reached, the loop is finished.
+        sub r14, r14, #2
+        bne __col_loop
 
 
 
@@ -466,15 +466,15 @@ __end_simple_idct_ARM:
 
 @@ kind of sub-function, here not to overload the common case.
 __end_bef_a_evaluation:
-	add r2, r6, r11          @ R2=a0+W6*ROWr16[2] (a1)
+        add r2, r6, r11          @ R2=a0+W6*ROWr16[2] (a1)
         mul r11, r8, r4          @ R11=W2*ROWr16[2]
         sub r4, r6, r11          @ R4=a0-W2*ROWr16[2] (a3)
         add r6, r6, r11          @ R6=a0+W2*ROWr16[2] (a0)
-	bal __end_a_evaluation
+        bal __end_a_evaluation
 
 
 __constant_ptr__:  @@ see #defines at the beginning of the source code for values.
-	.align
+        .align
         .word   W1
         .word   W2
         .word   W3
diff --git a/src/libffmpeg/libavcodec/asv1.c b/src/libffmpeg/libavcodec/asv1.c
index e07880e4f..1cb15d812 100644
--- a/src/libffmpeg/libavcodec/asv1.c
+++ b/src/libffmpeg/libavcodec/asv1.c
@@ -14,14 +14,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file asv1.c
  * ASUS V1/V2 codec.
  */
- 
+
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
@@ -31,7 +31,7 @@
 
 #define VLC_BITS 6
 #define ASV2_LEVEL_VLC_BITS 10
- 
+
 typedef struct ASV1Context{
     AVCodecContext *avctx;
     DSPContext dsp;
@@ -44,11 +44,11 @@ typedef struct ASV1Context{
     int mb_height;
     int mb_width2;
     int mb_height2;
-    DCTELEM __align8 block[6][64];
-    uint16_t __align8 intra_matrix[64];
-    int __align8 q_intra_matrix[64];
+    DECLARE_ALIGNED_8(DCTELEM, block[6][64]);
+    DECLARE_ALIGNED_8(uint16_t, intra_matrix[64]);
+    DECLARE_ALIGNED_8(int, q_intra_matrix[64]);
     uint8_t *bitstream_buffer;
-    int bitstream_buffer_size;
+    unsigned int bitstream_buffer_size;
 } ASV1Context;
 
 static const uint8_t scantab[64]={
@@ -66,7 +66,7 @@ static const uint8_t scantab[64]={
 static const uint8_t ccp_tab[17][2]={
     {0x2,2}, {0x7,5}, {0xB,5}, {0x3,5},
     {0xD,5}, {0x5,5}, {0x9,5}, {0x1,5},
-    {0xE,5}, {0x6,5}, {0xA,5}, {0x2,5}, 
+    {0xE,5}, {0x6,5}, {0xA,5}, {0x2,5},
     {0xC,5}, {0x4,5}, {0x8,5}, {0x3,2},
     {0xF,5}, //EOB
 };
@@ -116,19 +116,19 @@ static void init_vlcs(ASV1Context *a){
     if (!done) {
         done = 1;
 
-        init_vlc(&ccp_vlc, VLC_BITS, 17, 
+        init_vlc(&ccp_vlc, VLC_BITS, 17,
                  &ccp_tab[0][1], 2, 1,
                  &ccp_tab[0][0], 2, 1, 1);
-        init_vlc(&dc_ccp_vlc, VLC_BITS, 8, 
+        init_vlc(&dc_ccp_vlc, VLC_BITS, 8,
                  &dc_ccp_tab[0][1], 2, 1,
                  &dc_ccp_tab[0][0], 2, 1, 1);
-        init_vlc(&ac_ccp_vlc, VLC_BITS, 16, 
+        init_vlc(&ac_ccp_vlc, VLC_BITS, 16,
                  &ac_ccp_tab[0][1], 2, 1,
                  &ac_ccp_tab[0][0], 2, 1, 1);
-        init_vlc(&level_vlc,  VLC_BITS, 7, 
+        init_vlc(&level_vlc,  VLC_BITS, 7,
                  &level_tab[0][1], 2, 1,
                  &level_tab[0][0], 2, 1, 1);
-        init_vlc(&asv2_level_vlc, ASV2_LEVEL_VLC_BITS, 63, 
+        init_vlc(&asv2_level_vlc, ASV2_LEVEL_VLC_BITS, 63,
                  &asv2_level_tab[0][1], 2, 1,
                  &asv2_level_tab[0][0], 2, 1, 1);
     }
@@ -181,7 +181,7 @@ static inline int asv1_decode_block(ASV1Context *a, DCTELEM block[64]){
     int i;
 
     block[0]= 8*get_bits(&a->gb, 8);
-    
+
     for(i=0; i<11; i++){
         const int ccp= get_vlc2(&a->gb, ccp_vlc.table, VLC_BITS, 1);
 
@@ -206,9 +206,9 @@ static inline int asv2_decode_block(ASV1Context *a, DCTELEM block[64]){
     int i, count, ccp;
 
     count= asv2_get_bits(&a->gb, 4);
-    
+
     block[0]= 8*asv2_get_bits(&a->gb, 8);
-    
+
     ccp= get_vlc2(&a->gb, dc_ccp_vlc.table, VLC_BITS, 1);
     if(ccp){
         if(ccp&4) block[a->scantable.permutated[1]]= (asv2_get_level(&a->gb) * a->intra_matrix[1])>>4;
@@ -226,17 +226,17 @@ static inline int asv2_decode_block(ASV1Context *a, DCTELEM block[64]){
             if(ccp&1) block[a->scantable.permutated[4*i+3]]= (asv2_get_level(&a->gb) * a->intra_matrix[4*i+3])>>4;
         }
     }
-    
+
     return 0;
 }
 
 static inline void asv1_encode_block(ASV1Context *a, DCTELEM block[64]){
     int i;
     int nc_count=0;
-    
+
     put_bits(&a->pb, 8, (block[0] + 32)>>6);
     block[0]= 0;
-    
+
     for(i=0; i<10; i++){
         const int index= scantab[4*i];
         int ccp=0;
@@ -247,11 +247,11 @@ static inline void asv1_encode_block(ASV1Context *a, DCTELEM block[64]){
         if( (block[index + 9] = (block[index + 9]*a->q_intra_matrix[index + 9] + (1<<15))>>16) ) ccp |= 1;
 
         if(ccp){
-            for(;nc_count; nc_count--) 
+            for(;nc_count; nc_count--)
                 put_bits(&a->pb, ccp_tab[0][1], ccp_tab[0][0]);
 
             put_bits(&a->pb, ccp_tab[ccp][1], ccp_tab[ccp][0]);
-            
+
             if(ccp&8) asv1_put_level(&a->pb, block[index + 0]);
             if(ccp&4) asv1_put_level(&a->pb, block[index + 8]);
             if(ccp&2) asv1_put_level(&a->pb, block[index + 1]);
@@ -266,20 +266,20 @@ static inline void asv1_encode_block(ASV1Context *a, DCTELEM block[64]){
 static inline void asv2_encode_block(ASV1Context *a, DCTELEM block[64]){
     int i;
     int count=0;
-    
+
     for(count=63; count>3; count--){
         const int index= scantab[count];
 
-        if( (block[index]*a->q_intra_matrix[index] + (1<<15))>>16 ) 
+        if( (block[index]*a->q_intra_matrix[index] + (1<<15))>>16 )
             break;
     }
-    
+
     count >>= 2;
 
     asv2_put_bits(&a->pb, 4, count);
     asv2_put_bits(&a->pb, 8, (block[0] + 32)>>6);
     block[0]= 0;
-    
+
     for(i=0; i<=count; i++){
         const int index= scantab[4*i];
         int ccp=0;
@@ -305,15 +305,15 @@ static inline int decode_mb(ASV1Context *a, DCTELEM block[6][64]){
     int i;
 
     a->dsp.clear_blocks(block[0]);
-    
+
     if(a->avctx->codec_id == CODEC_ID_ASV1){
         for(i=0; i<6; i++){
-            if( asv1_decode_block(a, block[i]) < 0) 
+            if( asv1_decode_block(a, block[i]) < 0)
                 return -1;
         }
     }else{
         for(i=0; i<6; i++){
-            if( asv2_decode_block(a, block[i]) < 0) 
+            if( asv2_decode_block(a, block[i]) < 0)
                 return -1;
         }
     }
@@ -322,7 +322,7 @@ static inline int decode_mb(ASV1Context *a, DCTELEM block[6][64]){
 
 static inline int encode_mb(ASV1Context *a, DCTELEM block[6][64]){
     int i;
-    
+
     if(a->pb.buf_end - a->pb.buf - (put_bits_count(&a->pb)>>3) < 30*16*16*3/2/8){
         av_log(a->avctx, AV_LOG_ERROR, "encoded frame too large\n");
         return -1;
@@ -341,7 +341,7 @@ static inline int encode_mb(ASV1Context *a, DCTELEM block[6][64]){
 static inline void idct_put(ASV1Context *a, int mb_x, int mb_y){
     DCTELEM (*block)[64]= a->block;
     int linesize= a->picture.linesize[0];
-    
+
     uint8_t *dest_y  = a->picture.data[0] + (mb_y * 16* linesize              ) + mb_x * 16;
     uint8_t *dest_cb = a->picture.data[1] + (mb_y * 8 * a->picture.linesize[1]) + mb_x * 8;
     uint8_t *dest_cr = a->picture.data[2] + (mb_y * 8 * a->picture.linesize[2]) + mb_x * 8;
@@ -361,7 +361,7 @@ static inline void dct_get(ASV1Context *a, int mb_x, int mb_y){
     DCTELEM (*block)[64]= a->block;
     int linesize= a->picture.linesize[0];
     int i;
-    
+
     uint8_t *ptr_y  = a->picture.data[0] + (mb_y * 16* linesize              ) + mb_x * 16;
     uint8_t *ptr_cb = a->picture.data[1] + (mb_y * 8 * a->picture.linesize[1]) + mb_x * 8;
     uint8_t *ptr_cr = a->picture.data[2] + (mb_y * 8 * a->picture.linesize[2]) + mb_x * 8;
@@ -372,7 +372,7 @@ static inline void dct_get(ASV1Context *a, int mb_x, int mb_y){
     a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize);
     for(i=0; i<4; i++)
         a->dsp.fdct(block[i]);
-    
+
     if(!(a->avctx->flags&CODEC_FLAG_GRAY)){
         a->dsp.get_pixels(block[4], ptr_cb, a->picture.linesize[1]);
         a->dsp.get_pixels(block[5], ptr_cr, a->picture.linesize[2]);
@@ -381,7 +381,7 @@ static inline void dct_get(ASV1Context *a, int mb_x, int mb_y){
     }
 }
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -402,7 +402,7 @@ static int decode_frame(AVCodecContext *avctx,
     p->key_frame= 1;
 
     a->bitstream_buffer= av_fast_realloc(a->bitstream_buffer, &a->bitstream_buffer_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
-    
+
     if(avctx->codec_id == CODEC_ID_ASV1)
         a->dsp.bswap_buf((uint32_t*)a->bitstream_buffer, (uint32_t*)buf, buf_size/4);
     else{
@@ -417,7 +417,7 @@ static int decode_frame(AVCodecContext *avctx,
         for(mb_x=0; mb_x<a->mb_width2; mb_x++){
             if( decode_mb(a, a->block) <0)
                 return -1;
-             
+
             idct_put(a, mb_x, mb_y);
         }
     }
@@ -427,7 +427,7 @@ static int decode_frame(AVCodecContext *avctx,
         for(mb_y=0; mb_y<a->mb_height2; mb_y++){
             if( decode_mb(a, a->block) <0)
                 return -1;
-             
+
             idct_put(a, mb_x, mb_y);
         }
     }
@@ -437,11 +437,11 @@ static int decode_frame(AVCodecContext *avctx,
         for(mb_x=0; mb_x<a->mb_width; mb_x++){
             if( decode_mb(a, a->block) <0)
                 return -1;
-             
+
             idct_put(a, mb_x, mb_y);
         }
     }
-#if 0    
+#if 0
 int i;
 printf("%d %d\n", 8*buf_size, get_bits_count(&a->gb));
 for(i=get_bits_count(&a->gb); i<8*buf_size; i++){
@@ -457,7 +457,7 @@ for(i=0; i<s->avctx->extradata_size; i++){
     *data_size = sizeof(AVPicture);
 
     emms_c();
-    
+
     return (get_bits_count(&a->gb)+31)/32*4;
 }
 
@@ -469,7 +469,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
     int mb_x, mb_y;
 
     init_put_bits(&a->pb, buf, buf_size);
-    
+
     *p = *pict;
     p->pict_type= I_TYPE;
     p->key_frame= 1;
@@ -497,13 +497,13 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
         }
     }
     emms_c();
-    
+
     align_put_bits(&a->pb);
     while(put_bits_count(&a->pb)&31)
         put_bits(&a->pb, 8, 0);
-    
+
     size= put_bits_count(&a->pb)/32;
-    
+
     if(avctx->codec_id == CODEC_ID_ASV1)
         a->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, size);
     else{
@@ -511,7 +511,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
         for(i=0; i<4*size; i++)
             buf[i]= ff_reverse[ buf[i] ];
     }
-    
+
     return size*4;
 }
 
@@ -534,7 +534,7 @@ static int decode_init(AVCodecContext *avctx){
     AVFrame *p= (AVFrame*)&a->picture;
     int i;
     const int scale= avctx->codec_id == CODEC_ID_ASV1 ? 1 : 2;
- 
+
     common_init(avctx);
     init_vlcs(a);
     ff_init_scantable(a->dsp.idct_permutation, &a->scantable, scantab);
@@ -569,16 +569,16 @@ static int encode_init(AVCodecContext *avctx){
     const int scale= avctx->codec_id == CODEC_ID_ASV1 ? 1 : 2;
 
     common_init(avctx);
-    
+
     if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE;
 
     a->inv_qscale= (32*scale*FF_QUALITY_SCALE +  avctx->global_quality/2) / avctx->global_quality;
-    
+
     avctx->extradata= av_mallocz(8);
     avctx->extradata_size=8;
     ((uint32_t*)avctx->extradata)[0]= le2me_32(a->inv_qscale);
     ((uint32_t*)avctx->extradata)[1]= le2me_32(ff_get_fourcc("ASUS"));
-    
+
     for(i=0; i<64; i++){
         int q= 32*scale*ff_mpeg1_default_intra_matrix[i];
         a->q_intra_matrix[i]= ((a->inv_qscale<<16) + q/2) / q;
@@ -593,7 +593,7 @@ static int decode_end(AVCodecContext *avctx){
     av_freep(&a->bitstream_buffer);
     av_freep(&a->picture.qscale_table);
     a->bitstream_buffer_size=0;
-    
+
     return 0;
 }
 
diff --git a/src/libffmpeg/libavcodec/avcodec.h b/src/libffmpeg/libavcodec/avcodec.h
index 6864bfba3..430504dc3 100644
--- a/src/libffmpeg/libavcodec/avcodec.h
+++ b/src/libffmpeg/libavcodec/avcodec.h
@@ -15,21 +15,21 @@ extern "C" {
 #include <sys/types.h> /* size_t */
 
 //FIXME the following 2 really dont belong in here
-#define FFMPEG_VERSION_INT     0x000409
-#define FFMPEG_VERSION         "CVS"
+#define FFMPEG_VERSION_INT      0x000409
+#define FFMPEG_VERSION          "CVS"
 
-#define AV_STRINGIFY(s)	AV_TOSTRING(s)
+#define AV_STRINGIFY(s)         AV_TOSTRING(s)
 #define AV_TOSTRING(s) #s
 
-#define LIBAVCODEC_VERSION_INT ((50<<16)+(1<<8)+0)
-#define LIBAVCODEC_VERSION     50.1.0
-#define LIBAVCODEC_BUILD       LIBAVCODEC_VERSION_INT
+#define LIBAVCODEC_VERSION_INT  ((51<<16)+(1<<8)+0)
+#define LIBAVCODEC_VERSION      51.1.0
+#define LIBAVCODEC_BUILD        LIBAVCODEC_VERSION_INT
 
-#define LIBAVCODEC_IDENT       "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
+#define LIBAVCODEC_IDENT        "Lavc" AV_STRINGIFY(LIBAVCODEC_VERSION)
 
-#define AV_NOPTS_VALUE int64_t_C(0x8000000000000000)
-#define AV_TIME_BASE 1000000
-#define AV_TIME_BASE_Q (AVRational){1, AV_TIME_BASE}
+#define AV_NOPTS_VALUE          int64_t_C(0x8000000000000000)
+#define AV_TIME_BASE            1000000
+#define AV_TIME_BASE_Q          (AVRational){1, AV_TIME_BASE}
 
 /* FIXME: We cannot use ffmpeg's XvMC capabilities, since that would require
  * linking the ffmpeg plugin against XvMC libraries, which is a bad thing,
@@ -39,7 +39,7 @@ extern "C" {
 #undef HAVE_XVMC
 
 enum CodecID {
-    CODEC_ID_NONE, 
+    CODEC_ID_NONE,
     CODEC_ID_MPEG1VIDEO,
     CODEC_ID_MPEG2VIDEO, /* prefered ID for MPEG Video 1 or 2 decoding */
     CODEC_ID_MPEG2VIDEO_XVMC,
@@ -51,6 +51,7 @@ enum CodecID {
     CODEC_ID_MJPEGB,
     CODEC_ID_LJPEG,
     CODEC_ID_SP5X,
+    CODEC_ID_JPEGLS,
     CODEC_ID_MPEG4,
     CODEC_ID_RAWVIDEO,
     CODEC_ID_MSMPEG4V1,
@@ -119,6 +120,8 @@ enum CodecID {
     CODEC_ID_INDEO2,
     CODEC_ID_FRAPS,
     CODEC_ID_TRUEMOTION2,
+    CODEC_ID_BMP,
+    CODEC_ID_CSCD,
 
     /* various pcm "codecs" */
     CODEC_ID_PCM_S16LE= 0x10000,
@@ -169,7 +172,7 @@ enum CodecID {
     CODEC_ID_INTERPLAY_DPCM,
     CODEC_ID_XAN_DPCM,
     CODEC_ID_SOL_DPCM,
-    
+
     CODEC_ID_MP2= 0x15000,
     CODEC_ID_MP3, /* prefered ID for MPEG Audio layer 1, 2 or3 decoding */
     CODEC_ID_AAC,
@@ -191,15 +194,17 @@ enum CodecID {
     CODEC_ID_SHORTEN,
     CODEC_ID_ALAC,
     CODEC_ID_WESTWOOD_SND1,
-    CODEC_ID_GSM,    
+    CODEC_ID_GSM,
     CODEC_ID_QDM2,
-    
-    CODEC_ID_OGGTHEORA= 0x16000, 
+    CODEC_ID_COOK,
+    CODEC_ID_TRUESPEECH,
+
+    CODEC_ID_OGGTHEORA= 0x16000,
 
     /* subtitle codecs */
-    CODEC_ID_DVD_SUBTITLE= 0x17000, 
-    CODEC_ID_DVB_SUBTITLE, 
-    
+    CODEC_ID_DVD_SUBTITLE= 0x17000,
+    CODEC_ID_DVB_SUBTITLE,
+
     CODEC_ID_MPEG2TS= 0x20000, /* _FAKE_ codec to indicate a raw MPEG2 transport
                          stream (only used by libavformat) */
 };
@@ -216,7 +221,7 @@ enum CodecType {
 };
 
 /**
- * Pixel format. Notes: 
+ * Pixel format. Notes:
  *
  * PIX_FMT_RGBA32 is handled in an endian-specific manner. A RGBA
  * color is put together as:
@@ -236,7 +241,7 @@ enum CodecType {
 enum PixelFormat {
     PIX_FMT_NONE= -1,
     PIX_FMT_YUV420P,   ///< Planar YUV 4:2:0 (1 Cr & Cb sample per 2x2 Y samples)
-    PIX_FMT_YUV422,    ///< Packed pixel, Y0 Cb Y1 Cr 
+    PIX_FMT_YUV422,    ///< Packed pixel, Y0 Cb Y1 Cr
     PIX_FMT_RGB24,     ///< Packed pixel, 3 bytes per pixel, RGBRGB...
     PIX_FMT_BGR24,     ///< Packed pixel, 3 bytes per pixel, BGRBGR...
     PIX_FMT_YUV422P,   ///< Planar YUV 4:2:2 (1 Cr & Cb sample per 2x1 Y samples)
@@ -244,26 +249,26 @@ enum PixelFormat {
     PIX_FMT_RGBA32,    ///< Packed pixel, 4 bytes per pixel, BGRABGRA..., stored in cpu endianness
     PIX_FMT_YUV410P,   ///< Planar YUV 4:1:0 (1 Cr & Cb sample per 4x4 Y samples)
     PIX_FMT_YUV411P,   ///< Planar YUV 4:1:1 (1 Cr & Cb sample per 4x1 Y samples)
-    PIX_FMT_RGB565,    ///< always stored in cpu endianness 
-    PIX_FMT_RGB555,    ///< always stored in cpu endianness, most significant bit to 1 
+    PIX_FMT_RGB565,    ///< always stored in cpu endianness
+    PIX_FMT_RGB555,    ///< always stored in cpu endianness, most significant bit to 1
     PIX_FMT_GRAY8,
-    PIX_FMT_MONOWHITE, ///< 0 is white 
-    PIX_FMT_MONOBLACK, ///< 0 is black 
-    PIX_FMT_PAL8,      ///< 8 bit with RGBA palette 
+    PIX_FMT_MONOWHITE, ///< 0 is white
+    PIX_FMT_MONOBLACK, ///< 0 is black
+    PIX_FMT_PAL8,      ///< 8 bit with RGBA palette
     PIX_FMT_YUVJ420P,  ///< Planar YUV 4:2:0 full scale (jpeg)
     PIX_FMT_YUVJ422P,  ///< Planar YUV 4:2:2 full scale (jpeg)
     PIX_FMT_YUVJ444P,  ///< Planar YUV 4:4:4 full scale (jpeg)
     PIX_FMT_XVMC_MPEG2_MC,///< XVideo Motion Acceleration via common packet passing(xvmc_render.h)
     PIX_FMT_XVMC_MPEG2_IDCT,
-    PIX_FMT_UYVY422,   ///< Packed pixel, Cb Y0 Cr Y1 
+    PIX_FMT_UYVY422,   ///< Packed pixel, Cb Y0 Cr Y1
     PIX_FMT_UYVY411,   ///< Packed pixel, Cb Y0 Y1 Cr Y2 Y3
     PIX_FMT_NB,
 };
 
 /* currently unused, may be used if 24/32 bits samples ever supported */
 enum SampleFormat {
-    SAMPLE_FMT_S16 = 0,         ///< signed 16 bits 
-    SAMPLE_FMT_S32,             ///< signed 32 bits 
+    SAMPLE_FMT_S16 = 0,         ///< signed 16 bits
+    SAMPLE_FMT_S32,             ///< signed 32 bits
     SAMPLE_FMT_FLT,             ///< float
     SAMPLE_FMT_DBL,             ///< double
 };
@@ -273,7 +278,7 @@ enum SampleFormat {
 
 /**
  * Required number of additionally allocated bytes at the end of the input bitstream for decoding.
- * this is mainly needed because some optimized bitstream readers read 
+ * this is mainly needed because some optimized bitstream readers read
  * 32 or 64 bit at once and could read over the end<br>
  * Note, if the first 23 bits of the additional bytes are not 0 then damaged
  * MPEG bitstreams could cause overread and segfault
@@ -293,7 +298,10 @@ enum Motion_Est_ID {
     ME_LOG,
     ME_PHODS,
     ME_EPZS,
-    ME_X1
+    ME_X1,
+    ME_HEX,
+    ME_UMH,
+    ME_ITER,
 };
 
 enum AVDiscard{
@@ -319,38 +327,38 @@ extern int motion_estimation_method;
 #define FF_MAX_B_FRAMES 8
 
 /* encoding support
-   these flags can be passed in AVCodecContext.flags before initing 
+   these flags can be passed in AVCodecContext.flags before initing
    Note: not everything is supported yet.
 */
 
-#define CODEC_FLAG_QSCALE 0x0002  ///< use fixed qscale 
+#define CODEC_FLAG_QSCALE 0x0002  ///< use fixed qscale
 #define CODEC_FLAG_4MV    0x0004  ///< 4 MV per MB allowed / Advanced prediction for H263
-#define CODEC_FLAG_QPEL   0x0010  ///< use qpel MC 
-#define CODEC_FLAG_GMC    0x0020  ///< use GMC 
-#define CODEC_FLAG_MV0    0x0040  ///< always try a MB with MV=<0,0> 
-#define CODEC_FLAG_PART   0x0080  ///< use data partitioning 
-/* parent program gurantees that the input for b-frame containing streams is not written to 
+#define CODEC_FLAG_QPEL   0x0010  ///< use qpel MC
+#define CODEC_FLAG_GMC    0x0020  ///< use GMC
+#define CODEC_FLAG_MV0    0x0040  ///< always try a MB with MV=<0,0>
+#define CODEC_FLAG_PART   0x0080  ///< use data partitioning
+/* parent program guarantees that the input for b-frame containing streams is not written to
    for at least s->max_b_frames+1 frames, if this is not set than the input will be copied */
 #define CODEC_FLAG_INPUT_PRESERVED 0x0100
-#define CODEC_FLAG_PASS1 0x0200   ///< use internal 2pass ratecontrol in first  pass mode 
-#define CODEC_FLAG_PASS2 0x0400   ///< use internal 2pass ratecontrol in second pass mode 
-#define CODEC_FLAG_EXTERN_HUFF 0x1000 ///< use external huffman table (for mjpeg) 
-#define CODEC_FLAG_GRAY  0x2000   ///< only decode/encode grayscale 
+#define CODEC_FLAG_PASS1 0x0200   ///< use internal 2pass ratecontrol in first  pass mode
+#define CODEC_FLAG_PASS2 0x0400   ///< use internal 2pass ratecontrol in second pass mode
+#define CODEC_FLAG_EXTERN_HUFF 0x1000 ///< use external huffman table (for mjpeg)
+#define CODEC_FLAG_GRAY  0x2000   ///< only decode/encode grayscale
 #define CODEC_FLAG_EMU_EDGE 0x4000///< don't draw edges
-#define CODEC_FLAG_PSNR           0x8000 ///< error[?] variables will be set during encoding 
-#define CODEC_FLAG_TRUNCATED  0x00010000 /** input bitstream might be truncated at a random location instead 
+#define CODEC_FLAG_PSNR           0x8000 ///< error[?] variables will be set during encoding
+#define CODEC_FLAG_TRUNCATED  0x00010000 /** input bitstream might be truncated at a random location instead
                                             of only at frame boundaries */
-#define CODEC_FLAG_NORMALIZE_AQP  0x00020000 ///< normalize adaptive quantization 
-#define CODEC_FLAG_INTERLACED_DCT 0x00040000 ///< use interlaced dct 
+#define CODEC_FLAG_NORMALIZE_AQP  0x00020000 ///< normalize adaptive quantization
+#define CODEC_FLAG_INTERLACED_DCT 0x00040000 ///< use interlaced dct
 #define CODEC_FLAG_LOW_DELAY      0x00080000 ///< force low delay
-#define CODEC_FLAG_ALT_SCAN       0x00100000 ///< use alternate scan 
-#define CODEC_FLAG_TRELLIS_QUANT  0x00200000 ///< use trellis quantization 
-#define CODEC_FLAG_GLOBAL_HEADER  0x00400000 ///< place global headers in extradata instead of every keyframe 
-#define CODEC_FLAG_BITEXACT       0x00800000 ///< use only bitexact stuff (except (i)dct) 
+#define CODEC_FLAG_ALT_SCAN       0x00100000 ///< use alternate scan
+#define CODEC_FLAG_TRELLIS_QUANT  0x00200000 ///< use trellis quantization
+#define CODEC_FLAG_GLOBAL_HEADER  0x00400000 ///< place global headers in extradata instead of every keyframe
+#define CODEC_FLAG_BITEXACT       0x00800000 ///< use only bitexact stuff (except (i)dct)
 /* Fx : Flag for h263+ extra options */
 #define CODEC_FLAG_H263P_AIC      0x01000000 ///< H263 Advanced intra coding / MPEG4 AC prediction (remove this)
 #define CODEC_FLAG_AC_PRED        0x01000000 ///< H263 Advanced intra coding / MPEG4 AC prediction
-#define CODEC_FLAG_H263P_UMV      0x02000000 ///< Unlimited motion vector  
+#define CODEC_FLAG_H263P_UMV      0x02000000 ///< Unlimited motion vector
 #define CODEC_FLAG_CBP_RD         0x04000000 ///< use rate distortion optimization for cbp
 #define CODEC_FLAG_QP_RD          0x08000000 ///< use rate distortion optimization for qp selectioon
 #define CODEC_FLAG_H263P_AIV      0x00000008 ///< H263 Alternative inter vlc
@@ -364,15 +372,22 @@ extern int motion_estimation_method;
 #define CODEC_FLAG2_STRICT_GOP    0x00000002 ///< strictly enforce GOP size
 #define CODEC_FLAG2_NO_OUTPUT     0x00000004 ///< skip bitstream encoding
 #define CODEC_FLAG2_LOCAL_HEADER  0x00000008 ///< place global headers at every keyframe instead of in extradata
+#define CODEC_FLAG2_BPYRAMID      0x00000010 ///< H.264 allow b-frames to be used as references
+#define CODEC_FLAG2_WPRED         0x00000020 ///< H.264 weighted biprediction for b-frames
+#define CODEC_FLAG2_MIXED_REFS    0x00000040 ///< H.264 multiple references per partition
+#define CODEC_FLAG2_8X8DCT        0x00000080 ///< H.264 high profile 8x8 transform
+#define CODEC_FLAG2_FASTPSKIP     0x00000100 ///< H.264 fast pskip
+#define CODEC_FLAG2_AUD           0x00000200 ///< H.264 access unit delimiters
+#define CODEC_FLAG2_BRDO          0x00000400 ///< b-frame rate-distortion optimization
 
 /* Unsupported options :
- * 		Syntax Arithmetic coding (SAC)
- * 		Reference Picture Selection
- * 		Independant Segment Decoding */
+ *              Syntax Arithmetic coding (SAC)
+ *              Reference Picture Selection
+ *              Independent Segment Decoding */
 /* /Fx */
 /* codec capabilities */
 
-#define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 ///< decoder can use draw_horiz_band callback 
+#define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 ///< decoder can use draw_horiz_band callback
 /**
  * Codec uses get_buffer() for allocating buffers.
  * direct rendering method 1
@@ -384,7 +399,7 @@ extern int motion_estimation_method;
 #define CODEC_CAP_TRUNCATED       0x0008
 /* codec can export data for HW decoding (XvMC) */
 #define CODEC_CAP_HWACCEL         0x0010
-/** 
+/**
  * codec has a non zero delay and needs to be feeded with NULL at the end to get the delayed data.
  * if this is not set, the codec is guranteed to never be feeded with NULL data
  */
@@ -650,8 +665,9 @@ typedef struct AVPanScan{
      */\
     int8_t *ref_index[2];
 
-#define FF_QSCALE_TYPE_MPEG1	0
-#define FF_QSCALE_TYPE_MPEG2	1
+#define FF_QSCALE_TYPE_MPEG1 0
+#define FF_QSCALE_TYPE_MPEG2 1
+#define FF_QSCALE_TYPE_H264  2
 
 #define FF_BUFFER_TYPE_INTERNAL 1
 #define FF_BUFFER_TYPE_USER     2 ///< Direct rendering buffers (image is (de)allocated by user)
@@ -687,9 +703,9 @@ typedef struct AVCLASS AVClass;
 struct AVCLASS {
     const char* class_name;
     const char* (*item_name)(void*); /* actually passing a pointer to an AVCodecContext
-					or AVFormatContext, which begin with an AVClass.
-					Needed because av_log is in libavcodec and has no visibility
-					of AVIn/OutputFormat */
+                                        or AVFormatContext, which begin with an AVClass.
+                                        Needed because av_log is in libavcodec and has no visibility
+                                        of AVIn/OutputFormat */
     struct AVOption *option;
 };
 
@@ -705,7 +721,7 @@ typedef struct AVCodecContext {
     /**
      * the average bitrate.
      * - encoding: set by user. unused for constant quantizer encoding
-     * - decoding: set by lavc. 0 or some bitrate if this info is available in the stream 
+     * - decoding: set by lavc. 0 or some bitrate if this info is available in the stream
      */
     int bit_rate;
 
@@ -715,7 +731,7 @@ typedef struct AVCodecContext {
      * - encoding: set by user. unused for constant quantizer encoding
      * - decoding: unused
      */
-    int bit_rate_tolerance; 
+    int bit_rate_tolerance;
 
     /**
      * CODEC_FLAG_*.
@@ -726,13 +742,15 @@ typedef struct AVCodecContext {
 
     /**
      * some codecs needs additionnal format info. It is stored here
-     * - encoding: set by user. 
+     * - encoding: set by user.
      * - decoding: set by lavc. (FIXME is this ok?)
      */
     int sub_id;
 
     /**
      * motion estimation algorithm used for video coding.
+     * 1 (zero), 2 (full), 3 (log), 4 (phods), 5 (epzs), 6 (x1), 7 (hex),
+     * 8 (umh), 9 (iter) [7, 8 are x264 specific, 9 is snow specific]
      * - encoding: MUST be set by user.
      * - decoding: unused
      */
@@ -745,12 +763,13 @@ typedef struct AVCodecContext {
      * mpeg4: global headers (they can be in the bitstream or here)
      * the allocated memory should be FF_INPUT_BUFFER_PADDING_SIZE bytes larger
      * then extradata_size to avoid prolems if its read with the bitstream reader
+     * the bytewise contents of extradata must not depend on the architecture or cpu endianness
      * - encoding: set/allocated/freed by lavc.
      * - decoding: set/allocated/freed by user.
      */
     void *extradata;
     int extradata_size;
-    
+
     /**
      * this is the fundamental unit of time (in seconds) in terms
      * of which frame timestamps are represented. for fixed-fps content,
@@ -760,17 +779,17 @@ typedef struct AVCodecContext {
      * - decoding: set by lavc.
      */
     AVRational time_base;
-    
+
     /* video only */
     /**
      * picture width / height.
-     * - encoding: MUST be set by user. 
+     * - encoding: MUST be set by user.
      * - decoding: set by lavc.
-     * Note, for compatibility its possible to set this instead of 
+     * Note, for compatibility it is possible to set this instead of
      * coded_width/height before decoding
      */
     int width, height;
-    
+
 #define FF_ASPECT_EXTENDED 15
 
     /**
@@ -786,15 +805,15 @@ typedef struct AVCodecContext {
      * - decoding: set by lavc.
      */
     enum PixelFormat pix_fmt;
- 
+
     /**
-     * Frame rate emulation. If not zero lower layer (i.e. format handler) 
+     * Frame rate emulation. If not zero lower layer (i.e. format handler)
      * has to read frames at native frame rate.
      * - encoding: set by user.
      * - decoding: unused.
      */
     int rate_emu;
-       
+
     /**
      * if non NULL, 'draw_horiz_band' is called by the libavcodec
      * decoder to draw an horizontal band. It improve cache usage. Not
@@ -812,7 +831,7 @@ typedef struct AVCodecContext {
                             int y, int type, int height);
 
     /* audio only */
-    int sample_rate; ///< samples per sec 
+    int sample_rate; ///< samples per sec
     int channels;
 
     /**
@@ -820,28 +839,28 @@ typedef struct AVCodecContext {
      * - encoding: set by user.
      * - decoding: set by lavc.
      */
-    enum SampleFormat sample_fmt;  ///< sample format, currenly unused 
+    enum SampleFormat sample_fmt;  ///< sample format, currently unused
 
     /* the following data should not be initialized */
     /**
-     * samples per packet. initialized when calling 'init' 
+     * samples per packet. initialized when calling 'init'
      */
     int frame_size;
-    int frame_number;   ///< audio or video frame number 
-    int real_pict_num;  ///< returns the real picture number of previous encoded frame 
-    
+    int frame_number;   ///< audio or video frame number
+    int real_pict_num;  ///< returns the real picture number of previous encoded frame
+
     /**
-     * number of frames the decoded output will be delayed relative to 
+     * number of frames the decoded output will be delayed relative to
      * the encoded input.
      * - encoding: set by lavc.
      * - decoding: unused
      */
     int delay;
-    
+
     /* - encoding parameters */
     float qcompress;  ///< amount of qscale change between easy & hard scenes (0.0-1.0)
-    float qblur;      ///< amount of qscale smoothing over time (0.0-1.0) 
-    
+    float qblur;      ///< amount of qscale smoothing over time (0.0-1.0)
+
     /**
      * minimum quantizer.
      * - encoding: set by user.
@@ -877,9 +896,11 @@ typedef struct AVCodecContext {
      * - decoding: unused
      */
     float b_quant_factor;
-    
+
     /** obsolete FIXME remove */
     int rc_strategy;
+#define FF_RC_STRATEGY_XVID 1
+
     int b_frame_strategy;
 
     /**
@@ -889,14 +910,14 @@ typedef struct AVCodecContext {
      * - decoding: set by user. 1-> skip b frames, 2-> skip idct/dequant too, 5-> skip everything except header
      */
     int hurry_up;
-    
+
     struct AVCodec *codec;
-    
+
     void *priv_data;
 
     /* unused, FIXME remove*/
     int rtp_mode;
-    
+
     int rtp_payload_size;   /* The size of the RTP payload: the coder will  */
                             /* do it's best to deliver a chunk with size    */
                             /* below rtp_payload_size, the chunk will start */
@@ -904,14 +925,14 @@ typedef struct AVCodecContext {
                             /* This doesn't take account of any particular  */
                             /* headers inside the transmited RTP payload    */
 
-    
+
     /* The RTP callback: This function is called   */
     /* every time the encoder has a packet to send */
     /* Depends on the encoder if the data starts   */
     /* with a Start Code (it should) H.263 does.   */
     /* mb_nb contains the number of macroblocks    */
     /* encoded in the RTP payload                  */
-    void (*rtp_callback)(struct AVCodecContext *avctx, void *data, int size, int mb_nb); 
+    void (*rtp_callback)(struct AVCodecContext *avctx, void *data, int size, int mb_nb);
 
     /* statistics, used for 2-pass encoding */
     int mv_bits;
@@ -922,7 +943,7 @@ typedef struct AVCodecContext {
     int p_count;
     int skip_count;
     int misc_bits;
-    
+
     /**
      * number of bits used for the previously encoded frame.
      * - encoding: set by lavc
@@ -940,7 +961,7 @@ typedef struct AVCodecContext {
     char codec_name[32];
     enum CodecType codec_type; /* see CODEC_TYPE_xxx */
     enum CodecID codec_id; /* see CODEC_ID_xxx */
-    
+
     /**
      * fourcc (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
      * this is used to workaround some encoder bugs
@@ -948,7 +969,7 @@ typedef struct AVCodecContext {
      * - decoding: set by user, will be converted to upper case by lavc during init
      */
     unsigned int codec_tag;
-    
+
     /**
      * workaround bugs in encoders which sometimes cannot be detected automatically.
      * - encoding: set by user
@@ -971,21 +992,21 @@ typedef struct AVCodecContext {
 #define FF_BUG_DC_CLIP          4096
 #define FF_BUG_MS               8192 ///< workaround various bugs in microsofts broken decoders
 //#define FF_BUG_FAKE_SCALABILITY 16 //autodetection should work 100%
-        
+
     /**
      * luma single coeff elimination threshold.
      * - encoding: set by user
      * - decoding: unused
      */
     int luma_elim_threshold;
-    
+
     /**
      * chroma single coeff elimination threshold.
      * - encoding: set by user
      * - decoding: unused
      */
     int chroma_elim_threshold;
-    
+
     /**
      * strictly follow the std (MPEG4, ...).
      * - encoding: set by user
@@ -1006,7 +1027,7 @@ typedef struct AVCodecContext {
      * - decoding: unused
      */
     float b_quant_offset;
-    
+
     /**
      * error resilience higher values will detect more errors but may missdetect
      * some more or less valid parts as errors.
@@ -1018,7 +1039,7 @@ typedef struct AVCodecContext {
 #define FF_ER_COMPLIANT       2
 #define FF_ER_AGGRESSIVE      3
 #define FF_ER_VERY_AGGRESSIVE 4
-    
+
     /**
      * called at the beginning of each frame to get a buffer for it.
      * if pic.reference is set then the frame will be read later by lavc
@@ -1028,7 +1049,7 @@ typedef struct AVCodecContext {
      * - decoding: set by lavc, user can override
      */
     int (*get_buffer)(struct AVCodecContext *c, AVFrame *pic);
-    
+
     /**
      * called to release buffers which where allocated with get_buffer.
      * a released buffer can be reused in get_buffer()
@@ -1050,25 +1071,25 @@ typedef struct AVCodecContext {
      * used by some WAV based audio codecs
      */
     int block_align;
-    
+
     int parse_only; /* - decoding only: if true, only parsing is done
                        (function avcodec_parse_frame()). The frame
                        data is returned. Only MPEG codecs support this now. */
-    
+
     /**
      * 0-> h263 quant 1-> mpeg quant.
      * - encoding: set by user.
      * - decoding: unused
      */
     int mpeg_quant;
-    
+
     /**
      * pass1 encoding statistics output buffer.
      * - encoding: set by lavc
      * - decoding: unused
      */
     char *stats_out;
-    
+
     /**
      * pass2 encoding statistics input buffer.
      * concatenated stuff from stats_out of pass1 should be placed here
@@ -1076,7 +1097,7 @@ typedef struct AVCodecContext {
      * - decoding: unused
      */
     char *stats_in;
-    
+
     /**
      * ratecontrol qmin qmax limiting method.
      * 0-> clipping, 1-> use a nice continous function to limit qscale wthin qmin/qmax
@@ -1087,7 +1108,7 @@ typedef struct AVCodecContext {
 
     float rc_qmod_amp;
     int rc_qmod_freq;
-    
+
     /**
      * ratecontrol override, see RcOverride.
      * - encoding: allocated/set/freed by user.
@@ -1095,28 +1116,28 @@ typedef struct AVCodecContext {
      */
     RcOverride *rc_override;
     int rc_override_count;
-    
+
     /**
      * rate control equation.
      * - encoding: set by user
      * - decoding: unused
      */
     char *rc_eq;
-    
+
     /**
      * maximum bitrate.
      * - encoding: set by user.
      * - decoding: unused
      */
     int rc_max_rate;
-    
+
     /**
      * minimum bitrate.
      * - encoding: set by user.
      * - decoding: unused
      */
     int rc_min_rate;
-    
+
     /**
      * decoder bitstream buffer size.
      * - encoding: set by user.
@@ -1133,14 +1154,14 @@ typedef struct AVCodecContext {
      * - decoding: unused
      */
     float i_quant_factor;
-    
+
     /**
      * qscale offset between p and i frames.
      * - encoding: set by user.
      * - decoding: unused
      */
     float i_quant_offset;
-    
+
     /**
      * initial complexity for pass1 ratecontrol.
      * - encoding: set by user.
@@ -1161,28 +1182,28 @@ typedef struct AVCodecContext {
 #define FF_DCT_MLIB    4
 #define FF_DCT_ALTIVEC 5
 #define FF_DCT_FAAN    6
-    
+
     /**
      * luminance masking (0-> disabled).
      * - encoding: set by user
      * - decoding: unused
      */
     float lumi_masking;
-    
+
     /**
      * temporary complexity masking (0-> disabled).
      * - encoding: set by user
      * - decoding: unused
      */
     float temporal_cplx_masking;
-    
+
     /**
      * spatial complexity masking (0-> disabled).
      * - encoding: set by user
      * - decoding: unused
      */
     float spatial_cplx_masking;
-    
+
     /**
      * p block masking (0-> disabled).
      * - encoding: set by user
@@ -1196,11 +1217,11 @@ typedef struct AVCodecContext {
      * - decoding: unused
      */
     float dark_masking;
-    
-    
+
+
     /* for binary compatibility */
     int unused;
-    
+
     /**
      * idct algorithm, see FF_IDCT_* below.
      * - encoding: set by user
@@ -1254,18 +1275,18 @@ typedef struct AVCodecContext {
      * result into program crash)
      */
     unsigned dsp_mask;
-#define FF_MM_FORCE	0x80000000 /* force usage of selected flags (OR) */
+#define FF_MM_FORCE    0x80000000 /* force usage of selected flags (OR) */
     /* lower 16 bits - CPU features */
 #ifdef HAVE_MMX
-#define FF_MM_MMX	0x0001 /* standard MMX */
-#define FF_MM_3DNOW	0x0004 /* AMD 3DNOW */
-#define FF_MM_MMXEXT	0x0002 /* SSE integer functions or AMD MMX ext */
-#define FF_MM_SSE	0x0008 /* SSE functions */
-#define FF_MM_SSE2	0x0010 /* PIV SSE2 functions */
-#define FF_MM_3DNOWEXT	0x0020 /* AMD 3DNowExt */
+#define FF_MM_MMX      0x0001 /* standard MMX */
+#define FF_MM_3DNOW    0x0004 /* AMD 3DNOW */
+#define FF_MM_MMXEXT   0x0002 /* SSE integer functions or AMD MMX ext */
+#define FF_MM_SSE      0x0008 /* SSE functions */
+#define FF_MM_SSE2     0x0010 /* PIV SSE2 functions */
+#define FF_MM_3DNOWEXT 0x0020 /* AMD 3DNowExt */
 #endif /* HAVE_MMX */
 #ifdef HAVE_IWMMXT
-#define FF_MM_IWMMXT	0x0100 /* XScale IWMMXT */
+#define FF_MM_IWMMXT   0x0100 /* XScale IWMMXT */
 #endif /* HAVE_IWMMXT */
 
     /**
@@ -1274,7 +1295,7 @@ typedef struct AVCodecContext {
      * - decoding: set by user
      */
      int bits_per_sample;
-    
+
     /**
      * prediction method (needed for huffyuv).
      * - encoding: set by user
@@ -1284,7 +1305,7 @@ typedef struct AVCodecContext {
 #define FF_PRED_LEFT   0
 #define FF_PRED_PLANE  1
 #define FF_PRED_MEDIAN 2
-    
+
     /**
      * sample aspect ratio (0 if unknown).
      * numerator and denominator must be relative prime and smaller then 256 for some video standards
@@ -1321,7 +1342,7 @@ typedef struct AVCodecContext {
 #define FF_DEBUG_BUGS      0x00001000
 #define FF_DEBUG_VIS_QP    0x00002000
 #define FF_DEBUG_VIS_MB_TYPE 0x00004000
-    
+
     /**
      * debug.
      * - encoding: set by user.
@@ -1338,7 +1359,7 @@ typedef struct AVCodecContext {
      * - decoding: unused
      */
     uint64_t error[4];
-    
+
     /**
      * minimum MB quantizer.
      * - encoding: unused
@@ -1352,7 +1373,7 @@ typedef struct AVCodecContext {
      * - decoding: unused
      */
     int mb_qmax;
-    
+
     /**
      * motion estimation compare function.
      * - encoding: set by user.
@@ -1391,8 +1412,9 @@ typedef struct AVCodecContext {
 #define FF_CMP_W53  11
 #define FF_CMP_W97  12
 #define FF_CMP_DCTMAX 13
+#define FF_CMP_DCT264 14
 #define FF_CMP_CHROMA 256
-    
+
     /**
      * ME diamond size & shape.
      * - encoding: set by user.
@@ -1450,9 +1472,9 @@ typedef struct AVCodecContext {
      * DTG active format information (additionnal aspect ratio
      * information only used in DVB MPEG2 transport streams). 0 if
      * not set.
-     * 
+     *
      * - encoding: unused.
-     * - decoding: set by decoder 
+     * - decoding: set by decoder
      */
     int dtg_active_format;
 #define FF_DTG_AFD_SAME         8
@@ -1466,7 +1488,7 @@ typedef struct AVCodecContext {
     /**
      * Maximum motion estimation search range in subpel units.
      * if 0 then no limit
-     * 
+     *
      * - encoding: set by user.
      * - decoding: unused.
      */
@@ -1479,7 +1501,7 @@ typedef struct AVCodecContext {
      */
     int intra_quant_bias;
 #define FF_DEFAULT_QUANT_BIAS 999999
-    
+
     /**
      * inter quantizer bias.
      * - encoding: set by user.
@@ -1494,15 +1516,15 @@ typedef struct AVCodecContext {
      *             table have to be stored somewhere FIXME
      */
     int color_table_id;
-    
+
     /**
-     * internal_buffer count. 
+     * internal_buffer count.
      * Don't touch, used by lavc default_get_buffer()
      */
     int internal_buffer_count;
-    
+
     /**
-     * internal_buffers. 
+     * internal_buffers.
      * Don't touch, used by lavc default_get_buffer()
      */
     void *internal_buffer;
@@ -1520,7 +1542,7 @@ typedef struct AVCodecContext {
      * - decoding: unused
      */
     int global_quality;
-    
+
 #define FF_CODER_TYPE_VLC   0
 #define FF_CODER_TYPE_AC    1
     /**
@@ -1536,7 +1558,15 @@ typedef struct AVCodecContext {
      * - decoding: unused
      */
     int context_model;
-    
+#if 0
+    /**
+     *
+     * - encoding: unused
+     * - decoding: set by user.
+     */
+    uint8_t * (*realloc)(struct AVCodecContext *s, uint8_t *buf, int buf_size);
+#endif
+
     /**
      * slice flags
      * - encoding: unused
@@ -1553,7 +1583,7 @@ typedef struct AVCodecContext {
      * - decoding: set by decoder
      */
     int xvmc_acceleration;
-    
+
     /**
      * macroblock decision mode
      * - encoding: set by user.
@@ -1577,7 +1607,7 @@ typedef struct AVCodecContext {
      * - decoding: set by lavc
      */
     uint16_t *inter_matrix;
-    
+
     /**
      * fourcc from the AVI stream header (LSB first, so "ABCD" -> ('D'<<24) + ('C'<<16) + ('B'<<8) + 'A').
      * this is used to workaround some encoder bugs
@@ -1621,7 +1651,7 @@ typedef struct AVCodecContext {
      * - decoding: unused
      */
     int noise_reduction;
-    
+
     /**
      * called at the beginning of a frame to get cr buffer for it.
      * buffer type (size, hints) must be the same. lavc won't check it.
@@ -1660,7 +1690,7 @@ typedef struct AVCodecContext {
      * - decoding: unused.
      */
     int error_rate;
-    
+
     /**
      * MP3 antialias algorithm, see FF_AA_* below.
      * - encoding: unused
@@ -1685,7 +1715,7 @@ typedef struct AVCodecContext {
      * - decoding: set by user
      */
     int thread_count;
-    
+
     /**
      * the codec may call this to execute several independant things. it will return only after
      * finishing all tasks, the user may replace this with some multithreaded implementation, the
@@ -1695,7 +1725,7 @@ typedef struct AVCodecContext {
      * - decoding: set by lavc, user can override
      */
     int (*execute)(struct AVCodecContext *c, int (*func)(struct AVCodecContext *c2, void *arg), void **arg2, int *ret, int count);
-    
+
     /**
      * Thread opaque.
      * can be used by execute() to store some per AVCodecContext stuff.
@@ -1705,9 +1735,9 @@ typedef struct AVCodecContext {
     void *thread_opaque;
 
     /**
-     * Motion estimation threshold. under which no motion estimation is 
+     * Motion estimation threshold. under which no motion estimation is
      * performed, but instead the user specified motion vectors are used
-     * 
+     *
      * - encoding: set by user
      * - decoding: unused
      */
@@ -1830,32 +1860,151 @@ typedef struct AVCodecContext {
     int mb_lmax;
 
     /**
-     * 
+     *
      * - encoding: set by user.
      * - decoding: unused
      */
     int me_penalty_compensation;
 
     /**
-     * 
+     *
      * - encoding: unused
      * - decoding: set by user.
      */
     enum AVDiscard skip_loop_filter;
 
     /**
-     * 
+     *
      * - encoding: unused
      * - decoding: set by user.
      */
     enum AVDiscard skip_idct;
 
     /**
-     * 
+     *
      * - encoding: unused
      * - decoding: set by user.
      */
     enum AVDiscard skip_frame;
+
+    /**
+     *
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int bidir_refine;
+
+    /**
+     *
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int brd_scale;
+
+    /**
+     * constant rate factor - quality-based VBR - values ~correspond to qps
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int crf;
+
+    /**
+     * constant quantization parameter rate control method
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int cqp;
+
+    /**
+     * minimum gop size
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int keyint_min;
+
+    /**
+     * number of reference frames
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int refs;
+
+    /**
+     * chroma qp offset from luma
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int chromaoffset;
+
+    /**
+     * influences how often b-frames are used
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int bframebias;
+
+    /**
+     * trellis RD quantization
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int trellis;
+
+    /**
+     * reduce fluctuations in qp (before curve compression)
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    float complexityblur;
+
+    /**
+     * in-loop deblocking filter alphac0 parameter
+     * alpha is in the range -6...6
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int deblockalpha;
+
+    /**
+     * in-loop deblocking filter beta parameter
+     * beta is in the range -6...6
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int deblockbeta;
+
+    /**
+     * macroblock subpartition sizes to consider - p8x8, p4x4, b8x8, i8x8, i4x4
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int partitions;
+#define X264_PART_I4X4 0x001  /* Analyse i4x4 */
+#define X264_PART_I8X8 0x002  /* Analyse i8x8 (requires 8x8 transform) */
+#define X264_PART_P8X8 0x010  /* Analyse p16x8, p8x16 and p8x8 */
+#define X264_PART_P4X4 0x020  /* Analyse p8x4, p4x8, p4x4 */
+#define X264_PART_B8X8 0x100  /* Analyse b16x8, b8x16 and b8x8 */
+
+    /**
+     * direct mv prediction mode - 0 (none), 1 (spatial), 2 (temporal)
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int directpred;
+
+    /**
+     * audio cutoff bandwidth (0 means "automatic") . Currently used only by FAAC
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int cutoff;
+
+    /**
+     * multiplied by qscale for each frame and added to scene_change_score
+     * - encoding: set by user.
+     * - decoding: unused
+     */
+    int scenechange_factor;
 } AVCodecContext;
 
 /**
@@ -1948,6 +2097,7 @@ extern AVCodec rv20_encoder;
 extern AVCodec dvvideo_encoder;
 extern AVCodec mjpeg_encoder;
 extern AVCodec ljpeg_encoder;
+extern AVCodec jpegls_encoder;
 extern AVCodec png_encoder;
 extern AVCodec ppm_encoder;
 extern AVCodec pgm_encoder;
@@ -2009,6 +2159,8 @@ extern AVCodec mp3_decoder;
 extern AVCodec mp3adu_decoder;
 extern AVCodec mp3on4_decoder;
 extern AVCodec qdm2_decoder;
+extern AVCodec cook_decoder;
+extern AVCodec truespeech_decoder;
 extern AVCodec mace3_decoder;
 extern AVCodec mace6_decoder;
 extern AVCodec huffyuv_decoder;
@@ -2062,6 +2214,7 @@ extern AVCodec sonic_decoder;
 extern AVCodec qtrle_decoder;
 extern AVCodec flac_decoder;
 extern AVCodec tscc_decoder;
+extern AVCodec cscd_decoder;
 extern AVCodec ulti_decoder;
 extern AVCodec qdraw_decoder;
 extern AVCodec xl_decoder;
@@ -2077,6 +2230,7 @@ extern AVCodec vorbis_decoder;
 extern AVCodec fraps_decoder;
 extern AVCodec libgsm_encoder;
 extern AVCodec libgsm_decoder;
+extern AVCodec bmp_decoder;
 
 /* pcm codecs */
 #define PCM_CODEC(id, name) \
@@ -2130,6 +2284,7 @@ extern AVCodec ac3_decoder;
 extern AVCodec dts_decoder;
 
 /* subtitles */
+extern AVCodec dvdsub_encoder;
 extern AVCodec dvdsub_decoder;
 extern AVCodec dvbsub_encoder;
 extern AVCodec dvbsub_decoder;
@@ -2141,7 +2296,7 @@ struct AVResampleContext;
 
 typedef struct ReSampleContext ReSampleContext;
 
-ReSampleContext *audio_resample_init(int output_channels, int input_channels, 
+ReSampleContext *audio_resample_init(int output_channels, int input_channels,
                                      int output_rate, int input_rate);
 int audio_resample(ReSampleContext *s, short *output, short *input, int nb_samples);
 void audio_resample_close(ReSampleContext *s);
@@ -2168,7 +2323,7 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
                                       int padleft, int padright);
 
 
-void img_resample(ImgReSampleContext *s, 
+void img_resample(ImgReSampleContext *s,
                   AVPicture *output, const AVPicture *input);
 
 void img_resample_close(ImgReSampleContext *s);
@@ -2213,11 +2368,11 @@ int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt,
 #define FF_ALPHA_TRANSP       0x0001 /* image has some totally transparent pixels */
 #define FF_ALPHA_SEMI_TRANSP  0x0002 /* image has some transparent pixels */
 int img_get_alpha_info(const AVPicture *src,
-		       int pix_fmt, int width, int height);
+                       int pix_fmt, int width, int height);
 
 /* convert among pixel formats */
 int img_convert(AVPicture *dst, int dst_pix_fmt,
-                const AVPicture *src, int pix_fmt, 
+                const AVPicture *src, int pix_fmt,
                 int width, int height);
 
 /* deinterlace a picture */
@@ -2265,23 +2420,23 @@ int avcodec_default_execute(AVCodecContext *c, int (*func)(AVCodecContext *c2, v
  */
 int avcodec_open(AVCodecContext *avctx, AVCodec *codec);
 
-int avcodec_decode_audio(AVCodecContext *avctx, int16_t *samples, 
+int avcodec_decode_audio(AVCodecContext *avctx, int16_t *samples,
                          int *frame_size_ptr,
                          uint8_t *buf, int buf_size);
-int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture, 
+int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
                          int *got_picture_ptr,
                          uint8_t *buf, int buf_size);
 int avcodec_decode_subtitle(AVCodecContext *avctx, AVSubtitle *sub,
                             int *got_sub_ptr,
                             const uint8_t *buf, int buf_size);
-int avcodec_parse_frame(AVCodecContext *avctx, uint8_t **pdata, 
+int avcodec_parse_frame(AVCodecContext *avctx, uint8_t **pdata,
                         int *data_size_ptr,
                         uint8_t *buf, int buf_size);
-int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size, 
+int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size,
                          const short *samples);
-int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size, 
+int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
                          const AVFrame *pict);
-int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size, 
+int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
                             const AVSubtitle *sub);
 
 int avcodec_close(AVCodecContext *avctx);
@@ -2305,7 +2460,7 @@ typedef struct AVCodecParserContext {
     void *priv_data;
     struct AVCodecParser *parser;
     int64_t frame_offset; /* offset of the current frame */
-    int64_t cur_offset; /* current offset 
+    int64_t cur_offset; /* current offset
                            (incremented by each av_parser_parse()) */
     int64_t last_frame_offset; /* offset of the last frame */
     /* video info */
@@ -2324,7 +2479,7 @@ typedef struct AVCodecParserContext {
     int64_t cur_frame_offset[AV_PARSER_PTS_NB];
     int64_t cur_frame_pts[AV_PARSER_PTS_NB];
     int64_t cur_frame_dts[AV_PARSER_PTS_NB];
-    
+
     int flags;
 #define PARSER_FLAG_COMPLETE_FRAMES           0x0001
 } AVCodecParserContext;
@@ -2333,9 +2488,9 @@ typedef struct AVCodecParser {
     int codec_ids[5]; /* several codec IDs are permitted */
     int priv_data_size;
     int (*parser_init)(AVCodecParserContext *s);
-    int (*parser_parse)(AVCodecParserContext *s, 
+    int (*parser_parse)(AVCodecParserContext *s,
                         AVCodecContext *avctx,
-                        uint8_t **poutbuf, int *poutbuf_size, 
+                        uint8_t **poutbuf, int *poutbuf_size,
                         const uint8_t *buf, int buf_size);
     void (*parser_close)(AVCodecParserContext *s);
     int (*split)(AVCodecContext *avctx, const uint8_t *buf, int buf_size);
@@ -2346,14 +2501,14 @@ extern AVCodecParser *av_first_parser;
 
 void av_register_codec_parser(AVCodecParser *parser);
 AVCodecParserContext *av_parser_init(int codec_id);
-int av_parser_parse(AVCodecParserContext *s, 
+int av_parser_parse(AVCodecParserContext *s,
                     AVCodecContext *avctx,
-                    uint8_t **poutbuf, int *poutbuf_size, 
+                    uint8_t **poutbuf, int *poutbuf_size,
                     const uint8_t *buf, int buf_size,
                     int64_t pts, int64_t dts);
 int av_parser_change(AVCodecParserContext *s,
                      AVCodecContext *avctx,
-                     uint8_t **poutbuf, int *poutbuf_size, 
+                     uint8_t **poutbuf, int *poutbuf_size,
                      const uint8_t *buf, int buf_size, int keyframe);
 void av_parser_close(AVCodecParserContext *s);
 
diff --git a/src/libffmpeg/libavcodec/bitstream.c b/src/libffmpeg/libavcodec/bitstream.c
index a8f456bd2..49c6ece1b 100755
--- a/src/libffmpeg/libavcodec/bitstream.c
+++ b/src/libffmpeg/libavcodec/bitstream.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * alternative bitstream reader & writer by Michael Niedermayer <michaelni@gmx.at>
  */
@@ -24,7 +24,7 @@
  * @file bitstream.c
  * bitstream api.
  */
- 
+
 #include "avcodec.h"
 #include "bitstream.h"
 
@@ -49,7 +49,7 @@ void ff_put_string(PutBitContext * pbc, char *s, int put_zero)
 
 /* bit input functions */
 
-/** 
+/**
  * reads 0-32 bits.
  */
 unsigned int get_bits_long(GetBitContext *s, int n){
@@ -60,7 +60,7 @@ unsigned int get_bits_long(GetBitContext *s, int n){
     }
 }
 
-/** 
+/**
  * shows 0-32 bits.
  */
 unsigned int show_bits_long(GetBitContext *s, int n){
@@ -83,7 +83,7 @@ int check_marker(GetBitContext *s, const char *msg)
 {
     int bit= get_bits1(s);
     if(!bit)
-	    av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);
+            av_log(NULL, AV_LOG_INFO, "Marker bit missing %s\n", msg);
 
     return bit;
 }
@@ -235,7 +235,7 @@ static int build_table(VLC *vlc, int table_nb_bits,
    'nb_bits' set thee decoding table size (2^nb_bits) entries. The
    bigger it is, the faster is the decoding. But it should not be too
    big to save memory and L1 cache. '9' is a good compromise.
-   
+
    'nb_codes' : number of vlcs codes
 
    'bits' : table which gives the size (in bits) of each vlc code.
@@ -249,7 +249,7 @@ static int build_table(VLC *vlc, int table_nb_bits,
    or 'codes' tables.
 
    'wrap' and 'size' allows to use any memory configuration and types
-   (byte/word/long) to store the 'bits' and 'codes' tables.  
+   (byte/word/long) to store the 'bits' and 'codes' tables.
 
    'use_static' should be set to 1 for tables, which should be freed
    with av_free_static(), 0 if free_vlc() will be used.
diff --git a/src/libffmpeg/libavcodec/bitstream.h b/src/libffmpeg/libavcodec/bitstream.h
index 0182b630b..4a3d55d19 100644
--- a/src/libffmpeg/libavcodec/bitstream.h
+++ b/src/libffmpeg/libavcodec/bitstream.h
@@ -13,7 +13,7 @@
 //#define LIBMPEG2_BITSTREAM_READER
 //#define A32_BITSTREAM_READER
 #define LIBMPEG2_BITSTREAM_READER_HACK //add BERO
- 
+
 extern const uint8_t ff_reverse[256];
 
 #if defined(ARCH_X86) || defined(ARCH_X86_64)
@@ -146,7 +146,7 @@ typedef struct RL_VLC_ELEM {
 #    ifdef __GNUC__
 static inline uint32_t unaligned32(const void *v) {
     struct Unaligned {
-	uint32_t i;
+        uint32_t i;
     } __attribute__((packed));
 
     return ((const struct Unaligned *) v)->i;
@@ -173,7 +173,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
 #endif
     //    printf("put_bits=%d %x\n", n, value);
     assert(n == 32 || value < (1U << n));
-    
+
     bit_buf = s->bit_buf;
     bit_left = s->bit_left;
 
@@ -183,7 +183,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
         bit_buf = (bit_buf<<n) | value;
         bit_left-=n;
     } else {
-	bit_buf<<=bit_left;
+        bit_buf<<=bit_left;
         bit_buf |= value >> (n - bit_left);
 #ifdef UNALIGNED_STORES_ARE_BAD
         if (3 & (intptr_t) s->buf_ptr) {
@@ -196,7 +196,7 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
         *(uint32_t *)s->buf_ptr = be2me_32(bit_buf);
         //printf("bitbuf = %08x\n", bit_buf);
         s->buf_ptr+=4;
-	bit_left+=32 - n;
+        bit_left+=32 - n;
         bit_buf = value;
     }
 
@@ -212,28 +212,28 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
 #    ifdef ALIGNED_BITSTREAM_WRITER
 #        if defined(ARCH_X86) || defined(ARCH_X86_64)
     asm volatile(
-	"movl %0, %%ecx			\n\t"
-	"xorl %%eax, %%eax		\n\t"
-	"shrdl %%cl, %1, %%eax		\n\t"
-	"shrl %%cl, %1			\n\t"
-	"movl %0, %%ecx			\n\t"
-	"shrl $3, %%ecx			\n\t"
-	"andl $0xFFFFFFFC, %%ecx	\n\t"
-	"bswapl %1			\n\t"
-	"orl %1, (%2, %%ecx)		\n\t"
-	"bswapl %%eax			\n\t"
-	"addl %3, %0			\n\t"
-	"movl %%eax, 4(%2, %%ecx)	\n\t"
-	: "=&r" (s->index), "=&r" (value)
-	: "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
-	: "%eax", "%ecx"
+        "movl %0, %%ecx                 \n\t"
+        "xorl %%eax, %%eax              \n\t"
+        "shrdl %%cl, %1, %%eax          \n\t"
+        "shrl %%cl, %1                  \n\t"
+        "movl %0, %%ecx                 \n\t"
+        "shrl $3, %%ecx                 \n\t"
+        "andl $0xFFFFFFFC, %%ecx        \n\t"
+        "bswapl %1                      \n\t"
+        "orl %1, (%2, %%ecx)            \n\t"
+        "bswapl %%eax                   \n\t"
+        "addl %3, %0                    \n\t"
+        "movl %%eax, 4(%2, %%ecx)       \n\t"
+        : "=&r" (s->index), "=&r" (value)
+        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value<<(-n))
+        : "%eax", "%ecx"
     );
 #        else
     int index= s->index;
     uint32_t *ptr= ((uint32_t *)s->buf)+(index>>5);
-    
-    value<<= 32-n; 
-    
+
+    value<<= 32-n;
+
     ptr[0] |= be2me_32(value>>(index&31));
     ptr[1]  = be2me_32(value<<(32-(index&31)));
 //if(n>24) printf("%d %d\n", n, value);
@@ -243,25 +243,25 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
 #    else //ALIGNED_BITSTREAM_WRITER
 #        if defined(ARCH_X86) || defined(ARCH_X86_64)
     asm volatile(
-	"movl $7, %%ecx			\n\t"
-	"andl %0, %%ecx			\n\t"
-	"addl %3, %%ecx			\n\t"
-	"negl %%ecx			\n\t"
-	"shll %%cl, %1			\n\t"
-	"bswapl %1			\n\t"
-	"movl %0, %%ecx			\n\t"
-	"shrl $3, %%ecx			\n\t"
-	"orl %1, (%%ecx, %2)		\n\t"
-	"addl %3, %0			\n\t"
-	"movl $0, 4(%%ecx, %2)		\n\t"
-	: "=&r" (s->index), "=&r" (value)
-	: "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
-	: "%ecx"
+        "movl $7, %%ecx                 \n\t"
+        "andl %0, %%ecx                 \n\t"
+        "addl %3, %%ecx                 \n\t"
+        "negl %%ecx                     \n\t"
+        "shll %%cl, %1                  \n\t"
+        "bswapl %1                      \n\t"
+        "movl %0, %%ecx                 \n\t"
+        "shrl $3, %%ecx                 \n\t"
+        "orl %1, (%%ecx, %2)            \n\t"
+        "addl %3, %0                    \n\t"
+        "movl $0, 4(%%ecx, %2)          \n\t"
+        : "=&r" (s->index), "=&r" (value)
+        : "r" (s->buf), "r" (n), "0" (s->index), "1" (value)
+        : "%ecx"
     );
 #        else
     int index= s->index;
     uint32_t *ptr= (uint32_t*)(((uint8_t *)s->buf)+(index>>3));
-    
+
     ptr[0] |= be2me_32(value<<(32-n-(index&7) ));
     ptr[1] = 0;
 //if(n>24) printf("%d %d\n", n, value);
@@ -276,9 +276,9 @@ static inline void put_bits(PutBitContext *s, int n, unsigned int value)
 static inline uint8_t* pbBufPtr(PutBitContext *s)
 {
 #ifdef ALT_BITSTREAM_WRITER
-	return s->buf + (s->index>>3);
+        return s->buf + (s->index>>3);
 #else
-	return s->buf_ptr;
+        return s->buf_ptr;
 #endif
 }
 
@@ -290,11 +290,11 @@ static inline void skip_put_bytes(PutBitContext *s, int n){
         assert((put_bits_count(s)&7)==0);
 #ifdef ALT_BITSTREAM_WRITER
         FIXME may need some cleaning of the buffer
-	s->index += n<<3;
+        s->index += n<<3;
 #else
         assert(s->bit_left==32);
-	s->buf_ptr += n;
-#endif    
+        s->buf_ptr += n;
+#endif
 }
 
 /**
@@ -308,7 +308,7 @@ static inline void skip_put_bits(PutBitContext *s, int n){
     s->bit_left -= n;
     s->buf_ptr-= s->bit_left>>5;
     s->bit_left &= 31;
-#endif        
+#endif
 }
 
 /**
@@ -366,10 +366,10 @@ for examples see get_bits, show_bits, skip_bits, get_vlc
 static inline int unaligned32_be(const void *v)
 {
 #ifdef CONFIG_ALIGN
-	const uint8_t *p=v;
-	return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]);
+        const uint8_t *p=v;
+        return (((p[0]<<8) | p[1])<<16) | (p[2]<<8) | (p[3]);
 #else
-	return be2me_32( unaligned32(v)); //original
+        return be2me_32( unaligned32(v)); //original
 #endif
 }
 
@@ -528,8 +528,8 @@ static inline int get_bits_count(GetBitContext *s){
 #if defined(ARCH_X86) || defined(ARCH_X86_64)
 #   define SKIP_CACHE(name, gb, num)\
         asm(\
-            "shldl %2, %1, %0		\n\t"\
-            "shll %2, %1		\n\t"\
+            "shldl %2, %1, %0          \n\t"\
+            "shll %2, %1               \n\t"\
             : "+r" (name##_cache0), "+r" (name##_cache1)\
             : "Ic" ((uint8_t)num)\
            );
@@ -569,9 +569,9 @@ static inline int get_bits_count(GetBitContext *s){
 
 /**
  * read mpeg1 dc style vlc (sign bit + mantisse with no MSB).
- * if MSB not set it is negative 
+ * if MSB not set it is negative
  * @param n length in bits
- * @author BERO  
+ * @author BERO
  */
 static inline int get_xbits(GetBitContext *s, int n){
     register int tmp;
@@ -790,34 +790,20 @@ void free_vlc(VLC *vlc);
     SKIP_BITS(name, gb, n)\
 }
 
-// deprecated, dont use get_vlc for new code, use get_vlc2 instead or use GET_VLC directly
-static inline int get_vlc(GetBitContext *s, VLC *vlc)
-{
-    int code;
-    VLC_TYPE (*table)[2]= vlc->table;
-    
-    OPEN_READER(re, s)
-    UPDATE_CACHE(re, s)
-
-    GET_VLC(code, re, s, table, vlc->bits, 3)    
-
-    CLOSE_READER(re, s)
-    return code;
-}
 
 /**
  * parses a vlc code, faster then get_vlc()
- * @param bits is the number of bits which will be read at once, must be 
+ * @param bits is the number of bits which will be read at once, must be
  *             identical to nb_bits in init_vlc()
  * @param max_depth is the number of times bits bits must be readed to completly
- *                  read the longest vlc code 
+ *                  read the longest vlc code
  *                  = (max_vlc_length + bits - 1) / bits
  */
 static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
                                   int bits, int max_depth)
 {
     int code;
-    
+
     OPEN_READER(re, s)
     UPDATE_CACHE(re, s)
 
@@ -833,7 +819,7 @@ static always_inline int get_vlc2(GetBitContext *s, VLC_TYPE (*table)[2],
 #include "avcodec.h"
 static inline void print_bin(int bits, int n){
     int i;
-    
+
     for(i=n-1; i>=0; i--){
         av_log(NULL, AV_LOG_DEBUG, "%d", (bits>>i)&1);
     }
@@ -843,7 +829,7 @@ static inline void print_bin(int bits, int n){
 
 static inline int get_bits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
     int r= get_bits(s, n);
-    
+
     print_bin(r, n);
     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d bit @%5d in %s %s:%d\n", r, n, r, get_bits_count(s)-n, file, func, line);
     return r;
@@ -854,16 +840,16 @@ static inline int get_vlc_trace(GetBitContext *s, VLC_TYPE (*table)[2], int bits
     int r= get_vlc2(s, table, bits, max_depth);
     int len= get_bits_count(s) - pos;
     int bits2= show>>(24-len);
-    
+
     print_bin(bits2, len);
-    
+
     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d vlc @%5d in %s %s:%d\n", bits2, len, r, pos, file, func, line);
     return r;
 }
 static inline int get_xbits_trace(GetBitContext *s, int n, char *file, const char *func, int line){
     int show= show_bits(s, n);
     int r= get_xbits(s, n);
-    
+
     print_bin(show, n);
     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d xbt @%5d in %s %s:%d\n", show, n, r, get_bits_count(s)-n, file, func, line);
     return r;
diff --git a/src/libffmpeg/libavcodec/cabac.c b/src/libffmpeg/libavcodec/cabac.c
index 9a598fa47..88790a960 100644
--- a/src/libffmpeg/libavcodec/cabac.c
+++ b/src/libffmpeg/libavcodec/cabac.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -79,14 +79,14 @@ const uint8_t ff_h264_norm_shift[256]= {
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
  1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
 };
 
 /**
@@ -102,7 +102,7 @@ void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size){
 #ifdef STRICT_LIMITS
     c->sym_count =0;
 #endif
-    
+
     c->pb.bit_left++; //avoids firstBitFlag
 }
 
@@ -111,7 +111,7 @@ void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size){
  * @param buf_size size of buf in bits
  */
 void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
-    c->bytestream_start= 
+    c->bytestream_start=
     c->bytestream= buf;
     c->bytestream_end= buf + buf_size;
 
@@ -125,10 +125,10 @@ void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size){
     c->range= 0x1FE<<(CABAC_BITS + 1);
 }
 
-void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4], 
+void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4],
                           uint8_t const *mps_state, uint8_t const *lps_state, int state_count){
     int i, j;
-    
+
     for(i=0; i<state_count; i++){
         for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save
             c->lps_range[2*i+0][j+4]=
@@ -159,20 +159,20 @@ int main(){
     uint8_t r[9*SIZE];
     int i;
     uint8_t state[10]= {0};
-    
+
     ff_init_cabac_encoder(&c, b, SIZE);
     ff_init_cabac_states(&c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
-    
+
     for(i=0; i<SIZE; i++){
         r[i]= random()%7;
     }
-    
+
     for(i=0; i<SIZE; i++){
 START_TIMER
         put_cabac_bypass(&c, r[i]&1);
 STOP_TIMER("put_cabac_bypass")
     }
-    
+
     for(i=0; i<SIZE; i++){
 START_TIMER
         put_cabac(&c, state, r[i]&1);
@@ -183,27 +183,27 @@ STOP_TIMER("put_cabac")
 START_TIMER
         put_cabac_u(&c, state, r[i], 6, 3, i&1);
 STOP_TIMER("put_cabac_u")
-    }    
+    }
 
     for(i=0; i<SIZE; i++){
 START_TIMER
         put_cabac_ueg(&c, state, r[i], 3, 0, 1, 2);
 STOP_TIMER("put_cabac_ueg")
-    }    
-   
+    }
+
     put_cabac_terminate(&c, 1);
-    
+
     ff_init_cabac_decoder(&c, b, SIZE);
-    
+
     memset(state, 0, sizeof(state));
-    
+
     for(i=0; i<SIZE; i++){
 START_TIMER
         if( (r[i]&1) != get_cabac_bypass(&c) )
             av_log(NULL, AV_LOG_ERROR, "CABAC bypass failure at %d\n", i);
 STOP_TIMER("get_cabac_bypass")
     }
-    
+
     for(i=0; i<SIZE; i++){
 START_TIMER
         if( (r[i]&1) != get_cabac(&c, state) )
@@ -227,7 +227,7 @@ STOP_TIMER("get_cabac_ueg")
 #endif
     if(!get_cabac_terminate(&c))
         av_log(NULL, AV_LOG_ERROR, "where's the Terminator?\n");
-    
+
     return 0;
 }
 
diff --git a/src/libffmpeg/libavcodec/cabac.h b/src/libffmpeg/libavcodec/cabac.h
index 15ec88d92..2e4ec7083 100644
--- a/src/libffmpeg/libavcodec/cabac.h
+++ b/src/libffmpeg/libavcodec/cabac.h
@@ -14,10 +14,10 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file cabac.h
  * Context Adaptive Binary Arithmetic Coder.
@@ -54,13 +54,13 @@ extern const uint8_t ff_h264_norm_shift[256];
 
 void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
 void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
-void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4], 
+void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4],
                           uint8_t const *mps_state, uint8_t const *lps_state, int state_count);
 
 
 static inline void put_cabac_bit(CABACContext *c, int b){
-    put_bits(&c->pb, 1, b); 
-    for(;c->outstanding_count; c->outstanding_count--){ 
+    put_bits(&c->pb, 1, b);
+    for(;c->outstanding_count; c->outstanding_count--){
         put_bits(&c->pb, 1, 1-b);
     }
 }
@@ -77,7 +77,7 @@ static inline void renorm_cabac_encoder(CABACContext *c){
             put_cabac_bit(c, 1);
             c->low -= 0x200;
         }
-        
+
         c->range+= c->range;
         c->low += c->low;
     }
@@ -85,7 +85,7 @@ static inline void renorm_cabac_encoder(CABACContext *c){
 
 static inline void put_cabac(CABACContext *c, uint8_t * const state, int bit){
     int RangeLPS= c->lps_range[*state][c->range>>6];
-    
+
     if(bit == ((*state)&1)){
         c->range -= RangeLPS;
         *state= c->mps_state[*state];
@@ -94,7 +94,7 @@ static inline void put_cabac(CABACContext *c, uint8_t * const state, int bit){
         c->range = RangeLPS;
         *state= c->lps_state[*state];
     }
-    
+
     renorm_cabac_encoder(c);
 
 #ifdef STRICT_LIMITS
@@ -138,7 +138,7 @@ static inline void put_cabac_bypass(CABACContext *c, int bit){
         put_cabac_bit(c, 1);
         c->low -= 0x400;
     }
-        
+
 #ifdef STRICT_LIMITS
     c->symCount++;
 #endif
@@ -156,16 +156,16 @@ static inline int put_cabac_terminate(CABACContext *c, int bit){
     }else{
         c->low += c->range;
         c->range= 2;
-        
+
         renorm_cabac_encoder(c);
 
         assert(c->low <= 0x1FF);
         put_cabac_bit(c, c->low>>9);
         put_bits(&c->pb, 2, ((c->low>>7)&3)|1);
-        
+
         flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong
     }
-        
+
 #ifdef STRICT_LIMITS
     c->symCount++;
 #endif
@@ -178,9 +178,9 @@ static inline int put_cabac_terminate(CABACContext *c, int bit){
  */
 static inline void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){
     int i;
-    
+
     assert(v <= max);
-    
+
 #if 1
     for(i=0; i<v; i++){
         put_cabac(c, state, 1);
@@ -213,14 +213,14 @@ static inline void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max,
  */
 static inline void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){
     int i;
-    
+
     if(v==0)
         put_cabac(c, state, 0);
     else{
         const int sign= v < 0;
-        
+
         if(is_signed) v= ABS(v);
-        
+
         if(v<max){
             for(i=0; i<v; i++){
                 put_cabac(c, state, 1);
@@ -272,14 +272,14 @@ static void refill2(CABACContext *c){
     i= 8 - ff_h264_norm_shift[x>>(CABAC_BITS+1)];
 
     x= -CABAC_MASK;
-    
+
     if(c->bytestream < c->bytestream_end)
 #if CABAC_BITS == 16
         x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
 #else
         x+= c->bytestream[0]<<1;
 #endif
-    
+
     c->low += x<<i;
     c->bytestream+= CABAC_BITS/8;
 }
@@ -305,7 +305,7 @@ static inline void renorm_cabac_decoder_once(CABACContext *c){
 static inline int get_cabac(CABACContext *c, uint8_t * const state){
     int RangeLPS= c->lps_range[*state][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1);
     int bit, lps_mask attribute_unused;
-    
+
     c->range -= RangeLPS;
 #if 1
     if(c->low < c->range){
@@ -327,13 +327,13 @@ static inline int get_cabac(CABACContext *c, uint8_t * const state){
     }
 #else
     lps_mask= (c->range - c->low)>>31;
-    
+
     c->low -= c->range & lps_mask;
     c->range += (RangeLPS - c->range) & lps_mask;
-    
+
     bit= ((*state)^lps_mask)&1;
     *state= c->mps_state[(*state) - (128&lps_mask)];
-    
+
     lps_mask= ff_h264_norm_shift[c->range>>(CABAC_BITS+2)];
     c->range<<= lps_mask;
     c->low  <<= lps_mask;
@@ -341,7 +341,7 @@ static inline int get_cabac(CABACContext *c, uint8_t * const state){
         refill2(c);
 #endif
 
-    return bit;    
+    return bit;
 }
 
 static inline int get_cabac_bypass(CABACContext *c){
@@ -349,7 +349,7 @@ static inline int get_cabac_bypass(CABACContext *c){
 
     if(!(c->low & CABAC_MASK))
         refill(c);
-    
+
     if(c->low < c->range){
         return 0;
     }else{
@@ -369,7 +369,7 @@ static inline int get_cabac_terminate(CABACContext *c){
         return 0;
     }else{
         return c->bytestream - c->bytestream_start;
-    }    
+    }
 }
 
 /**
@@ -377,11 +377,11 @@ static inline int get_cabac_terminate(CABACContext *c){
  */
 static inline int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){
     int i;
-    
-    for(i=0; i<max; i++){ 
+
+    for(i=0; i<max; i++){
         if(get_cabac(c, state)==0)
             return i;
-            
+
         if(i< max_index) state++;
     }
 
@@ -394,13 +394,13 @@ static inline int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max
 static inline int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){
     int i, v;
     int m= 1<<k;
-    
-    if(get_cabac(c, state)==0) 
+
+    if(get_cabac(c, state)==0)
         return 0;
-        
+
     if(0 < max_index) state++;
-    
-    for(i=1; i<max; i++){ 
+
+    for(i=1; i<max; i++){
         if(get_cabac(c, state)==0){
             if(is_signed && get_cabac_bypass(c)){
                 return -i;
@@ -410,12 +410,12 @@ static inline int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int i
 
         if(i < max_index) state++;
     }
-    
+
     while(get_cabac_bypass(c)){
         i+= m;
         m+= m;
     }
-    
+
     v=0;
     while(m>>=1){
         v+= v + get_cabac_bypass(c);
diff --git a/src/libffmpeg/libavcodec/cinepak.c b/src/libffmpeg/libavcodec/cinepak.c
index d1e1f0ec1..797681231 100644
--- a/src/libffmpeg/libavcodec/cinepak.c
+++ b/src/libffmpeg/libavcodec/cinepak.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -101,7 +101,7 @@ static void cinepak_decode_codebook (cvid_codebook_t *codebook,
                 codebook[i].u  = 128 + *data++;
                 codebook[i].v  = 128 + *data++;
             } else {
-                /* this codebook type indicates either greyscale or 
+                /* this codebook type indicates either greyscale or
                  * palettized video; if palettized, U & V components will
                  * not be used so it is safe to set them to 128 for the
                  * benefit of greyscale rendering in YUV420P */
@@ -286,7 +286,7 @@ static int cinepak_decode_strip (CinepakContext *s,
         case 0x2100:
         case 0x2400:
         case 0x2500:
-            cinepak_decode_codebook (strip->v4_codebook, chunk_id, 
+            cinepak_decode_codebook (strip->v4_codebook, chunk_id,
                 chunk_size, data);
             break;
 
@@ -294,14 +294,14 @@ static int cinepak_decode_strip (CinepakContext *s,
         case 0x2300:
         case 0x2600:
         case 0x2700:
-            cinepak_decode_codebook (strip->v1_codebook, chunk_id, 
+            cinepak_decode_codebook (strip->v1_codebook, chunk_id,
                 chunk_size, data);
             break;
 
         case 0x3000:
         case 0x3100:
         case 0x3200:
-            return cinepak_decode_vectors (s, strip, chunk_id, 
+            return cinepak_decode_vectors (s, strip, chunk_id,
                 chunk_size, data);
         }
 
@@ -325,7 +325,7 @@ static int cinepak_decode (CinepakContext *s)
 
     frame_flags = s->data[0];
     num_strips  = BE_16 (&s->data[8]);
-    encoded_buf_size = BE_16 (&s->data[2]);
+    encoded_buf_size = ((s->data[1] << 16) | BE_16 (&s->data[2]));
     if (encoded_buf_size != s->size)
         sega_film_data = 1;
     if (sega_film_data)
diff --git a/src/libffmpeg/libavcodec/cljr.c b/src/libffmpeg/libavcodec/cljr.c
index 8072eee18..feb0d8bb2 100644
--- a/src/libffmpeg/libavcodec/cljr.c
+++ b/src/libffmpeg/libavcodec/cljr.c
@@ -14,15 +14,15 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file cljr.c
  * Cirrus Logic AccuPak codec.
  */
- 
+
 #include "avcodec.h"
 #include "mpegvideo.h"
 
@@ -34,7 +34,7 @@ typedef struct CLJRContext{
     GetBitContext gb;
 } CLJRContext;
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -61,13 +61,13 @@ static int decode_frame(AVCodecContext *avctx,
         uint8_t *cb= &a->picture.data[1][ y*a->picture.linesize[1] ];
         uint8_t *cr= &a->picture.data[2][ y*a->picture.linesize[2] ];
         for(x=0; x<avctx->width; x+=4){
-    	    luma[3] = get_bits(&a->gb, 5) << 3;
-	    luma[2] = get_bits(&a->gb, 5) << 3;
-	    luma[1] = get_bits(&a->gb, 5) << 3;
-	    luma[0] = get_bits(&a->gb, 5) << 3;
-	    luma+= 4;
-	    *(cb++) = get_bits(&a->gb, 6) << 2;
-	    *(cr++) = get_bits(&a->gb, 6) << 2;
+                luma[3] = get_bits(&a->gb, 5) << 3;
+            luma[2] = get_bits(&a->gb, 5) << 3;
+            luma[1] = get_bits(&a->gb, 5) << 3;
+            luma[0] = get_bits(&a->gb, 5) << 3;
+            luma+= 4;
+            *(cb++) = get_bits(&a->gb, 6) << 2;
+            *(cr++) = get_bits(&a->gb, 6) << 2;
         }
     }
 
@@ -75,7 +75,7 @@ static int decode_frame(AVCodecContext *avctx,
     *data_size = sizeof(AVPicture);
 
     emms_c();
-    
+
     return buf_size;
 }
 
@@ -92,13 +92,13 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
     p->key_frame= 1;
 
     emms_c();
-    
+
     align_put_bits(&a->pb);
     while(get_bit_count(&a->pb)&31)
         put_bits(&a->pb, 8, 0);
-    
+
     size= get_bit_count(&a->pb)/32;
-    
+
     return size*4;
 }
 #endif
@@ -113,7 +113,7 @@ static void common_init(AVCodecContext *avctx){
 static int decode_init(AVCodecContext *avctx){
 
     common_init(avctx);
-    
+
     avctx->pix_fmt= PIX_FMT_YUV411P;
 
     return 0;
@@ -123,7 +123,7 @@ static int decode_init(AVCodecContext *avctx){
 static int encode_init(AVCodecContext *avctx){
 
     common_init(avctx);
-    
+
     return 0;
 }
 #endif
diff --git a/src/libffmpeg/libavcodec/cyuv.c b/src/libffmpeg/libavcodec/cyuv.c
index 34de8cc04..b64e1a58b 100644
--- a/src/libffmpeg/libavcodec/cyuv.c
+++ b/src/libffmpeg/libavcodec/cyuv.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * Creative YUV (CYUV) Video Decoder
  *   by Mike Melanson (melanson@pcisys.net)
@@ -24,10 +24,10 @@
  */
 
 /**
- * @file cyuv.c 
+ * @file cyuv.c
  * Creative YUV (CYUV) Video Decoder.
  */
- 
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -61,7 +61,7 @@ static int cyuv_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static int cyuv_decode_frame(AVCodecContext *avctx, 
+static int cyuv_decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
                              uint8_t *buf, int buf_size)
 {
@@ -114,7 +114,7 @@ static int cyuv_decode_frame(AVCodecContext *avctx,
 
     /* iterate through each line in the height */
     for (y_ptr = 0, u_ptr = 0, v_ptr = 0;
-         y_ptr < (s->height * s->frame.linesize[0]); 
+         y_ptr < (s->height * s->frame.linesize[0]);
          y_ptr += s->frame.linesize[0] - s->width,
          u_ptr += s->frame.linesize[1] - s->width / 4,
          v_ptr += s->frame.linesize[2] - s->width / 4) {
diff --git a/src/libffmpeg/libavcodec/dpcm.c b/src/libffmpeg/libavcodec/dpcm.c
index 78ab8cb34..c920cb403 100644
--- a/src/libffmpeg/libavcodec/dpcm.c
+++ b/src/libffmpeg/libavcodec/dpcm.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
@@ -91,7 +91,7 @@ static int sol_table_old[16] =
 static int sol_table_new[16] =
     { 0x0,  0x1,  0x2,  0x3,  0x6,  0xA,  0xF,  0x15,
       0x0, -0x1, -0x2, -0x3, -0x6, -0xA, -0xF, -0x15};
-    
+
 static int sol_table_16[128] = {
     0x000, 0x008, 0x010, 0x020, 0x030, 0x040, 0x050, 0x060, 0x070, 0x080,
     0x090, 0x0A0, 0x0B0, 0x0C0, 0x0D0, 0x0E0, 0x0F0, 0x100, 0x110, 0x120,
@@ -130,7 +130,7 @@ static int dpcm_decode_init(AVCodecContext *avctx)
         }
         break;
 
-        
+
     case CODEC_ID_SOL_DPCM:
         switch(avctx->codec_tag){
         case 1:
@@ -149,7 +149,7 @@ static int dpcm_decode_init(AVCodecContext *avctx)
             return -1;
         }
         break;
-     
+
     default:
         break;
     }
diff --git a/src/libffmpeg/libavcodec/dsputil.c b/src/libffmpeg/libavcodec/dsputil.c
index 5a23672a3..3931c3978 100644
--- a/src/libffmpeg/libavcodec/dsputil.c
+++ b/src/libffmpeg/libavcodec/dsputil.c
@@ -15,16 +15,16 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer <michaelni@gmx.at>
  */
- 
+
 /**
  * @file dsputil.c
  * DSP utils
  */
- 
+
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
@@ -62,76 +62,76 @@ const uint8_t ff_zigzag248_direct[64] = {
 };
 
 /* not permutated inverse zigzag_direct + 1 for MMX quantizer */
-uint16_t __align8 inv_zigzag_direct16[64] = {0, };
+DECLARE_ALIGNED_8(uint16_t, inv_zigzag_direct16[64]) = {0, };
 
 const uint8_t ff_alternate_horizontal_scan[64] = {
-    0,  1,   2,  3,  8,  9, 16, 17, 
+    0,  1,   2,  3,  8,  9, 16, 17,
     10, 11,  4,  5,  6,  7, 15, 14,
-    13, 12, 19, 18, 24, 25, 32, 33, 
+    13, 12, 19, 18, 24, 25, 32, 33,
     26, 27, 20, 21, 22, 23, 28, 29,
-    30, 31, 34, 35, 40, 41, 48, 49, 
+    30, 31, 34, 35, 40, 41, 48, 49,
     42, 43, 36, 37, 38, 39, 44, 45,
-    46, 47, 50, 51, 56, 57, 58, 59, 
+    46, 47, 50, 51, 56, 57, 58, 59,
     52, 53, 54, 55, 60, 61, 62, 63,
 };
 
 const uint8_t ff_alternate_vertical_scan[64] = {
-    0,  8,  16, 24,  1,  9,  2, 10, 
+    0,  8,  16, 24,  1,  9,  2, 10,
     17, 25, 32, 40, 48, 56, 57, 49,
-    41, 33, 26, 18,  3, 11,  4, 12, 
+    41, 33, 26, 18,  3, 11,  4, 12,
     19, 27, 34, 42, 50, 58, 35, 43,
-    51, 59, 20, 28,  5, 13,  6, 14, 
+    51, 59, 20, 28,  5, 13,  6, 14,
     21, 29, 36, 44, 52, 60, 37, 45,
-    53, 61, 22, 30,  7, 15, 23, 31, 
+    53, 61, 22, 30,  7, 15, 23, 31,
     38, 46, 54, 62, 39, 47, 55, 63,
 };
 
 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
 const uint32_t inverse[256]={
-         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
- 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
- 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
- 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333, 
- 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367, 
- 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283, 
-  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315, 
-  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085, 
-  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498, 
-  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675, 
-  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441, 
-  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183, 
-  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712, 
-  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400, 
-  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163, 
-  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641, 
-  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573, 
-  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737, 
-  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493, 
-  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373, 
-  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368, 
-  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671, 
-  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767, 
-  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740, 
-  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751, 
-  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635, 
-  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593, 
-  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944, 
-  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933, 
-  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575, 
-  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532, 
+         0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757,
+ 536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154,
+ 268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709,
+ 178956971,  171798692,  165191050,  159072863,  153391690,  148102321,  143165577,  138547333,
+ 134217728,  130150525,  126322568,  122713352,  119304648,  116080198,  113025456,  110127367,
+ 107374183,  104755300,  102261127,   99882961,   97612894,   95443718,   93368855,   91382283,
+  89478486,   87652394,   85899346,   84215046,   82595525,   81037119,   79536432,   78090315,
+  76695845,   75350304,   74051161,   72796056,   71582789,   70409300,   69273667,   68174085,
+  67108864,   66076420,   65075263,   64103990,   63161284,   62245903,   61356676,   60492498,
+  59652324,   58835169,   58040099,   57266231,   56512728,   55778797,   55063684,   54366675,
+  53687092,   53024288,   52377650,   51746594,   51130564,   50529028,   49941481,   49367441,
+  48806447,   48258060,   47721859,   47197443,   46684428,   46182445,   45691142,   45210183,
+  44739243,   44278014,   43826197,   43383509,   42949673,   42524429,   42107523,   41698712,
+  41297763,   40904451,   40518560,   40139882,   39768216,   39403370,   39045158,   38693400,
+  38347923,   38008561,   37675152,   37347542,   37025581,   36709123,   36398028,   36092163,
+  35791395,   35495598,   35204650,   34918434,   34636834,   34359739,   34087043,   33818641,
+  33554432,   33294321,   33038210,   32786010,   32537632,   32292988,   32051995,   31814573,
+  31580642,   31350127,   31122952,   30899046,   30678338,   30460761,   30246249,   30034737,
+  29826162,   29620465,   29417585,   29217465,   29020050,   28825284,   28633116,   28443493,
+  28256364,   28071682,   27889399,   27709467,   27531842,   27356480,   27183338,   27012373,
+  26843546,   26676816,   26512144,   26349493,   26188825,   26030105,   25873297,   25718368,
+  25565282,   25414008,   25264514,   25116768,   24970741,   24826401,   24683721,   24542671,
+  24403224,   24265352,   24129030,   23994231,   23860930,   23729102,   23598722,   23469767,
+  23342214,   23216040,   23091223,   22967740,   22845571,   22724695,   22605092,   22486740,
+  22369622,   22253717,   22139007,   22025474,   21913099,   21801865,   21691755,   21582751,
+  21474837,   21367997,   21262215,   21157475,   21053762,   20951060,   20849356,   20748635,
+  20648882,   20550083,   20452226,   20355296,   20259280,   20164166,   20069941,   19976593,
+  19884108,   19792477,   19701685,   19611723,   19522579,   19434242,   19346700,   19259944,
+  19173962,   19088744,   19004281,   18920561,   18837576,   18755316,   18673771,   18592933,
+  18512791,   18433337,   18354562,   18276457,   18199014,   18122225,   18046082,   17970575,
+  17895698,   17821442,   17747799,   17674763,   17602325,   17530479,   17459217,   17388532,
   17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
 };
 
 /* Input permutation for the simple_idct_mmx */
 static const uint8_t simple_mmx_permutation[64]={
-	0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
-	0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
-	0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
-	0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 
-	0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 
-	0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 
-	0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 
-	0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
+        0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
+        0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
+        0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
+        0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
+        0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
+        0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
+        0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
+        0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
 };
 
 static int pix_sum_c(uint8_t * pix, int line_size)
@@ -140,18 +140,18 @@ static int pix_sum_c(uint8_t * pix, int line_size)
 
     s = 0;
     for (i = 0; i < 16; i++) {
-	for (j = 0; j < 16; j += 8) {
-	    s += pix[0];
-	    s += pix[1];
-	    s += pix[2];
-	    s += pix[3];
-	    s += pix[4];
-	    s += pix[5];
-	    s += pix[6];
-	    s += pix[7];
-	    pix += 8;
-	}
-	pix += line_size - 16;
+        for (j = 0; j < 16; j += 8) {
+            s += pix[0];
+            s += pix[1];
+            s += pix[2];
+            s += pix[3];
+            s += pix[4];
+            s += pix[5];
+            s += pix[6];
+            s += pix[7];
+            pix += 8;
+        }
+        pix += line_size - 16;
     }
     return s;
 }
@@ -163,33 +163,33 @@ static int pix_norm1_c(uint8_t * pix, int line_size)
 
     s = 0;
     for (i = 0; i < 16; i++) {
-	for (j = 0; j < 16; j += 8) {
+        for (j = 0; j < 16; j += 8) {
 #if 0
-	    s += sq[pix[0]];
-	    s += sq[pix[1]];
-	    s += sq[pix[2]];
-	    s += sq[pix[3]];
-	    s += sq[pix[4]];
-	    s += sq[pix[5]];
-	    s += sq[pix[6]];
-	    s += sq[pix[7]];
+            s += sq[pix[0]];
+            s += sq[pix[1]];
+            s += sq[pix[2]];
+            s += sq[pix[3]];
+            s += sq[pix[4]];
+            s += sq[pix[5]];
+            s += sq[pix[6]];
+            s += sq[pix[7]];
 #else
 #if LONG_MAX > 2147483647
-	    register uint64_t x=*(uint64_t*)pix;
-	    s += sq[x&0xff];
-	    s += sq[(x>>8)&0xff];
-	    s += sq[(x>>16)&0xff];
-	    s += sq[(x>>24)&0xff];
+            register uint64_t x=*(uint64_t*)pix;
+            s += sq[x&0xff];
+            s += sq[(x>>8)&0xff];
+            s += sq[(x>>16)&0xff];
+            s += sq[(x>>24)&0xff];
             s += sq[(x>>32)&0xff];
             s += sq[(x>>40)&0xff];
             s += sq[(x>>48)&0xff];
             s += sq[(x>>56)&0xff];
 #else
-	    register uint32_t x=*(uint32_t*)pix;
-	    s += sq[x&0xff];
-	    s += sq[(x>>8)&0xff];
-	    s += sq[(x>>16)&0xff];
-	    s += sq[(x>>24)&0xff];
+            register uint32_t x=*(uint32_t*)pix;
+            s += sq[x&0xff];
+            s += sq[(x>>8)&0xff];
+            s += sq[(x>>16)&0xff];
+            s += sq[(x>>24)&0xff];
             x=*(uint32_t*)(pix+4);
             s += sq[x&0xff];
             s += sq[(x>>8)&0xff];
@@ -197,16 +197,16 @@ static int pix_norm1_c(uint8_t * pix, int line_size)
             s += sq[(x>>24)&0xff];
 #endif
 #endif
-	    pix += 8;
-	}
-	pix += line_size - 16;
+            pix += 8;
+        }
+        pix += line_size - 16;
     }
     return s;
 }
 
 static void bswap_buf(uint32_t *dst, uint32_t *src, int w){
     int i;
-    
+
     for(i=0; i+8<=w; i+=8){
         dst[i+0]= bswap_32(src[i+0]);
         dst[i+1]= bswap_32(src[i+1]);
@@ -298,7 +298,7 @@ static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
     int tmp[16*16];
 #if 0
     int level, ori;
-    static const int scale[2][2][4][4]={ 
+    static const int scale[2][2][4][4]={
       {
         {
             //8x8 dec=3
@@ -350,7 +350,7 @@ static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
             int stride= 16<<(dec_count-level);
             int sy= (ori&2) ? stride>>1 : 0;
             int size= 1<<level;
-            
+
             for(i=0; i<size; i++){
                 for(j=0; j<size; j++){
                     int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori];
@@ -368,8 +368,8 @@ static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
             s+= ABS(tmp[16*i+j+3]);
         }
     }
-    assert(s>=0); 
-    
+    assert(s>=0);
+
     return s>>2;
 #else
     return 0;
@@ -412,7 +412,7 @@ static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int lin
 }
 
 static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
-			  const uint8_t *s2, int stride){
+                          const uint8_t *s2, int stride){
     int i;
 
     /* read the pixels */
@@ -433,11 +433,11 @@ static void diff_pixels_c(DCTELEM *restrict block, const uint8_t *s1,
 
 
 static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
-				 int line_size)
+                                 int line_size)
 {
     int i;
     uint8_t *cm = cropTbl + MAX_NEG_CROP;
-    
+
     /* read the pixels */
     for(i=0;i<8;i++) {
         pixels[0] = cm[block[0]];
@@ -455,11 +455,11 @@ static void put_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
 }
 
 static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels,
-				 int line_size)
+                                 int line_size)
 {
     int i;
     uint8_t *cm = cropTbl + MAX_NEG_CROP;
-    
+
     /* read the pixels */
     for(i=0;i<4;i++) {
         pixels[0] = cm[block[0]];
@@ -473,11 +473,11 @@ static void put_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
 }
 
 static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels,
-				 int line_size)
+                                 int line_size)
 {
     int i;
     uint8_t *cm = cropTbl + MAX_NEG_CROP;
-    
+
     /* read the pixels */
     for(i=0;i<2;i++) {
         pixels[0] = cm[block[0]];
@@ -488,7 +488,7 @@ static void put_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
     }
 }
 
-static void put_signed_pixels_clamped_c(const DCTELEM *block, 
+static void put_signed_pixels_clamped_c(const DCTELEM *block,
                                         uint8_t *restrict pixels,
                                         int line_size)
 {
@@ -514,7 +514,7 @@ static void add_pixels_clamped_c(const DCTELEM *block, uint8_t *restrict pixels,
 {
     int i;
     uint8_t *cm = cropTbl + MAX_NEG_CROP;
-    
+
     /* read the pixels */
     for(i=0;i<8;i++) {
         pixels[0] = cm[pixels[0] + block[0]];
@@ -535,7 +535,7 @@ static void add_pixels_clamped4_c(const DCTELEM *block, uint8_t *restrict pixels
 {
     int i;
     uint8_t *cm = cropTbl + MAX_NEG_CROP;
-    
+
     /* read the pixels */
     for(i=0;i<4;i++) {
         pixels[0] = cm[pixels[0] + block[0]];
@@ -552,7 +552,7 @@ static void add_pixels_clamped2_c(const DCTELEM *block, uint8_t *restrict pixels
 {
     int i;
     uint8_t *cm = cropTbl + MAX_NEG_CROP;
-    
+
     /* read the pixels */
     for(i=0;i<2;i++) {
         pixels[0] = cm[pixels[0] + block[0]];
@@ -1145,12 +1145,12 @@ static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h, int x16, int y
     }
 }
 
-static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy, 
+static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                   int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height)
 {
     int y, vx, vy;
     const int s= 1<<shift;
-    
+
     width--;
     height--;
 
@@ -1168,7 +1168,7 @@ static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
             frac_y= src_y&(s-1);
             src_x>>=shift;
             src_y>>=shift;
-  
+
             if((unsigned)src_x < width){
                 if((unsigned)src_y < height){
                     index= src_x + src_y*stride;
@@ -1178,23 +1178,23 @@ static void gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
                                            + src[index+stride+1]*   frac_x )*   frac_y
                                         + r)>>(shift*2);
                 }else{
-                    index= src_x + clip(src_y, 0, height)*stride;                    
-                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x) 
+                    index= src_x + clip(src_y, 0, height)*stride;
+                    dst[y*stride + x]= ( (  src[index         ]*(s-frac_x)
                                           + src[index       +1]*   frac_x )*s
                                         + r)>>(shift*2);
                 }
             }else{
                 if((unsigned)src_y < height){
-                    index= clip(src_x, 0, width) + src_y*stride;                    
-                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y) 
+                    index= clip(src_x, 0, width) + src_y*stride;
+                    dst[y*stride + x]= (  (  src[index         ]*(s-frac_y)
                                            + src[index+stride  ]*   frac_y )*s
                                         + r)>>(shift*2);
                 }else{
-                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;                    
+                    index= clip(src_x, 0, width) + clip(src_y, 0, height)*stride;
                     dst[y*stride + x]=    src[index         ];
                 }
             }
-            
+
             vx+= dxx;
             vy+= dyx;
         }
@@ -1216,7 +1216,7 @@ static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
+        dst[j] = (683*(2*src[j] + src[j+1] + 1)) >> 11;
       }
       src += stride;
       dst += stride;
@@ -1227,29 +1227,29 @@ static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
+        dst[j] = (683*(src[j] + 2*src[j+1] + 1)) >> 11;
       }
       src += stride;
       dst += stride;
     }
 }
-    
+
 static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
+        dst[j] = (683*(2*src[j] + src[j+stride] + 1)) >> 11;
       }
       src += stride;
       dst += stride;
     }
 }
-    
+
 static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
+        dst[j] = (2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15;
       }
       src += stride;
       dst += stride;
@@ -1260,7 +1260,7 @@ static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
+        dst[j] = (2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
       }
       src += stride;
       dst += stride;
@@ -1271,7 +1271,7 @@ static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
+        dst[j] = (683*(src[j] + 2*src[j+stride] + 1)) >> 11;
       }
       src += stride;
       dst += stride;
@@ -1282,7 +1282,7 @@ static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
+        dst[j] = (2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15;
       }
       src += stride;
       dst += stride;
@@ -1293,7 +1293,7 @@ static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
+        dst[j] = (2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15;
       }
       src += stride;
       dst += stride;
@@ -1313,7 +1313,7 @@ static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
+        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+1] + 1)) >> 11) + 1) >> 1;
       }
       src += stride;
       dst += stride;
@@ -1324,29 +1324,29 @@ static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
+        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+1] + 1)) >> 11) + 1) >> 1;
       }
       src += stride;
       dst += stride;
     }
 }
-    
+
 static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
+        dst[j] = (dst[j] + ((683*(2*src[j] + src[j+stride] + 1)) >> 11) + 1) >> 1;
       }
       src += stride;
       dst += stride;
     }
 }
-    
+
 static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+        dst[j] = (dst[j] + ((2731*(4*src[j] + 3*src[j+1] + 3*src[j+stride] + 2*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
       }
       src += stride;
       dst += stride;
@@ -1357,7 +1357,7 @@ static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+        dst[j] = (dst[j] + ((2731*(3*src[j] + 2*src[j+1] + 4*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
       }
       src += stride;
       dst += stride;
@@ -1368,7 +1368,7 @@ static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
+        dst[j] = (dst[j] + ((683*(src[j] + 2*src[j+stride] + 1)) >> 11) + 1) >> 1;
       }
       src += stride;
       dst += stride;
@@ -1379,7 +1379,7 @@ static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+        dst[j] = (dst[j] + ((2731*(3*src[j] + 4*src[j+1] + 2*src[j+stride] + 3*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
       }
       src += stride;
       dst += stride;
@@ -1390,7 +1390,7 @@ static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int
     int i,j;
     for (i=0; i < height; i++) {
       for (j=0; j < width; j++) {
-	dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
+        dst[j] = (dst[j] + ((2731*(2*src[j] + 3*src[j+1] + 3*src[j+stride] + 4*src[j+stride+1] + 6)) >> 15) + 1) >> 1;
       }
       src += stride;
       dst += stride;
@@ -1489,6 +1489,17 @@ H264_CHROMA_MC(avg_       , op_avg)
 #undef op_avg
 #undef op_put
 
+static inline void copy_block2(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
+{ /* Copy h rows from src to dst, one LD16/ST16 transfer per row. */
+    int i;
+    for(i=0; i<h; i++)
+    {
+        ST16(dst   , LD16(src   )); /* presumably a 16-bit (two-pixel) load/store; macros are defined elsewhere -- confirm */
+        dst+=dstStride; /* each buffer advances by its own stride */
+        src+=srcStride;
+    }
+}
+
 static inline void copy_block4(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h)
 {
     int i;
@@ -2054,6 +2065,68 @@ QPEL_MC(0, avg_       , _       , op_avg)
 
 #if 1
 #define H264_LOWPASS(OPNAME, OP, OP2) \
+static void OPNAME ## h264_qpel2_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* horizontal 6-tap (1,-5,20,20,-5,1) filter producing a 2x2 block */\
+    const int h=2; /* rows produced */\
+    uint8_t *cm = cropTbl + MAX_NEG_CROP; /* not referenced directly below; presumably consumed by the OP expansion -- confirm */\
+    int i;\
+    for(i=0; i<h; i++)\
+    {\
+        OP(dst[0], (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3])); /* taps src[-2..3] */\
+        OP(dst[1], (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4])); /* taps src[-1..4] */\
+        dst+=dstStride;\
+        src+=srcStride;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel2_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){ /* vertical 6-tap (1,-5,20,20,-5,1) filter producing a 2x2 block */\
+    const int w=2; /* columns processed, one per iteration */\
+    uint8_t *cm = cropTbl + MAX_NEG_CROP; /* not referenced directly below; presumably consumed by the OP expansion -- confirm */\
+    int i;\
+    for(i=0; i<w; i++)\
+    {\
+        const int srcB= src[-2*srcStride]; /* rows -2..4 of the current column are read */\
+        const int srcA= src[-1*srcStride];\
+        const int src0= src[0 *srcStride];\
+        const int src1= src[1 *srcStride];\
+        const int src2= src[2 *srcStride];\
+        const int src3= src[3 *srcStride];\
+        const int src4= src[4 *srcStride];\
+        OP(dst[0*dstStride], (src0+src1)*20 - (srcA+src2)*5 + (srcB+src3)); /* output row 0 */\
+        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*5 + (srcA+src4)); /* output row 1 */\
+        dst++; /* step to the next column */\
+        src++;\
+    }\
+}\
+\
+static void OPNAME ## h264_qpel2_hv_lowpass(uint8_t *dst, int16_t *tmp, uint8_t *src, int dstStride, int tmpStride, int srcStride){ /* two-pass 6-tap filter: horizontal into tmp, then vertical from tmp into dst */\
+    const int h=2;\
+    const int w=2;\
+    uint8_t *cm = cropTbl + MAX_NEG_CROP; /* not referenced directly below; presumably consumed by the OP2 expansion -- confirm */\
+    int i;\
+    src -= 2*srcStride; /* start two rows above the block: the vertical pass reads rows -2..4 */\
+    for(i=0; i<h+5; i++) /* h+5 = 7 intermediate rows */\
+    {\
+        tmp[0]= (src[0]+src[1])*20 - (src[-1]+src[2])*5 + (src[-2]+src[3]); /* raw filter sum stored as int16_t, no OP applied yet */\
+        tmp[1]= (src[1]+src[2])*20 - (src[0 ]+src[3])*5 + (src[-1]+src[4]);\
+        tmp+=tmpStride;\
+        src+=srcStride;\
+    }\
+    tmp -= tmpStride*(h+5-2); /* rewind from row 7 to row 2 of tmp, the row matching output row 0 */\
+    for(i=0; i<w; i++) /* one column per iteration */\
+    {\
+        const int tmpB= tmp[-2*tmpStride];\
+        const int tmpA= tmp[-1*tmpStride];\
+        const int tmp0= tmp[0 *tmpStride];\
+        const int tmp1= tmp[1 *tmpStride];\
+        const int tmp2= tmp[2 *tmpStride];\
+        const int tmp3= tmp[3 *tmpStride];\
+        const int tmp4= tmp[4 *tmpStride];\
+        OP2(dst[0*dstStride], (tmp0+tmp1)*20 - (tmpA+tmp2)*5 + (tmpB+tmp3)); /* OP2 rather than OP: presumably normalizes the gain of both passes -- confirm */\
+        OP2(dst[1*dstStride], (tmp1+tmp2)*20 - (tmp0+tmp3)*5 + (tmpA+tmp4));\
+        dst++;\
+        tmp++;\
+    }\
+}\
 static void OPNAME ## h264_qpel4_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     const int h=4;\
     uint8_t *cm = cropTbl + MAX_NEG_CROP;\
@@ -2400,6 +2473,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_c(uint8_t *dst, uint8_t *src, i
 
 H264_LOWPASS(put_       , op_put, op2_put)
 H264_LOWPASS(avg_       , op_avg, op2_avg)
+H264_MC(put_, 2)
 H264_MC(put_, 4)
 H264_MC(put_, 8)
 H264_MC(put_, 16)
@@ -2417,7 +2491,7 @@ H264_MC(avg_, 16)
 #define op_scale2(x)  dst[x] = clip_uint8( (src[x]*weights + dst[x]*weightd + offset) >> (log2_denom+1))
 #define H264_WEIGHT(W,H) \
 static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride, int log2_denom, int weight, int offset){ \
-    int attribute_unused x, y; \
+    int y; \
     offset <<= log2_denom; \
     if(log2_denom) offset += 1<<(log2_denom-1); \
     for(y=0; y<H; y++, block += stride){ \
@@ -2442,10 +2516,9 @@ static void weight_h264_pixels ## W ## x ## H ## _c(uint8_t *block, int stride,
         op_scale1(15); \
     } \
 } \
-static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets){ \
-    int attribute_unused x, y; \
-    int offset = (offsets + offsetd + 1) >> 1; \
-    offset = ((offset << 1) + 1) << log2_denom; \
+static void biweight_h264_pixels ## W ## x ## H ## _c(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
+    int y; \
+    offset = ((offset + 1) | 1) << log2_denom; \
     for(y=0; y<H; y++, dst += stride, src += stride){ \
         op_scale2(0); \
         op_scale2(1); \
@@ -2498,7 +2571,7 @@ static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int
         dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
         dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
         dst+=dstStride;
-        src+=srcStride;        
+        src+=srcStride;
     }
 }
 
@@ -2582,7 +2655,7 @@ static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, int stride){
 static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
     int x;
     const int strength= ff_h263_loop_filter_strength[qscale];
-    
+
     for(x=0; x<8; x++){
         int d1, d2, ad1;
         int p0= src[x-2*stride];
@@ -2596,19 +2669,19 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
         else if(d<   strength) d1= d;
         else if(d< 2*strength) d1= 2*strength - d;
         else                   d1= 0;
-        
+
         p1 += d1;
         p2 -= d1;
         if(p1&256) p1= ~(p1>>31);
         if(p2&256) p2= ~(p2>>31);
-        
+
         src[x-1*stride] = p1;
         src[x+0*stride] = p2;
 
         ad1= ABS(d1)>>1;
-        
+
         d2= clip((p0-p3)/4, -ad1, ad1);
-        
+
         src[x-2*stride] = p0 - d2;
         src[x+  stride] = p3 + d2;
     }
@@ -2617,7 +2690,7 @@ static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
 static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
     int y;
     const int strength= ff_h263_loop_filter_strength[qscale];
-    
+
     for(y=0; y<8; y++){
         int d1, d2, ad1;
         int p0= src[y*stride-2];
@@ -2631,19 +2704,19 @@ static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
         else if(d<   strength) d1= d;
         else if(d< 2*strength) d1= 2*strength - d;
         else                   d1= 0;
-        
+
         p1 += d1;
         p2 -= d1;
         if(p1&256) p1= ~(p1>>31);
         if(p2&256) p2= ~(p2>>31);
-        
+
         src[y*stride-1] = p1;
         src[y*stride+0] = p2;
 
         ad1= ABS(d1)>>1;
-        
+
         d2= clip((p0-p3)/4, -ad1, ad1);
-        
+
         src[y*stride-2] = p0 - d2;
         src[y*stride+1] = p3 + d2;
     }
@@ -2664,7 +2737,7 @@ static void h261_loop_filter_c(uint8_t *src, int stride){
             temp[yz] = src[xy - stride] + 2*src[xy] + src[xy + stride];
         }
     }
-        
+
     for(y=0; y<8; y++){
         src[  y*stride] = (temp[  y*8] + 2)>>2;
         src[7+y*stride] = (temp[7+y*8] + 2)>>2;
@@ -2691,14 +2764,14 @@ static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystrid
             const int q0 = pix[0];
             const int q1 = pix[1*xstride];
             const int q2 = pix[2*xstride];
-    
+
             if( ABS( p0 - q0 ) < alpha &&
                 ABS( p1 - p0 ) < beta &&
                 ABS( q1 - q0 ) < beta ) {
-    
+
                 int tc = tc0[i];
                 int i_delta;
-    
+
                 if( ABS( p2 - p0 ) < beta ) {
                     pix[-2*xstride] = p1 + clip( (( p2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - p1, -tc0[i], tc0[i] );
                     tc++;
@@ -2707,7 +2780,7 @@ static inline void h264_loop_filter_luma_c(uint8_t *pix, int xstride, int ystrid
                     pix[   xstride] = q1 + clip( (( q2 + ( ( p0 + q0 + 1 ) >> 1 ) ) >> 1) - q1, -tc0[i], tc0[i] );
                     tc++;
                 }
-    
+
                 i_delta = clip( (((q0 - p0 ) << 2) + (p1 - q1) + 4) >> 3, -tc, tc );
                 pix[-xstride] = clip_uint8( p0 + i_delta );    /* p0' */
                 pix[0]        = clip_uint8( q0 - i_delta );    /* q0' */
@@ -3021,7 +3094,7 @@ static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
     int score1=0;
     int score2=0;
     int x,y;
-    
+
     for(y=0; y<h; y++){
         for(x=0; x<8; x++){
             score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
@@ -3037,7 +3110,7 @@ static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
         s1+= stride;
         s2+= stride;
     }
-    
+
     if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
     else  return score1 + ABS(score2)*8;
 }
@@ -3062,7 +3135,7 @@ static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
 
     for(i=0; i<8*8; i++){
         rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
-    }    
+    }
 }
 
 /**
@@ -3070,14 +3143,14 @@ static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
  * @param block the block which will be permuted according to the given permutation vector
  * @param permutation the permutation vector
  * @param last the last non zero coefficient in scantable order, used to speed the permutation up
- * @param scantable the used scantable, this is only used to speed the permutation up, the block is not 
+ * @param scantable the used scantable, this is only used to speed the permutation up, the block is not
  *                  (inverse) permutated to scantable order!
  */
 void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scantable, int last)
 {
     int i;
     DCTELEM temp[64];
-    
+
     if(last<=0) return;
     //if(permutation[1]==1) return; //FIXME its ok but not clean and might fail for some perms
 
@@ -3086,7 +3159,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant
         temp[j]= block[j];
         block[j]=0;
     }
-    
+
     for(i=0; i<=last; i++){
         const int j= scantable[i];
         const int perm_j= permutation[j];
@@ -3100,9 +3173,9 @@ static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
 
 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
     int i;
-    
+
     memset(cmp, 0, sizeof(void*)*5);
-        
+
     for(i=0; i<5; i++){
         switch(type&0xFF){
         case FF_CMP_SAD:
@@ -3117,6 +3190,9 @@ void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
         case FF_CMP_DCT:
             cmp[i]= c->dct_sad[i];
             break;
+        case FF_CMP_DCT264:
+            cmp[i]= c->dct264_sad[i];
+            break;
         case FF_CMP_DCTMAX:
             cmp[i]= c->dct_max[i];
             break;
@@ -3205,7 +3281,7 @@ static void sub_hfyu_median_prediction_c(uint8_t *dst, uint8_t *src1, uint8_t *s
         lt= src1[i];
         l= src2[i];
         dst[i]= l - pred;
-    }    
+    }
 
     *left= l;
     *left_top= lt;
@@ -3230,7 +3306,7 @@ static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t
     int i;
     int temp[64];
     int sum=0;
-    
+
     assert(h==8);
 
     for(i=0; i<8; i++){
@@ -3239,12 +3315,12 @@ static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t
         BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
         BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
         BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);
-        
+
         BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
         BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
         BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
         BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
-        
+
         BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
         BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
         BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
@@ -3256,13 +3332,13 @@ static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t
         BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
         BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
         BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
-        
+
         BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
         BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
         BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
         BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
 
-        sum += 
+        sum +=
              BUTTERFLYA(temp[8*0+i], temp[8*4+i])
             +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
             +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
@@ -3282,21 +3358,21 @@ static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_
     int i;
     int temp[64];
     int sum=0;
-    
+
     assert(h==8);
-    
+
     for(i=0; i<8; i++){
         //FIXME try pointer walks
         BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
         BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
         BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
         BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);
-        
+
         BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
         BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
         BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
         BUTTERFLY1(temp[8*i+5], temp[8*i+7]);
-        
+
         BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
         BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
         BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
@@ -3308,30 +3384,30 @@ static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_
         BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
         BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
         BUTTERFLY1(temp[8*6+i], temp[8*7+i]);
-        
+
         BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
         BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
         BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
         BUTTERFLY1(temp[8*5+i], temp[8*7+i]);
-    
-        sum += 
+
+        sum +=
              BUTTERFLYA(temp[8*0+i], temp[8*4+i])
             +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
             +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
             +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
     }
-    
+
     sum -= ABS(temp[8*0] + temp[8*4]); // -mean
-    
+
     return sum;
 }
 
 static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
     MpegEncContext * const s= (MpegEncContext *)c;
-    uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
+    DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
     DCTELEM * const temp= (DCTELEM*)aligned_temp;
     int sum=0, i;
-    
+
     assert(h==8);
 
     s->dsp.diff_pixels(temp, src1, src2, stride);
@@ -3339,16 +3415,69 @@ static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
 
     for(i=0; i<64; i++)
         sum+= ABS(temp[i]);
-        
+
     return sum;
 }
 
+#ifdef CONFIG_GPL
+#define DCT8_1D { /* one 8-point 1-D transform pass; the user must define SRC(x) to read input x and DST(x,v) to consume output x */\
+    const int s07 = SRC(0) + SRC(7); /* sums of mirrored input pairs */\
+    const int s16 = SRC(1) + SRC(6);\
+    const int s25 = SRC(2) + SRC(5);\
+    const int s34 = SRC(3) + SRC(4);\
+    const int a0 = s07 + s34; /* a0..a3 feed the even-numbered outputs */\
+    const int a1 = s16 + s25;\
+    const int a2 = s07 - s34;\
+    const int a3 = s16 - s25;\
+    const int d07 = SRC(0) - SRC(7); /* differences of mirrored input pairs */\
+    const int d16 = SRC(1) - SRC(6);\
+    const int d25 = SRC(2) - SRC(5);\
+    const int d34 = SRC(3) - SRC(4);\
+    const int a4 = d16 + d25 + (d07 + (d07>>1)); /* x + (x>>1): roughly 1.5*x using only shift and add; a4..a7 feed the odd outputs */\
+    const int a5 = d07 - d34 - (d25 + (d25>>1));\
+    const int a6 = d07 + d34 - (d16 + (d16>>1));\
+    const int a7 = d16 - d25 + (d34 + (d34>>1));\
+    DST(0,  a0 + a1     ) ; /* every SRC() is read above before the first DST(), so DST may overwrite the input */\
+    DST(1,  a4 + (a7>>2)) ;\
+    DST(2,  a2 + (a3>>1)) ;\
+    DST(3,  a5 + (a6>>2)) ;\
+    DST(4,  a0 - a1     ) ;\
+    DST(5,  a6 - (a5>>2)) ;\
+    DST(6, (a2>>1) - a3 ) ;\
+    DST(7, (a4>>2) - a7 ) ;\
+}
+
+static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){ /* returns the sum of absolute 2-D DCT8_1D coefficients of the 8x8 block diff_pixels builds from src1/src2; h is not used here */
+    MpegEncContext * const s= (MpegEncContext *)c;
+    DCTELEM dct[8][8]; /* element type matches the DCTELEM block that diff_pixels fills */
+    int i;
+    int sum=0;
+
+    s->dsp.diff_pixels(dct[0], src1, src2, stride); /* dct[0] decays to DCTELEM*; bare dct would be an incompatible pointer-to-array */
+
+#define SRC(x) dct[i][x] /* pass 1: transform row i in place */
+#define DST(x,v) dct[i][x]= v
+    for( i = 0; i < 8; i++ )
+        DCT8_1D
+#undef SRC
+#undef DST
+
+#define SRC(x) dct[x][i] /* pass 2: transform column i and accumulate |coefficient| without storing it */
+#define DST(x,v) sum += ABS(v)
+    for( i = 0; i < 8; i++ )
+        DCT8_1D
+#undef SRC
+#undef DST
+    return sum;
+}
+#endif
+
 static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
     MpegEncContext * const s= (MpegEncContext *)c;
-    uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
+    DECLARE_ALIGNED_8(uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
     DCTELEM * const temp= (DCTELEM*)aligned_temp;
     int sum=0, i;
-    
+
     assert(h==8);
 
     s->dsp.diff_pixels(temp, src1, src2, stride);
@@ -3356,7 +3485,7 @@ static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2
 
     for(i=0; i<64; i++)
         sum= FFMAX(sum, ABS(temp[i]));
-        
+
     return sum;
 }
 
@@ -3364,40 +3493,40 @@ void simple_idct(DCTELEM *block); //FIXME
 
 static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
     MpegEncContext * const s= (MpegEncContext *)c;
-    uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64*2/8];
+    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64*2/8]);
     DCTELEM * const temp= (DCTELEM*)aligned_temp;
     DCTELEM * const bak = ((DCTELEM*)aligned_temp)+64;
     int sum=0, i;
 
     assert(h==8);
     s->mb_intra=0;
-    
+
     s->dsp.diff_pixels(temp, src1, src2, stride);
-    
+
     memcpy(bak, temp, 64*sizeof(DCTELEM));
-    
+
     s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
     s->dct_unquantize_inter(s, temp, 0, s->qscale);
-    simple_idct(temp); //FIXME 
-    
+    simple_idct(temp); //FIXME
+
     for(i=0; i<64; i++)
         sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
-        
+
     return sum;
 }
 
 static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
     MpegEncContext * const s= (MpegEncContext *)c;
     const uint8_t *scantable= s->intra_scantable.permutated;
-    uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
-    uint64_t __align8 aligned_bak[stride];
+    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
+    DECLARE_ALIGNED_8 (uint64_t, aligned_bak[stride]);
     DCTELEM * const temp= (DCTELEM*)aligned_temp;
     uint8_t * const bak= (uint8_t*)aligned_bak;
     int i, last, run, bits, level, distoration, start_i;
     const int esc_length= s->ac_esc_length;
     uint8_t * length;
     uint8_t * last_length;
-    
+
     assert(h==8);
 
     for(i=0; i<8; i++){
@@ -3410,9 +3539,9 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
     s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
 
     bits=0;
-    
+
     if (s->mb_intra) {
-        start_i = 1; 
+        start_i = 1;
         length     = s->intra_ac_vlc_length;
         last_length= s->intra_ac_vlc_last_length;
         bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
@@ -3421,13 +3550,13 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
         length     = s->inter_ac_vlc_length;
         last_length= s->inter_ac_vlc_last_length;
     }
-    
+
     if(last>=start_i){
         run=0;
         for(i=start_i; i<last; i++){
             int j= scantable[i];
             level= temp[j];
-        
+
             if(level){
                 level+=64;
                 if((level&(~127)) == 0){
@@ -3439,16 +3568,16 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
                 run++;
         }
         i= scantable[last];
-       
+
         level= temp[i] + 64;
 
         assert(level - 64);
-        
+
         if((level&(~127)) == 0){
             bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
         }else
             bits+= esc_length;
-    
+
     }
 
     if(last>=0){
@@ -3457,9 +3586,9 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
         else
             s->dct_unquantize_inter(s, temp, 0, s->qscale);
     }
-    
+
     s->dsp.idct_add(bak, stride, temp);
-    
+
     distoration= s->dsp.sse[1](NULL, bak, src1, stride, 8);
 
     return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7);
@@ -3468,7 +3597,7 @@ static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int
 static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
     MpegEncContext * const s= (MpegEncContext *)c;
     const uint8_t *scantable= s->intra_scantable.permutated;
-    uint64_t __align8 aligned_temp[sizeof(DCTELEM)*64/8];
+    DECLARE_ALIGNED_8 (uint64_t, aligned_temp[sizeof(DCTELEM)*64/8]);
     DCTELEM * const temp= (DCTELEM*)aligned_temp;
     int i, last, run, bits, level, start_i;
     const int esc_length= s->ac_esc_length;
@@ -3476,15 +3605,15 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
     uint8_t * last_length;
 
     assert(h==8);
-    
+
     s->dsp.diff_pixels(temp, src1, src2, stride);
 
     s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
 
     bits=0;
-    
+
     if (s->mb_intra) {
-        start_i = 1; 
+        start_i = 1;
         length     = s->intra_ac_vlc_length;
         last_length= s->intra_ac_vlc_last_length;
         bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
@@ -3493,13 +3622,13 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
         length     = s->inter_ac_vlc_length;
         last_length= s->inter_ac_vlc_last_length;
     }
-    
+
     if(last>=start_i){
         run=0;
         for(i=start_i; i<last; i++){
             int j= scantable[i];
             level= temp[j];
-        
+
             if(level){
                 level+=64;
                 if((level&(~127)) == 0){
@@ -3511,11 +3640,11 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
                 run++;
         }
         i= scantable[last];
-                
+
         level= temp[i] + 64;
-        
+
         assert(level - 64);
-        
+
         if((level&(~127)) == 0){
             bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
         }else
@@ -3528,22 +3657,22 @@ static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, in
 static int vsad_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
     int score=0;
     int x,y;
-    
+
     for(y=1; y<h; y++){
         for(x=0; x<16; x+=4){
-            score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride]) 
+            score+= ABS(s[x  ] - s[x  +stride]) + ABS(s[x+1] - s[x+1+stride])
                    +ABS(s[x+2] - s[x+2+stride]) + ABS(s[x+3] - s[x+3+stride]);
         }
         s+= stride;
     }
-    
+
     return score;
 }
 
 static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
     int score=0;
     int x,y;
-    
+
     for(y=1; y<h; y++){
         for(x=0; x<16; x++){
             score+= ABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
@@ -3551,7 +3680,7 @@ static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int st
         s1+= stride;
         s2+= stride;
     }
-    
+
     return score;
 }
 
@@ -3559,22 +3688,22 @@ static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int st
 static int vsse_intra16_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){
     int score=0;
     int x,y;
-    
+
     for(y=1; y<h; y++){
         for(x=0; x<16; x+=4){
-            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride]) 
+            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])
                    +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
         }
         s+= stride;
     }
-    
+
     return score;
 }
 
 static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
     int score=0;
     int x,y;
-    
+
     for(y=1; y<h; y++){
         for(x=0; x<16; x++){
             score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
@@ -3582,13 +3711,16 @@ static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int st
         s1+= stride;
         s2+= stride;
     }
-    
+
     return score;
 }
 
 WARPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
 WARPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
 WARPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
+#ifdef CONFIG_GPL
+WARPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
+#endif
 WARPER8_16_SQ(dct_max8x8_c, dct_max16_c)
 WARPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
 WARPER8_16_SQ(rd8x8_c, rd16_c)
@@ -3652,11 +3784,11 @@ void dsputil_static_init(void)
         cropTbl[i] = 0;
         cropTbl[i + MAX_NEG_CROP + 256] = 255;
     }
-    
+
     for(i=0;i<512;i++) {
         squareTbl[i] = (i - 256) * (i - 256);
     }
-    
+
     for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
 }
 
@@ -3668,15 +3800,15 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 #ifdef CONFIG_ENCODERS
     if(avctx->dct_algo==FF_DCT_FASTINT) {
         c->fdct = fdct_ifast;
-	c->fdct248 = fdct_ifast248;
-    } 
+        c->fdct248 = fdct_ifast248;
+    }
     else if(avctx->dct_algo==FF_DCT_FAAN) {
         c->fdct = ff_faandct;
-	c->fdct248 = ff_faandct248; 
-    } 
+        c->fdct248 = ff_faandct248;
+    }
     else {
         c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
-	c->fdct248 = ff_fdct248_islow;
+        c->fdct248 = ff_fdct248_islow;
     }
 #endif //CONFIG_ENCODERS
 
@@ -3822,6 +3954,7 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     dspfunc(put_h264_qpel, 0, 16);
     dspfunc(put_h264_qpel, 1, 8);
     dspfunc(put_h264_qpel, 2, 4);
+    dspfunc(put_h264_qpel, 3, 2);
     dspfunc(avg_h264_qpel, 0, 16);
     dspfunc(avg_h264_qpel, 1, 8);
     dspfunc(avg_h264_qpel, 2, 4);
@@ -3863,15 +3996,18 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
     c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
     c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;
-        
+
 #define SET_CMP_FUNC(name) \
     c->name[0]= name ## 16_c;\
     c->name[1]= name ## 8x8_c;
-    
+
     SET_CMP_FUNC(hadamard8_diff)
     c->hadamard8_diff[4]= hadamard8_intra16_c;
     SET_CMP_FUNC(dct_sad)
     SET_CMP_FUNC(dct_max)
+#ifdef CONFIG_GPL
+    SET_CMP_FUNC(dct264_sad)
+#endif
     c->sad[0]= pix_abs16_c;
     c->sad[1]= pix_abs8_c;
     c->sse[0]= sse16_c;
@@ -3902,12 +4038,12 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
     c->h264_h_loop_filter_chroma= h264_h_loop_filter_chroma_c;
     c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_c;
     c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_c;
-    
+
     c->h263_h_loop_filter= h263_h_loop_filter_c;
     c->h263_v_loop_filter= h263_v_loop_filter_c;
-    
+
     c->h261_loop_filter= h261_loop_filter_c;
-    
+
     c->try_8x8basis= try_8x8basis_c;
     c->add_8x8basis= add_8x8basis_c;
 
diff --git a/src/libffmpeg/libavcodec/dsputil.h b/src/libffmpeg/libavcodec/dsputil.h
index a9b472f86..dc3bc01e8 100644
--- a/src/libffmpeg/libavcodec/dsputil.h
+++ b/src/libffmpeg/libavcodec/dsputil.h
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
@@ -31,7 +31,7 @@
 #include "common.h"
 #include "avcodec.h"
 
-#if defined(ARCH_X86)
+#if defined(ARCH_X86) || defined(ARCH_X86_64)
 #define HAVE_MMX 1
 #endif
 
@@ -102,7 +102,7 @@ typedef void (*tpel_mc_func)(uint8_t *block/*align width (8 or 16)*/, const uint
 typedef void (*qpel_mc_func)(uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);
 typedef void (*h264_chroma_mc_func)(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int srcStride, int h, int x, int y);
 typedef void (*h264_weight_func)(uint8_t *block, int stride, int log2_denom, int weight, int offset);
-typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets);
+typedef void (*h264_biweight_func)(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset);
 
 #define DEF_OLD_QPEL(name)\
 void ff_put_        ## name (uint8_t *dst/*align width (8 or 16)*/, uint8_t *src/*align 1*/, int stride);\
@@ -154,12 +154,12 @@ typedef struct DSPContext {
      * global motion compensation.
      */
     void (*gmc )(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int ox, int oy,
-		    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
+                    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
     void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
     int (*pix_sum)(uint8_t * pix, int line_size);
     int (*pix_norm1)(uint8_t * pix, int line_size);
 // 16x16 8x8 4x4 2x2 16x8 8x4 4x2 8x16 4x8 2x4
-    
+
     me_cmp_func sad[5]; /* identical to pix_absAxA except additional void * */
     me_cmp_func sse[5];
     me_cmp_func hadamard8_diff[5];
@@ -173,6 +173,7 @@ typedef struct DSPContext {
     me_cmp_func w53[5];
     me_cmp_func w97[5];
     me_cmp_func dct_max[5];
+    me_cmp_func dct264_sad[5];
 
     me_cmp_func me_pre_cmp[5];
     me_cmp_func me_cmp[5];
@@ -183,7 +184,7 @@ typedef struct DSPContext {
 
     /**
      * Halfpel motion compensation with rounding (a+b+1)>>1.
-     * this is an array[4][4] of motion compensation funcions for 4 
+     * this is an array[4][4] of motion compensation funcions for 4
      * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
      * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
      * @param block destination where the result is stored
@@ -195,7 +196,7 @@ typedef struct DSPContext {
 
     /**
      * Halfpel motion compensation with rounding (a+b+1)>>1.
-     * This is an array[4][4] of motion compensation functions for 4 
+     * This is an array[4][4] of motion compensation functions for 4
      * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
      * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
      * @param block destination into which the result is averaged (a+b+1)>>1
@@ -207,7 +208,7 @@ typedef struct DSPContext {
 
     /**
      * Halfpel motion compensation with no rounding (a+b)>>1.
-     * this is an array[2][4] of motion compensation funcions for 2 
+     * this is an array[2][4] of motion compensation funcions for 2
      * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
      * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
      * @param block destination where the result is stored
@@ -219,7 +220,7 @@ typedef struct DSPContext {
 
     /**
      * Halfpel motion compensation with no rounding (a+b)>>1.
-     * this is an array[2][4] of motion compensation funcions for 2 
+     * this is an array[2][4] of motion compensation funcions for 2
      * horizontal blocksizes (8,16) and the 4 halfpel positions<br>
      * *pixels_tab[ 0->16xH 1->8xH ][ xhalfpel + 2*yhalfpel ]
      * @param block destination into which the result is averaged (a+b)>>1
@@ -228,9 +229,9 @@ typedef struct DSPContext {
      * @param h height
      */
     op_pixels_func avg_no_rnd_pixels_tab[4][4];
-    
+
     void (*put_no_rnd_pixels_l2[2])(uint8_t *block/*align width (8 or 16)*/, const uint8_t *a/*align 1*/, const uint8_t *b/*align 1*/, int line_size, int h);
-    
+
     /**
      * Thirdpel motion compensation with rounding (a+b+1)>>1.
      * this is an array[12] of motion compensation funcions for the 9 thirdpel positions<br>
@@ -248,21 +249,21 @@ typedef struct DSPContext {
     qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
     qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
     qpel_mc_func put_mspel_pixels_tab[8];
-    
+
     /**
      * h264 Chram MC
      */
     h264_chroma_mc_func put_h264_chroma_pixels_tab[3];
     h264_chroma_mc_func avg_h264_chroma_pixels_tab[3];
 
-    qpel_mc_func put_h264_qpel_pixels_tab[3][16];
-    qpel_mc_func avg_h264_qpel_pixels_tab[3][16];
-    
+    qpel_mc_func put_h264_qpel_pixels_tab[4][16];
+    qpel_mc_func avg_h264_qpel_pixels_tab[4][16];
+
     h264_weight_func weight_h264_pixels_tab[10];
     h264_biweight_func biweight_h264_pixels_tab[10];
-    
+
     me_cmp_func pix_abs[2][4];
-    
+
     /* huffyuv specific */
     void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
     void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
@@ -279,7 +280,7 @@ typedef struct DSPContext {
     void (*h264_h_loop_filter_chroma)(uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0);
     void (*h264_v_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta);
     void (*h264_h_loop_filter_chroma_intra)(uint8_t *pix, int stride, int alpha, int beta);
-    
+
     void (*h263_v_loop_filter)(uint8_t *src, int stride, int qscale);
     void (*h263_h_loop_filter)(uint8_t *src, int stride, int qscale);
 
@@ -288,23 +289,23 @@ typedef struct DSPContext {
     /* (I)DCT */
     void (*fdct)(DCTELEM *block/* align 16*/);
     void (*fdct248)(DCTELEM *block/* align 16*/);
-    
+
     /* IDCT really*/
     void (*idct)(DCTELEM *block/* align 16*/);
-    
+
     /**
      * block -> idct -> clip to unsigned 8 bit -> dest.
      * (-1392, 0, 0, ...) -> idct -> (-174, -174, ...) -> put -> (0, 0, ...)
      * @param line_size size in bytes of a horizotal line of dest
      */
     void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
-    
+
     /**
      * block -> idct -> add dest -> clip to unsigned 8 bit -> dest.
      * @param line_size size in bytes of a horizotal line of dest
      */
     void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
-    
+
     /**
      * idct input permutation.
      * several optimized IDCTs need a permutated input (relative to the normal order of the reference
@@ -329,7 +330,7 @@ typedef struct DSPContext {
     void (*add_8x8basis)(int16_t rem[64], int16_t basis[64], int scale);
 #define BASIS_SHIFT 16
 #define RECON_SHIFT 6
- 
+
     void (*h264_idct_add)(uint8_t *dst, DCTELEM *block, int stride);
     void (*h264_idct8_add)(uint8_t *dst, DCTELEM *block, int stride);
 } DSPContext;
@@ -345,7 +346,7 @@ void ff_block_permute(DCTELEM *block, uint8_t *permutation, const uint8_t *scant
 
 void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type);
 
-#define	BYTE_VEC32(c)	((c)*0x01010101UL)
+#define         BYTE_VEC32(c)   ((c)*0x01010101UL)
 
 static inline uint32_t rnd_avg32(uint32_t a, uint32_t b)
 {
@@ -369,6 +370,7 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
     case FF_CMP_W97:
         return (2*lambda)>>(FF_LAMBDA_SHIFT);
     case FF_CMP_SATD:
+    case FF_CMP_DCT264:
         return (2*lambda)>>FF_LAMBDA_SHIFT;
     case FF_CMP_RD:
     case FF_CMP_PSNR:
@@ -391,7 +393,11 @@ static inline int get_penalty_factor(int lambda, int lambda2, int type){
    one or more MultiMedia extension */
 int mm_support(void);
 
-#define __align16 __attribute__ ((aligned (16)))
+#ifdef __GNUC__
+  #define DECLARE_ALIGNED_16(t,v)       t v __attribute__ ((aligned (16)))
+#else
+  #define DECLARE_ALIGNED_16(t,v)      __declspec(align(16)) t v
+#endif
 
 #if defined(HAVE_MMX)
 
@@ -422,7 +428,12 @@ static inline void emms(void)
         emms();\
 }
 
-#define __align8 __attribute__ ((aligned (8)))
+#ifdef __GNUC__
+  #define DECLARE_ALIGNED_8(t,v)       t v __attribute__ ((aligned (8)))
+#else
+  #define DECLARE_ALIGNED_8(t,v)      __declspec(align(8)) t v
+#endif
+
 #define STRIDE_ALIGN 8
 
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
@@ -432,7 +443,7 @@ void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
 
 /* This is to use 4 bytes read to the IDCT pointers for some 'zero'
    line optimizations */
-#define __align8 __attribute__ ((aligned (4)))
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (4)))
 #define STRIDE_ALIGN 4
 
 #define MM_IWMMXT    0x0100 /* XScale IWMMXT */
@@ -444,7 +455,7 @@ void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx);
 #elif defined(HAVE_MLIB)
 
 /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
-#define __align8 __attribute__ ((aligned (8)))
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))
 #define STRIDE_ALIGN 8
 
 void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
@@ -452,13 +463,13 @@ void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
 #elif defined(ARCH_SPARC)
 
 /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
-#define __align8 __attribute__ ((aligned (8)))
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))
 #define STRIDE_ALIGN 8
 void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx);
 
 #elif defined(ARCH_ALPHA)
 
-#define __align8 __attribute__ ((aligned (8)))
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))
 #define STRIDE_ALIGN 8
 
 void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
@@ -475,28 +486,28 @@ extern int mm_flags;
 #undef pixel
 #endif
 
-#define __align8 __attribute__ ((aligned (16)))
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (16)))
 #define STRIDE_ALIGN 16
 
 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
 
 #elif defined(HAVE_MMI)
 
-#define __align8 __attribute__ ((aligned (16)))
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (16)))
 #define STRIDE_ALIGN 16
 
 void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
 
 #elif defined(ARCH_SH4)
 
-#define __align8 __attribute__ ((aligned (8)))
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))
 #define STRIDE_ALIGN 8
 
 void dsputil_init_sh4(DSPContext* c, AVCodecContext *avctx);
 
 #else
 
-#define __align8 __attribute__ ((aligned (8)))
+#define DECLARE_ALIGNED_8(t,v)    t v __attribute__ ((aligned (8)))
 #define STRIDE_ALIGN 8
 
 #endif
@@ -511,6 +522,7 @@ struct unaligned_16 { uint16_t l; } __attribute__((packed));
 #define LD32(a) (((const struct unaligned_32 *) (a))->l)
 #define LD64(a) (((const struct unaligned_64 *) (a))->l)
 
+#define ST16(a, b) (((struct unaligned_16 *) (a))->l) = (b)
 #define ST32(a, b) (((struct unaligned_32 *) (a))->l) = (b)
 
 #else /* __GNUC__ */
diff --git a/src/libffmpeg/libavcodec/dv.c b/src/libffmpeg/libavcodec/dv.c
index 09fb77299..08611a900 100644
--- a/src/libffmpeg/libavcodec/dv.c
+++ b/src/libffmpeg/libavcodec/dv.c
@@ -3,7 +3,7 @@
  * Copyright (c) 2002 Fabrice Bellard.
  * Copyright (c) 2004 Roman Shaposhnik.
  *
- * DV encoder 
+ * DV encoder
  * Copyright (c) 2003 Roman Shaposhnik.
  *
  * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth
@@ -21,7 +21,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
@@ -42,28 +42,28 @@ typedef struct DVVideoContext {
     AVFrame picture;
     AVCodecContext *avctx;
     uint8_t *buf;
-    
+
     uint8_t dv_zigzag[2][64];
     uint8_t dv_idct_shift[2][2][22][64];
-  
+
     void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
     void (*fdct[2])(DCTELEM *block);
     void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
 } DVVideoContext;
 
+/* MultiThreading - applies to entire DV codec, not just the avcontext */
+uint8_t** dv_anchor;
+
 #define TEX_VLC_BITS 9
 
 #ifdef DV_CODEC_TINY_TARGET
 #define DV_VLC_MAP_RUN_SIZE 15
 #define DV_VLC_MAP_LEV_SIZE 23
 #else
-#define DV_VLC_MAP_RUN_SIZE  64 
+#define DV_VLC_MAP_RUN_SIZE  64
 #define DV_VLC_MAP_LEV_SIZE 512 //FIXME sign was removed so this should be /2 but needs check
 #endif
 
-/* MultiThreading */
-static uint8_t** dv_anchor;
-
 /* XXX: also include quantization */
 static RL_VLC_ELEM *dv_rl_vlc;
 /* VLC encoding lookup table */
@@ -84,15 +84,15 @@ static void dv_build_unquantize_tables(DVVideoContext *s, uint8_t* perm)
             j = perm[i];
             s->dv_idct_shift[0][0][q][j] =
                 dv_quant_shifts[q][dv_88_areas[i]] + 1;
-	    s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1;
+            s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1;
         }
-        
+
         /* 248DCT */
         for(i = 1; i < 64; i++) {
             /* 248 table */
-            s->dv_idct_shift[0][1][q][i] =  
+            s->dv_idct_shift[0][1][q][i] =
                 dv_quant_shifts[q][dv_248_areas[i]] + 1;
-	    s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
+            s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
         }
     }
 }
@@ -114,51 +114,50 @@ static int dvvideo_init(AVCodecContext *avctx)
         done = 1;
 
         dv_vlc_map = av_mallocz_static(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair));
-	if (!dv_vlc_map)
-	    return -ENOMEM;
-
-	/* dv_anchor lets each thread know its Id */
-	dv_anchor = av_malloc(12*27*sizeof(void*));
-	if (!dv_anchor) {
-	    return -ENOMEM;
-	}
-	for (i=0; i<12*27; i++)
-	    dv_anchor[i] = (void*)(size_t)i;
-
-	/* it's faster to include sign bit in a generic VLC parsing scheme */
-	for (i=0, j=0; i<NB_DV_VLC; i++, j++) {
-	    new_dv_vlc_bits[j] = dv_vlc_bits[i];
-	    new_dv_vlc_len[j] = dv_vlc_len[i];
-	    new_dv_vlc_run[j] = dv_vlc_run[i];
-	    new_dv_vlc_level[j] = dv_vlc_level[i];
-	    
-	    if (dv_vlc_level[i]) {
-	        new_dv_vlc_bits[j] <<= 1;
-		new_dv_vlc_len[j]++;
-
-		j++;
-		new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
-		new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
-		new_dv_vlc_run[j] = dv_vlc_run[i];
-		new_dv_vlc_level[j] = -dv_vlc_level[i];
-	    }
-	}
-             
+        if (!dv_vlc_map)
+            return -ENOMEM;
+
+        /* dv_anchor lets each thread know its Id */
+        dv_anchor = av_malloc(12*27*sizeof(void*));
+        if (!dv_anchor) {
+            return -ENOMEM;
+        }
+        for (i=0; i<12*27; i++)
+            dv_anchor[i] = (void*)(size_t)i;
+
+        /* it's faster to include sign bit in a generic VLC parsing scheme */
+        for (i=0, j=0; i<NB_DV_VLC; i++, j++) {
+            new_dv_vlc_bits[j] = dv_vlc_bits[i];
+            new_dv_vlc_len[j] = dv_vlc_len[i];
+            new_dv_vlc_run[j] = dv_vlc_run[i];
+            new_dv_vlc_level[j] = dv_vlc_level[i];
+
+            if (dv_vlc_level[i]) {
+                new_dv_vlc_bits[j] <<= 1;
+                new_dv_vlc_len[j]++;
+
+                j++;
+                new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
+                new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
+                new_dv_vlc_run[j] = dv_vlc_run[i];
+                new_dv_vlc_level[j] = -dv_vlc_level[i];
+            }
+        }
+
         /* NOTE: as a trick, we use the fact the no codes are unused
            to accelerate the parsing of partial codes */
-        init_vlc(&dv_vlc, TEX_VLC_BITS, j, 
+        init_vlc(&dv_vlc, TEX_VLC_BITS, j,
                  new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2, 0);
 
-        dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM));
-	if (!dv_rl_vlc) {
-	    av_free(dv_anchor);
-	    return -ENOMEM;
-	}
+        dv_rl_vlc = av_mallocz_static(dv_vlc.table_size * sizeof(RL_VLC_ELEM));
+        if (!dv_rl_vlc)
+            return -ENOMEM;
+
         for(i = 0; i < dv_vlc.table_size; i++){
             int code= dv_vlc.table[i][0];
             int len = dv_vlc.table[i][1];
             int level, run;
-        
+
             if(len<0){ //more bits needed
                 run= 0;
                 level= code;
@@ -170,49 +169,49 @@ static int dvvideo_init(AVCodecContext *avctx)
             dv_rl_vlc[i].level = level;
             dv_rl_vlc[i].run = run;
         }
-	free_vlc(&dv_vlc);
+        free_vlc(&dv_vlc);
 
-	for (i = 0; i < NB_DV_VLC - 1; i++) {
+        for (i = 0; i < NB_DV_VLC - 1; i++) {
            if (dv_vlc_run[i] >= DV_VLC_MAP_RUN_SIZE)
-	       continue;
+               continue;
 #ifdef DV_CODEC_TINY_TARGET
            if (dv_vlc_level[i] >= DV_VLC_MAP_LEV_SIZE)
-	       continue;
+               continue;
 #endif
-	   
-	   if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0)
-	       continue;
-	       
-	   dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] << 
-	                                                    (!!dv_vlc_level[i]);
-	   dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] + 
-	                                                     (!!dv_vlc_level[i]);
-	}
-	for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) {
+
+           if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0)
+               continue;
+
+           dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] <<
+                                                            (!!dv_vlc_level[i]);
+           dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] +
+                                                             (!!dv_vlc_level[i]);
+        }
+        for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) {
 #ifdef DV_CODEC_TINY_TARGET
-	   for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) {
-	      if (dv_vlc_map[i][j].size == 0) {
-	          dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
-		            (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
-	          dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + 
-		                          dv_vlc_map[0][j].size;
-	      }
-	   }
+           for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) {
+              if (dv_vlc_map[i][j].size == 0) {
+                  dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
+                            (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
+                  dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
+                                          dv_vlc_map[0][j].size;
+              }
+           }
 #else
-	   for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) {
-	      if (dv_vlc_map[i][j].size == 0) {
-	          dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
-		            (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
-	          dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + 
-		                          dv_vlc_map[0][j].size;
-	      }
-	      dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc = 
-	                                    dv_vlc_map[i][j].vlc | 1;
-	      dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size = 
-	                                    dv_vlc_map[i][j].size;
-	   }
+           for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) {
+              if (dv_vlc_map[i][j].size == 0) {
+                  dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc |
+                            (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size));
+                  dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size +
+                                          dv_vlc_map[0][j].size;
+              }
+              dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc =
+                                            dv_vlc_map[i][j].vlc | 1;
+              dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size =
+                                            dv_vlc_map[i][j].size;
+           }
 #endif
-	}
+        }
     }
 
     /* Generic DSP setup */
@@ -241,10 +240,10 @@ static int dvvideo_init(AVCodecContext *avctx)
 
     /* FIXME: I really don't think this should be here */
     if (dv_codec_profile(avctx))
-	avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt; 
+        avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt;
     avctx->coded_frame = &s->picture;
     s->avctx= avctx;
-    
+
     return 0;
 }
 
@@ -268,7 +267,7 @@ static const uint16_t block_sizes[6] = {
 /* bit budget for AC only in 5 MBs */
 static const int vs_total_ac_bits = (100 * 4 + 68*2) * 5;
 /* see dv_88_areas and dv_248_areas for details */
-static const int mb_area_start[5] = { 1, 6, 21, 43, 64 }; 
+static const int mb_area_start[5] = { 1, 6, 21, 43, 64 };
 
 #ifndef ALT_BITSTREAM_READER
 #warning only works with ALT_BITSTREAM_READER
@@ -299,16 +298,16 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
     int pos = mb->pos;
     int partial_bit_count = mb->partial_bit_count;
     int level, pos1, run, vlc_len, index;
-    
+
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
-    
+
     /* if we must parse a partial vlc, we do it here */
     if (partial_bit_count > 0) {
         re_cache = ((unsigned)re_cache >> partial_bit_count) |
-	           (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count));
-	re_index -= partial_bit_count;
-	mb->partial_bit_count = 0;
+                   (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count));
+        re_index -= partial_bit_count;
+        mb->partial_bit_count = 0;
     }
 
     /* get the AC coefficients until last_index is reached */
@@ -318,31 +317,31 @@ static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
 #endif
         /* our own optimized GET_RL_VLC */
         index = NEG_USR32(re_cache, TEX_VLC_BITS);
-	vlc_len = dv_rl_vlc[index].len;
+        vlc_len = dv_rl_vlc[index].len;
         if (vlc_len < 0) {
             index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level;
             vlc_len = TEX_VLC_BITS - vlc_len;
         }
         level = dv_rl_vlc[index].level;
-	run = dv_rl_vlc[index].run;
-	
-	/* gotta check if we're still within gb boundaries */
-	if (re_index + vlc_len > last_index) {
-	    /* should be < 16 bits otherwise a codeword could have been parsed */
-	    mb->partial_bit_count = last_index - re_index;
-	    mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
-	    re_index = last_index;
-	    break;
-	}
-	re_index += vlc_len;
+        run = dv_rl_vlc[index].run;
+
+        /* gotta check if we're still within gb boundaries */
+        if (re_index + vlc_len > last_index) {
+            /* should be < 16 bits otherwise a codeword could have been parsed */
+            mb->partial_bit_count = last_index - re_index;
+            mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
+            re_index = last_index;
+            break;
+        }
+        re_index += vlc_len;
 
 #ifdef VLC_DEBUG
-	printf("run=%d level=%d\n", run, level);
+        printf("run=%d level=%d\n", run, level);
 #endif
-	pos += run; 	
-	if (pos >= 64)
-	    break;
-        
+        pos += run;
+        if (pos >= 64)
+            break;
+
         assert(level);
         pos1 = scan_table[pos];
         block[pos1] = level << shift_table[pos1];
@@ -366,8 +365,8 @@ static inline void bit_copy(PutBitContext *pb, GetBitContext *gb)
 }
 
 /* mb_x and mb_y are in units of 8 pixels */
-static inline void dv_decode_video_segment(DVVideoContext *s, 
-                                           uint8_t *buf_ptr1, 
+static inline void dv_decode_video_segment(DVVideoContext *s,
+                                           uint8_t *buf_ptr1,
                                            const uint16_t *mb_pos_ptr)
 {
     int quant, dc, dct_mode, class1, j;
@@ -380,14 +379,14 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
     PutBitContext pb, vs_pb;
     GetBitContext gb;
     BlockInfo mb_data[5 * 6], *mb, *mb1;
-    DCTELEM sblock[5*6][64] __align8;
-    uint8_t mb_bit_buffer[80 + 4] __align8; /* allow some slack */
-    uint8_t vs_bit_buffer[5 * 80 + 4] __align8; /* allow some slack */
+    DECLARE_ALIGNED_8(DCTELEM, sblock[5*6][64]);
+    DECLARE_ALIGNED_8(uint8_t, mb_bit_buffer[80 + 4]); /* allow some slack */
+    DECLARE_ALIGNED_8(uint8_t, vs_bit_buffer[5 * 80 + 4]); /* allow some slack */
     const int log2_blocksize= 3-s->avctx->lowres;
-	    
+
     assert((((int)mb_bit_buffer)&7)==0);
     assert((((int)vs_bit_buffer)&7)==0);
-    
+
     memset(sblock, 0, sizeof(sblock));
 
     /* pass 1 : read DC and AC coefficients in blocks */
@@ -404,8 +403,8 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
         block = block1;
         for(j = 0;j < 6; j++) {
             last_index = block_sizes[j];
-	    init_get_bits(&gb, buf_ptr, last_index);
-            
+            init_get_bits(&gb, buf_ptr, last_index);
+
             /* get the dc */
             dc = get_sbits(&gb, 9);
             dct_mode = get_bits1(&gb);
@@ -432,11 +431,11 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
                block is finished */
             if (mb->pos >= 64)
                 bit_copy(&pb, &gb);
-            
+
             block += 64;
             mb++;
         }
-        
+
         /* pass 2 : we can do it just after */
 #ifdef VLC_DEBUG
         printf("***pass 2 size=%d MB#=%d\n", put_bits_count(&pb), mb_index);
@@ -444,7 +443,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
         block = block1;
         mb = mb1;
         init_get_bits(&gb, mb_bit_buffer, put_bits_count(&pb));
-	flush_put_bits(&pb);
+        flush_put_bits(&pb);
         for(j = 0;j < 6; j++, block += 64, mb++) {
             if (mb->pos < 64 && get_bits_left(&gb) > 0) {
                 dv_decode_ac(&gb, mb, block);
@@ -456,7 +455,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
         /* all blocks are finished, so the extra bytes can be used at
            the video segment level */
         if (j >= 6)
-	    bit_copy(&vs_pb, &gb);
+            bit_copy(&vs_pb, &gb);
     }
 
     /* we need a pass other the whole video segment */
@@ -475,13 +474,13 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
 #endif
                 dv_decode_ac(&gb, mb, block);
             }
-	    if (mb->pos >= 64 && mb->pos < 127)
-		av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos);
+            if (mb->pos >= 64 && mb->pos < 127)
+                av_log(NULL, AV_LOG_ERROR, "AC EOB marker is absent pos=%d\n", mb->pos);
             block += 64;
             mb++;
         }
     }
-    
+
     /* compute idct and place blocks */
     block = &sblock[0][0];
     mb = mb_data;
@@ -508,7 +507,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
                 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8)) {
                     uint64_t aligned_pixels[64/8];
                     uint8_t *pixels= (uint8_t*)aligned_pixels;
-		    uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1;
+                    uint8_t *c_ptr, *c_ptr1, *ptr, *ptr1;
                     int x, y, linesize;
                     /* NOTE: at end of line, the macroblock is handled as 420 */
                     idct_put(pixels, 8, block);
@@ -526,7 +525,7 @@ static inline void dv_decode_video_segment(DVVideoContext *s,
                     }
                 } else {
                     /* don't ask me why they inverted Cb and Cr ! */
-                    idct_put(s->picture.data[6 - j] + c_offset, 
+                    idct_put(s->picture.data[6 - j] + c_offset,
                              s->picture.linesize[6 - j], block);
                 }
             }
@@ -543,38 +542,38 @@ static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
     int size;
     if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
         *vlc = dv_vlc_map[run][level].vlc | sign;
-	size = dv_vlc_map[run][level].size;
+        size = dv_vlc_map[run][level].size;
     }
-    else { 
+    else {
         if (level < DV_VLC_MAP_LEV_SIZE) {
-	    *vlc = dv_vlc_map[0][level].vlc | sign;
-	    size = dv_vlc_map[0][level].size;
-	} else {
+            *vlc = dv_vlc_map[0][level].vlc | sign;
+            size = dv_vlc_map[0][level].size;
+        } else {
             *vlc = 0xfe00 | (level << 1) | sign;
-	    size = 16;
-	}
-	if (run) {
-	    *vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc : 
-	                          (0x1f80 | (run - 1))) << size;
-	    size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
-	}
+            size = 16;
+        }
+        if (run) {
+            *vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc :
+                                  (0x1f80 | (run - 1))) << size;
+            size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
+        }
     }
-    
+
     return size;
 }
 
 static always_inline int dv_rl2vlc_size(int run, int level)
 {
     int size;
-    
+
     if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
-	size = dv_vlc_map[run][level].size; 
+        size = dv_vlc_map[run][level].size;
     }
-    else { 
-	size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16;
-	if (run) {
-	    size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
-	}
+    else {
+        size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16;
+        if (run) {
+            size += (run < 16) ? dv_vlc_map[run-1][0].size : 13;
+        }
     }
     return size;
 }
@@ -605,7 +604,7 @@ typedef struct EncBlockInfo {
     uint32_t partial_bit_buffer; /* we can't use uint16_t here */
 } EncBlockInfo;
 
-static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool, 
+static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool,
                                        PutBitContext* pb_end)
 {
     int prev;
@@ -620,22 +619,22 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext
        for (; size > (bits_left = put_bits_left(pb)); pb++) {
           if (bits_left) {
               size -= bits_left;
-	      put_bits(pb, bits_left, vlc >> size);
-	      vlc = vlc & ((1<<size)-1);
-	  }
-	  if (pb + 1 >= pb_end) {
-	      bi->partial_bit_count = size;
-	      bi->partial_bit_buffer = vlc;
-	      return pb;
-	  }
+              put_bits(pb, bits_left, vlc >> size);
+              vlc = vlc & ((1<<size)-1);
+          }
+          if (pb + 1 >= pb_end) {
+              bi->partial_bit_count = size;
+              bi->partial_bit_buffer = vlc;
+              return pb;
+          }
        }
-       
+
        /* Store VLC */
        put_bits(pb, size, vlc);
-       
+
        if(bi->cur_ac>=64)
            break;
-       
+
        /* Construct the next VLC */
        prev= bi->cur_ac;
        bi->cur_ac = bi->next[prev];
@@ -648,7 +647,7 @@ static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext
     return pb;
 }
 
-static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, 
+static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
                                               const uint8_t* zigzag_scan, int bias)
 {
     int i, area;
@@ -656,14 +655,14 @@ static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
     int max=12;
     int prev=0;
 
-    bi->mb[0] = blk[0]; 
-    
+    bi->mb[0] = blk[0];
+
     for (area = 0; area < 4; area++) {
        bi->prev[area] = prev;
        bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :)
        for (i=mb_area_start[area]; i<mb_area_start[area+1]; i++) {
           int level = blk[zigzag_scan[i]];
-       
+
           if (level+15 > 30U) {
               bi->sign[i] = (level>>31)&1;
               bi->mb[i] = level= ABS(level)>>4;
@@ -678,7 +677,7 @@ static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
     for(bi->cno = 0; max > classes[bi->cno]; bi->cno++);
 
     bi->cno += bias;
-    
+
     if (bi->cno >= 3) {
         bi->cno = 3;
         prev=0;
@@ -688,7 +687,7 @@ static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
             bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :)
             for (; i<mb_area_start[area+1]; i= bi->next[i]) {
                 bi->mb[i] >>=1;
-            
+
                 if (bi->mb[i]) {
                     bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, bi->mb[i]);
                     bi->next[prev]= i;
@@ -707,19 +706,19 @@ static always_inline int dv_guess_dct_mode(DCTELEM *blk) {
     int score88 = 0;
     int score248 = 0;
     int i;
-    
+
     /* Compute 8-8 score (small values give a better chance for 8-8 DCT) */
     s = blk;
     for(i=0; i<7; i++) {
-        score88 += SC(0,  8) + SC(1, 9) + SC(2, 10) + SC(3, 11) + 
-	           SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15);
+        score88 += SC(0,  8) + SC(1, 9) + SC(2, 10) + SC(3, 11) +
+                   SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15);
         s += 8;
     }
     /* Compute 2-4-8 score (small values give a better chance for 2-4-8 DCT) */
     s = blk;
     for(i=0; i<6; i++) {
         score248 += SC(0, 16) + SC(1,17) + SC(2, 18) + SC(3, 19) +
-	            SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23);
+                    SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23);
         s += 8;
     }
 
@@ -736,60 +735,60 @@ static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
        b = blks;
        for (i=0; i<5; i++) {
           if (!qnos[i])
-	      continue;
-	  
-	  qnos[i]--;
-	  size[i] = 0;
+              continue;
+
+          qnos[i]--;
+          size[i] = 0;
           for (j=0; j<6; j++, b++) {
-	     for (a=0; a<4; a++) {
-	        if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
-		    b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
-		    b->area_q[a]++;
+             for (a=0; a<4; a++) {
+                if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
+                    b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
+                    b->area_q[a]++;
                     prev= b->prev[a];
                     for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) {
-		       b->mb[k] >>= 1;
-		       if (b->mb[k]) {
+                       b->mb[k] >>= 1;
+                       if (b->mb[k]) {
                            b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
-	                   prev= k;
+                           prev= k;
                        } else {
                            b->next[prev] = b->next[k];
                        }
-		    }
+                    }
                     b->prev[a+1]= prev;
-		}
-		size[i] += b->bit_size[a];
-	     }
-	  }
+                }
+                size[i] += b->bit_size[a];
+             }
+          }
        }
-    } while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) && 
+    } while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) &&
              (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4]));
 }
 
 /*
  * This is a very rough initial implementaion. The performance is
- * horrible and the weighting is missing. But it's missing from the 
+ * horrible and the weighting is missing. But it's missing from the
  * decoding step also -- so at least we're on the same page with decoder ;-)
  */
-static inline void dv_encode_video_segment(DVVideoContext *s, 
-                                           uint8_t *dif, 
+static inline void dv_encode_video_segment(DVVideoContext *s,
+                                           uint8_t *dif,
                                            const uint16_t *mb_pos_ptr)
 {
     int mb_index, i, j, v;
-    int mb_x, mb_y, c_offset, linesize; 
+    int mb_x, mb_y, c_offset, linesize;
     uint8_t*  y_ptr;
     uint8_t*  data;
     uint8_t*  ptr;
     int       do_edge_wrap;
-    DCTELEM   block[64] __align8;
+    DECLARE_ALIGNED_8(DCTELEM, block[64]);
     EncBlockInfo  enc_blks[5*6];
     PutBitContext pbs[5*6];
-    PutBitContext* pb; 
+    PutBitContext* pb;
     EncBlockInfo* enc_blk;
     int       vs_bit_size = 0;
     int       qnos[5];
-    
+
     assert((((int)block) & 7) == 0);
-   
+
     enc_blk = &enc_blks[0];
     pb = &pbs[0];
     for(mb_index = 0; mb_index < 5; mb_index++) {
@@ -797,68 +796,68 @@ static inline void dv_encode_video_segment(DVVideoContext *s,
         mb_x = v & 0xff;
         mb_y = v >> 8;
         y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8);
-	c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ?
-	           ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) :
-		   (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8));
-	do_edge_wrap = 0;
-	qnos[mb_index] = 15; /* No quantization */
+        c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ?
+                   ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) :
+                   (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8));
+        do_edge_wrap = 0;
+        qnos[mb_index] = 15; /* No quantization */
         ptr = dif + mb_index*80 + 4;
         for(j = 0;j < 6; j++) {
             if (j < 4) {  /* Four Y blocks */
-		/* NOTE: at end of line, the macroblock is handled as 420 */
-		if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
+                /* NOTE: at end of line, the macroblock is handled as 420 */
+                if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) {
                     data = y_ptr + (j * 8);
                 } else {
                     data = y_ptr + ((j & 1) * 8) + ((j >> 1) * 8 * s->picture.linesize[0]);
                 }
-		linesize = s->picture.linesize[0];
+                linesize = s->picture.linesize[0];
             } else {      /* Cr and Cb blocks */
-	        /* don't ask Fabrice why they inverted Cb and Cr ! */
-	        data = s->picture.data[6 - j] + c_offset;
-		linesize = s->picture.linesize[6 - j];
-		if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8))
-		    do_edge_wrap = 1;
-	    }	
-            
-	    /* Everything is set up -- now just copy data -> DCT block */
-	    if (do_edge_wrap) {  /* Edge wrap copy: 4x16 -> 8x8 */
-		uint8_t* d;
-		DCTELEM *b = block;
-	        for (i=0;i<8;i++) {
-		   d = data + 8 * linesize;
-		   b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3];
+                /* don't ask Fabrice why they inverted Cb and Cr ! */
+                data = s->picture.data[6 - j] + c_offset;
+                linesize = s->picture.linesize[6 - j];
+                if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x >= (704 / 8))
+                    do_edge_wrap = 1;
+            }
+
+            /* Everything is set up -- now just copy data -> DCT block */
+            if (do_edge_wrap) {  /* Edge wrap copy: 4x16 -> 8x8 */
+                uint8_t* d;
+                DCTELEM *b = block;
+                for (i=0;i<8;i++) {
+                   d = data + 8 * linesize;
+                   b[0] = data[0]; b[1] = data[1]; b[2] = data[2]; b[3] = data[3];
                    b[4] =    d[0]; b[5] =    d[1]; b[6] =    d[2]; b[7] =    d[3];
-		   data += linesize;
-		   b += 8;
-		}
-	    } else {             /* Simple copy: 8x8 -> 8x8 */
-	        s->get_pixels(block, data, linesize);
-	    }
-	  
+                   data += linesize;
+                   b += 8;
+                }
+            } else {             /* Simple copy: 8x8 -> 8x8 */
+                s->get_pixels(block, data, linesize);
+            }
+
             if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT)
                 enc_blk->dct_mode = dv_guess_dct_mode(block);
             else
                 enc_blk->dct_mode = 0;
-	    enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0;
-	    enc_blk->partial_bit_count = 0;
-	    enc_blk->partial_bit_buffer = 0;
-	    enc_blk->cur_ac = 0;
-	    
-	    s->fdct[enc_blk->dct_mode](block);
-	    
-	    dv_set_class_number(block, enc_blk, 
-	                        enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4);
-           
+            enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0;
+            enc_blk->partial_bit_count = 0;
+            enc_blk->partial_bit_buffer = 0;
+            enc_blk->cur_ac = 0;
+
+            s->fdct[enc_blk->dct_mode](block);
+
+            dv_set_class_number(block, enc_blk,
+                                enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4);
+
             init_put_bits(pb, ptr, block_sizes[j]/8);
-	    put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
-	    put_bits(pb, 1, enc_blk->dct_mode);
-	    put_bits(pb, 2, enc_blk->cno);
-	    
-	    vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] +
-	                   enc_blk->bit_size[2] + enc_blk->bit_size[3];
-	    ++enc_blk;
-	    ++pb;
-	    ptr += block_sizes[j]/8;
+            put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
+            put_bits(pb, 1, enc_blk->dct_mode);
+            put_bits(pb, 2, enc_blk->cno);
+
+            vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] +
+                           enc_blk->bit_size[2] + enc_blk->bit_size[3];
+            ++enc_blk;
+            ++pb;
+            ptr += block_sizes[j]/8;
         }
     }
 
@@ -898,7 +897,7 @@ static int dv_decode_mt(AVCodecContext *avctx, void* sl)
     DVVideoContext *s = avctx->priv_data;
     int slice = (size_t)sl;
     dv_decode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80],
-	                    &s->sys->video_place[slice*5]);
+                            &s->sys->video_place[slice*5]);
     return 0;
 }
 
@@ -907,25 +906,25 @@ static int dv_encode_mt(AVCodecContext *avctx, void* sl)
     DVVideoContext *s = avctx->priv_data;
     int slice = (size_t)sl;
     dv_encode_video_segment(s, &s->buf[((slice/27)*6+(slice/3)+slice*5+7)*80],
-	                    &s->sys->video_place[slice*5]);
+                            &s->sys->video_place[slice*5]);
     return 0;
 }
 
 /* NOTE: exactly one frame must be given (120000 bytes for NTSC,
    144000 bytes for PAL) */
-static int dvvideo_decode_frame(AVCodecContext *avctx, 
+static int dvvideo_decode_frame(AVCodecContext *avctx,
                                  void *data, int *data_size,
                                  uint8_t *buf, int buf_size)
 {
     DVVideoContext *s = avctx->priv_data;
-  
+
     s->sys = dv_frame_profile(buf);
     if (!s->sys || buf_size < s->sys->frame_size)
         return -1; /* NOTE: we only accept several full frames */
 
     if(s->picture.data[0])
         avctx->release_buffer(avctx, &s->picture);
-    
+
     s->picture.reference = 0;
     s->picture.key_frame = 1;
     s->picture.pict_type = FF_I_TYPE;
@@ -939,26 +938,26 @@ static int dvvideo_decode_frame(AVCodecContext *avctx,
     s->picture.top_field_first = 0;
 
     s->buf = buf;
-    avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL, 
-	           s->sys->difseg_size * 27);
-    
+    avctx->execute(avctx, dv_decode_mt, (void**)&dv_anchor[0], NULL,
+                   s->sys->difseg_size * 27);
+
     emms_c();
 
     /* return image */
     *data_size = sizeof(AVFrame);
     *(AVFrame*)data= s->picture;
-    
+
     return s->sys->frame_size;
 }
 
-static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size, 
+static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
                                 void *data)
 {
     DVVideoContext *s = c->priv_data;
 
     s->sys = dv_codec_profile(c);
     if (!s->sys)
-	return -1;
+        return -1;
     if(buf_size < s->sys->frame_size)
         return -1;
 
@@ -968,13 +967,20 @@ static int dvvideo_encode_frame(AVCodecContext *c, uint8_t *buf, int buf_size,
     s->picture.pict_type = FF_I_TYPE;
 
     s->buf = buf;
-    c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL, 
-	       s->sys->difseg_size * 27);
+    c->execute(c, dv_encode_mt, (void**)&dv_anchor[0], NULL,
+               s->sys->difseg_size * 27);
 
     emms_c();
     return s->sys->frame_size;
 }
 
+static int dvvideo_close(AVCodecContext *c)
+{
+
+    return 0;
+}
+
+
 #ifdef CONFIG_DVVIDEO_ENCODER
 AVCodec dvvideo_encoder = {
     "dvvideo",
@@ -983,7 +989,7 @@ AVCodec dvvideo_encoder = {
     sizeof(DVVideoContext),
     dvvideo_init,
     dvvideo_encode_frame,
-    NULL,
+    dvvideo_close,
     NULL,
     CODEC_CAP_DR1,
     NULL
@@ -997,7 +1003,7 @@ AVCodec dvvideo_decoder = {
     sizeof(DVVideoContext),
     dvvideo_init,
     NULL,
-    NULL,
+    dvvideo_close,
     dvvideo_decode_frame,
     CODEC_CAP_DR1,
     NULL
diff --git a/src/libffmpeg/libavcodec/dvdata.h b/src/libffmpeg/libavcodec/dvdata.h
index acda751d6..f817ead2a 100644
--- a/src/libffmpeg/libavcodec/dvdata.h
+++ b/src/libffmpeg/libavcodec/dvdata.h
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
@@ -22,8 +22,8 @@
  * Constants for DV codec.
  */
 
-/* 
- * DVprofile is used to express the differences between various 
+/*
+ * DVprofile is used to express the differences between various
  * DV flavors. For now it's primarily used for differentiating
  * 525/60 and 625/50, but the plans are to use it for various
  * DV specs as well (e.g. SMPTE314M vs. IEC 61834).
@@ -32,7 +32,7 @@ typedef struct DVprofile {
     int              dsf;                 /* value of the dsf in the DV header */
     int              frame_size;          /* total size of one frame in bytes */
     int              difseg_size;         /* number of DIF segments */
-    int              frame_rate;      
+    int              frame_rate;
     int              frame_rate_base;
     int              ltc_divisor;         /* FPS from the LTS standpoint */
     int              height;              /* picture height in pixels */
@@ -40,7 +40,7 @@ typedef struct DVprofile {
     AVRational       sar[2];              /* sample aspect ratios for 4:3 and 16:9 */
     const uint16_t  *video_place;         /* positions of all DV macro blocks */
     enum PixelFormat pix_fmt;             /* picture pixel format */
-    
+
     int              audio_stride;        /* size of audio_shuffle table */
     int              audio_min_samples[3];/* min ammount of audio samples */
                                           /* for 48Khz, 44.1Khz and 32Khz */
@@ -51,10 +51,10 @@ typedef struct DVprofile {
 
 #define NB_DV_VLC 409
 
-/* 
+/*
  * There's a catch about the following three tables: the mapping they establish
  * between (run, level) and vlc is not 1-1. So you have to watch out for that
- * when building misc. tables. E.g. (1, 0) can be either 0x7cf or 0x1f82. 
+ * when building misc. tables. E.g. (1, 0) can be either 0x7cf or 0x1f82.
  */
 static const uint16_t dv_vlc_bits[409] = {
  0x0000, 0x0002, 0x0007, 0x0008, 0x0009, 0x0014, 0x0015, 0x0016,
@@ -300,27 +300,27 @@ static const uint8_t dv_248_areas[64] = {
 };
 
 static const uint8_t dv_quant_shifts[22][4] = {
-  { 3,3,4,4 }, 
-  { 3,3,4,4 }, 
-  { 2,3,3,4 }, 
+  { 3,3,4,4 },
+  { 3,3,4,4 },
   { 2,3,3,4 },
-  { 2,2,3,3 }, 
-  { 2,2,3,3 }, 
-  { 1,2,2,3 }, 
-  { 1,2,2,3 }, 
-  { 1,1,2,2 }, 
-  { 1,1,2,2 }, 
-  { 0,1,1,2 }, 
-  { 0,1,1,2 }, 
-  { 0,0,1,1 }, 
+  { 2,3,3,4 },
+  { 2,2,3,3 },
+  { 2,2,3,3 },
+  { 1,2,2,3 },
+  { 1,2,2,3 },
+  { 1,1,2,2 },
+  { 1,1,2,2 },
+  { 0,1,1,2 },
+  { 0,1,1,2 },
+  { 0,0,1,1 },
   { 0,0,1,1 },
-  { 0,0,0,1 }, 
-  { 0,0,0,0 }, 
-  { 0,0,0,0 }, 
-  { 0,0,0,0 }, 
-  { 0,0,0,0 }, 
-  { 0,0,0,0 }, 
-  { 0,0,0,0 }, 
+  { 0,0,0,1 },
+  { 0,0,0,0 },
+  { 0,0,0,0 },
+  { 0,0,0,0 },
+  { 0,0,0,0 },
+  { 0,0,0,0 },
+  { 0,0,0,0 },
   { 0,0,0,0 },
 };
 
@@ -657,329 +657,329 @@ static const uint16_t dv_place_420[1620] = {
 };
 
 static const uint16_t dv_place_411P[1620] = {
- 0x0c24, 0x2710, 0x3334, 0x0000, 0x1848, 
- 0x0d24, 0x2810, 0x3434, 0x0100, 0x1948, 
- 0x0e24, 0x2910, 0x3534, 0x0200, 0x1a48, 
- 0x0f24, 0x2914, 0x3538, 0x0300, 0x1b48, 
- 0x1024, 0x2814, 0x3438, 0x0400, 0x1c48, 
- 0x1124, 0x2714, 0x3338, 0x0500, 0x1d48, 
- 0x1128, 0x2614, 0x3238, 0x0504, 0x1d4c, 
- 0x1028, 0x2514, 0x3138, 0x0404, 0x1c4c, 
- 0x0f28, 0x2414, 0x3038, 0x0304, 0x1b4c, 
- 0x0e28, 0x2418, 0x303c, 0x0204, 0x1a4c, 
- 0x0d28, 0x2518, 0x313c, 0x0104, 0x194c, 
- 0x0c28, 0x2618, 0x323c, 0x0004, 0x184c, 
- 0x0c2c, 0x2718, 0x333c, 0x0008, 0x1850, 
- 0x0d2c, 0x2818, 0x343c, 0x0108, 0x1950, 
- 0x0e2c, 0x2918, 0x353c, 0x0208, 0x1a50, 
- 0x0f2c, 0x291c, 0x3540, 0x0308, 0x1b50, 
- 0x102c, 0x281c, 0x3440, 0x0408, 0x1c50, 
- 0x112c, 0x271c, 0x3340, 0x0508, 0x1d50, 
- 0x1130, 0x261c, 0x3240, 0x050c, 0x1d54, 
- 0x1030, 0x251c, 0x3140, 0x040c, 0x1c54, 
- 0x0f30, 0x241c, 0x3040, 0x030c, 0x1b54, 
- 0x0e30, 0x2420, 0x3044, 0x020c, 0x1a54, 
- 0x0d30, 0x2520, 0x3144, 0x010c, 0x1954, 
- 0x0c30, 0x2620, 0x3244, 0x000c, 0x1854, 
- 0x0c34, 0x2720, 0x3344, 0x0010, 0x1858, 
- 0x0d34, 0x2820, 0x3444, 0x0110, 0x1a58, 
- 0x0e34, 0x2920, 0x3544, 0x0210, 0x1c58, 
- 0x1224, 0x2d10, 0x3934, 0x0600, 0x1e48, 
- 0x1324, 0x2e10, 0x3a34, 0x0700, 0x1f48, 
- 0x1424, 0x2f10, 0x3b34, 0x0800, 0x2048, 
- 0x1524, 0x2f14, 0x3b38, 0x0900, 0x2148, 
- 0x1624, 0x2e14, 0x3a38, 0x0a00, 0x2248, 
- 0x1724, 0x2d14, 0x3938, 0x0b00, 0x2348, 
- 0x1728, 0x2c14, 0x3838, 0x0b04, 0x234c, 
- 0x1628, 0x2b14, 0x3738, 0x0a04, 0x224c, 
- 0x1528, 0x2a14, 0x3638, 0x0904, 0x214c, 
- 0x1428, 0x2a18, 0x363c, 0x0804, 0x204c, 
- 0x1328, 0x2b18, 0x373c, 0x0704, 0x1f4c, 
- 0x1228, 0x2c18, 0x383c, 0x0604, 0x1e4c, 
- 0x122c, 0x2d18, 0x393c, 0x0608, 0x1e50, 
- 0x132c, 0x2e18, 0x3a3c, 0x0708, 0x1f50, 
- 0x142c, 0x2f18, 0x3b3c, 0x0808, 0x2050, 
- 0x152c, 0x2f1c, 0x3b40, 0x0908, 0x2150, 
- 0x162c, 0x2e1c, 0x3a40, 0x0a08, 0x2250, 
- 0x172c, 0x2d1c, 0x3940, 0x0b08, 0x2350, 
- 0x1730, 0x2c1c, 0x3840, 0x0b0c, 0x2354, 
- 0x1630, 0x2b1c, 0x3740, 0x0a0c, 0x2254, 
- 0x1530, 0x2a1c, 0x3640, 0x090c, 0x2154, 
- 0x1430, 0x2a20, 0x3644, 0x080c, 0x2054, 
- 0x1330, 0x2b20, 0x3744, 0x070c, 0x1f54, 
- 0x1230, 0x2c20, 0x3844, 0x060c, 0x1e54, 
- 0x1234, 0x2d20, 0x3944, 0x0610, 0x1e58, 
- 0x1334, 0x2e20, 0x3a44, 0x0710, 0x2058, 
- 0x1434, 0x2f20, 0x3b44, 0x0810, 0x2258, 
- 0x1824, 0x3310, 0x3f34, 0x0c00, 0x2448, 
- 0x1924, 0x3410, 0x4034, 0x0d00, 0x2548, 
- 0x1a24, 0x3510, 0x4134, 0x0e00, 0x2648, 
- 0x1b24, 0x3514, 0x4138, 0x0f00, 0x2748, 
- 0x1c24, 0x3414, 0x4038, 0x1000, 0x2848, 
- 0x1d24, 0x3314, 0x3f38, 0x1100, 0x2948, 
- 0x1d28, 0x3214, 0x3e38, 0x1104, 0x294c, 
- 0x1c28, 0x3114, 0x3d38, 0x1004, 0x284c, 
- 0x1b28, 0x3014, 0x3c38, 0x0f04, 0x274c, 
- 0x1a28, 0x3018, 0x3c3c, 0x0e04, 0x264c, 
- 0x1928, 0x3118, 0x3d3c, 0x0d04, 0x254c, 
- 0x1828, 0x3218, 0x3e3c, 0x0c04, 0x244c, 
- 0x182c, 0x3318, 0x3f3c, 0x0c08, 0x2450, 
- 0x192c, 0x3418, 0x403c, 0x0d08, 0x2550, 
- 0x1a2c, 0x3518, 0x413c, 0x0e08, 0x2650, 
- 0x1b2c, 0x351c, 0x4140, 0x0f08, 0x2750, 
- 0x1c2c, 0x341c, 0x4040, 0x1008, 0x2850, 
- 0x1d2c, 0x331c, 0x3f40, 0x1108, 0x2950, 
- 0x1d30, 0x321c, 0x3e40, 0x110c, 0x2954, 
- 0x1c30, 0x311c, 0x3d40, 0x100c, 0x2854, 
- 0x1b30, 0x301c, 0x3c40, 0x0f0c, 0x2754, 
- 0x1a30, 0x3020, 0x3c44, 0x0e0c, 0x2654, 
- 0x1930, 0x3120, 0x3d44, 0x0d0c, 0x2554, 
- 0x1830, 0x3220, 0x3e44, 0x0c0c, 0x2454, 
- 0x1834, 0x3320, 0x3f44, 0x0c10, 0x2458, 
- 0x1934, 0x3420, 0x4044, 0x0d10, 0x2658, 
- 0x1a34, 0x3520, 0x4144, 0x0e10, 0x2858, 
- 0x1e24, 0x3910, 0x4534, 0x1200, 0x2a48, 
- 0x1f24, 0x3a10, 0x4634, 0x1300, 0x2b48, 
- 0x2024, 0x3b10, 0x4734, 0x1400, 0x2c48, 
- 0x2124, 0x3b14, 0x4738, 0x1500, 0x2d48, 
- 0x2224, 0x3a14, 0x4638, 0x1600, 0x2e48, 
- 0x2324, 0x3914, 0x4538, 0x1700, 0x2f48, 
- 0x2328, 0x3814, 0x4438, 0x1704, 0x2f4c, 
- 0x2228, 0x3714, 0x4338, 0x1604, 0x2e4c, 
- 0x2128, 0x3614, 0x4238, 0x1504, 0x2d4c, 
- 0x2028, 0x3618, 0x423c, 0x1404, 0x2c4c, 
- 0x1f28, 0x3718, 0x433c, 0x1304, 0x2b4c, 
- 0x1e28, 0x3818, 0x443c, 0x1204, 0x2a4c, 
- 0x1e2c, 0x3918, 0x453c, 0x1208, 0x2a50, 
- 0x1f2c, 0x3a18, 0x463c, 0x1308, 0x2b50, 
- 0x202c, 0x3b18, 0x473c, 0x1408, 0x2c50, 
- 0x212c, 0x3b1c, 0x4740, 0x1508, 0x2d50, 
- 0x222c, 0x3a1c, 0x4640, 0x1608, 0x2e50, 
- 0x232c, 0x391c, 0x4540, 0x1708, 0x2f50, 
- 0x2330, 0x381c, 0x4440, 0x170c, 0x2f54, 
- 0x2230, 0x371c, 0x4340, 0x160c, 0x2e54, 
- 0x2130, 0x361c, 0x4240, 0x150c, 0x2d54, 
- 0x2030, 0x3620, 0x4244, 0x140c, 0x2c54, 
- 0x1f30, 0x3720, 0x4344, 0x130c, 0x2b54, 
- 0x1e30, 0x3820, 0x4444, 0x120c, 0x2a54, 
- 0x1e34, 0x3920, 0x4544, 0x1210, 0x2a58, 
- 0x1f34, 0x3a20, 0x4644, 0x1310, 0x2c58, 
- 0x2034, 0x3b20, 0x4744, 0x1410, 0x2e58, 
- 0x2424, 0x3f10, 0x0334, 0x1800, 0x3048, 
- 0x2524, 0x4010, 0x0434, 0x1900, 0x3148, 
- 0x2624, 0x4110, 0x0534, 0x1a00, 0x3248, 
- 0x2724, 0x4114, 0x0538, 0x1b00, 0x3348, 
- 0x2824, 0x4014, 0x0438, 0x1c00, 0x3448, 
- 0x2924, 0x3f14, 0x0338, 0x1d00, 0x3548, 
- 0x2928, 0x3e14, 0x0238, 0x1d04, 0x354c, 
- 0x2828, 0x3d14, 0x0138, 0x1c04, 0x344c, 
- 0x2728, 0x3c14, 0x0038, 0x1b04, 0x334c, 
- 0x2628, 0x3c18, 0x003c, 0x1a04, 0x324c, 
- 0x2528, 0x3d18, 0x013c, 0x1904, 0x314c, 
- 0x2428, 0x3e18, 0x023c, 0x1804, 0x304c, 
- 0x242c, 0x3f18, 0x033c, 0x1808, 0x3050, 
- 0x252c, 0x4018, 0x043c, 0x1908, 0x3150, 
- 0x262c, 0x4118, 0x053c, 0x1a08, 0x3250, 
- 0x272c, 0x411c, 0x0540, 0x1b08, 0x3350, 
- 0x282c, 0x401c, 0x0440, 0x1c08, 0x3450, 
- 0x292c, 0x3f1c, 0x0340, 0x1d08, 0x3550, 
- 0x2930, 0x3e1c, 0x0240, 0x1d0c, 0x3554, 
- 0x2830, 0x3d1c, 0x0140, 0x1c0c, 0x3454, 
- 0x2730, 0x3c1c, 0x0040, 0x1b0c, 0x3354, 
- 0x2630, 0x3c20, 0x0044, 0x1a0c, 0x3254, 
- 0x2530, 0x3d20, 0x0144, 0x190c, 0x3154, 
- 0x2430, 0x3e20, 0x0244, 0x180c, 0x3054, 
- 0x2434, 0x3f20, 0x0344, 0x1810, 0x3058, 
- 0x2534, 0x4020, 0x0444, 0x1910, 0x3258, 
- 0x2634, 0x4120, 0x0544, 0x1a10, 0x3458, 
- 0x2a24, 0x4510, 0x0934, 0x1e00, 0x3648, 
- 0x2b24, 0x4610, 0x0a34, 0x1f00, 0x3748, 
- 0x2c24, 0x4710, 0x0b34, 0x2000, 0x3848, 
- 0x2d24, 0x4714, 0x0b38, 0x2100, 0x3948, 
- 0x2e24, 0x4614, 0x0a38, 0x2200, 0x3a48, 
- 0x2f24, 0x4514, 0x0938, 0x2300, 0x3b48, 
- 0x2f28, 0x4414, 0x0838, 0x2304, 0x3b4c, 
- 0x2e28, 0x4314, 0x0738, 0x2204, 0x3a4c, 
- 0x2d28, 0x4214, 0x0638, 0x2104, 0x394c, 
- 0x2c28, 0x4218, 0x063c, 0x2004, 0x384c, 
- 0x2b28, 0x4318, 0x073c, 0x1f04, 0x374c, 
- 0x2a28, 0x4418, 0x083c, 0x1e04, 0x364c, 
- 0x2a2c, 0x4518, 0x093c, 0x1e08, 0x3650, 
- 0x2b2c, 0x4618, 0x0a3c, 0x1f08, 0x3750, 
- 0x2c2c, 0x4718, 0x0b3c, 0x2008, 0x3850, 
- 0x2d2c, 0x471c, 0x0b40, 0x2108, 0x3950, 
- 0x2e2c, 0x461c, 0x0a40, 0x2208, 0x3a50, 
- 0x2f2c, 0x451c, 0x0940, 0x2308, 0x3b50, 
- 0x2f30, 0x441c, 0x0840, 0x230c, 0x3b54, 
- 0x2e30, 0x431c, 0x0740, 0x220c, 0x3a54, 
- 0x2d30, 0x421c, 0x0640, 0x210c, 0x3954, 
- 0x2c30, 0x4220, 0x0644, 0x200c, 0x3854, 
- 0x2b30, 0x4320, 0x0744, 0x1f0c, 0x3754, 
- 0x2a30, 0x4420, 0x0844, 0x1e0c, 0x3654, 
- 0x2a34, 0x4520, 0x0944, 0x1e10, 0x3658, 
- 0x2b34, 0x4620, 0x0a44, 0x1f10, 0x3858, 
- 0x2c34, 0x4720, 0x0b44, 0x2010, 0x3a58, 
- 0x3024, 0x0310, 0x0f34, 0x2400, 0x3c48, 
- 0x3124, 0x0410, 0x1034, 0x2500, 0x3d48, 
- 0x3224, 0x0510, 0x1134, 0x2600, 0x3e48, 
- 0x3324, 0x0514, 0x1138, 0x2700, 0x3f48, 
- 0x3424, 0x0414, 0x1038, 0x2800, 0x4048, 
- 0x3524, 0x0314, 0x0f38, 0x2900, 0x4148, 
- 0x3528, 0x0214, 0x0e38, 0x2904, 0x414c, 
- 0x3428, 0x0114, 0x0d38, 0x2804, 0x404c, 
- 0x3328, 0x0014, 0x0c38, 0x2704, 0x3f4c, 
- 0x3228, 0x0018, 0x0c3c, 0x2604, 0x3e4c, 
- 0x3128, 0x0118, 0x0d3c, 0x2504, 0x3d4c, 
- 0x3028, 0x0218, 0x0e3c, 0x2404, 0x3c4c, 
- 0x302c, 0x0318, 0x0f3c, 0x2408, 0x3c50, 
- 0x312c, 0x0418, 0x103c, 0x2508, 0x3d50, 
- 0x322c, 0x0518, 0x113c, 0x2608, 0x3e50, 
- 0x332c, 0x051c, 0x1140, 0x2708, 0x3f50, 
- 0x342c, 0x041c, 0x1040, 0x2808, 0x4050, 
- 0x352c, 0x031c, 0x0f40, 0x2908, 0x4150, 
- 0x3530, 0x021c, 0x0e40, 0x290c, 0x4154, 
- 0x3430, 0x011c, 0x0d40, 0x280c, 0x4054, 
- 0x3330, 0x001c, 0x0c40, 0x270c, 0x3f54, 
- 0x3230, 0x0020, 0x0c44, 0x260c, 0x3e54, 
- 0x3130, 0x0120, 0x0d44, 0x250c, 0x3d54, 
- 0x3030, 0x0220, 0x0e44, 0x240c, 0x3c54, 
- 0x3034, 0x0320, 0x0f44, 0x2410, 0x3c58, 
- 0x3134, 0x0420, 0x1044, 0x2510, 0x3e58, 
- 0x3234, 0x0520, 0x1144, 0x2610, 0x4058, 
- 0x3624, 0x0910, 0x1534, 0x2a00, 0x4248, 
- 0x3724, 0x0a10, 0x1634, 0x2b00, 0x4348, 
- 0x3824, 0x0b10, 0x1734, 0x2c00, 0x4448, 
- 0x3924, 0x0b14, 0x1738, 0x2d00, 0x4548, 
- 0x3a24, 0x0a14, 0x1638, 0x2e00, 0x4648, 
- 0x3b24, 0x0914, 0x1538, 0x2f00, 0x4748, 
- 0x3b28, 0x0814, 0x1438, 0x2f04, 0x474c, 
- 0x3a28, 0x0714, 0x1338, 0x2e04, 0x464c, 
- 0x3928, 0x0614, 0x1238, 0x2d04, 0x454c, 
- 0x3828, 0x0618, 0x123c, 0x2c04, 0x444c, 
- 0x3728, 0x0718, 0x133c, 0x2b04, 0x434c, 
- 0x3628, 0x0818, 0x143c, 0x2a04, 0x424c, 
- 0x362c, 0x0918, 0x153c, 0x2a08, 0x4250, 
- 0x372c, 0x0a18, 0x163c, 0x2b08, 0x4350, 
- 0x382c, 0x0b18, 0x173c, 0x2c08, 0x4450, 
- 0x392c, 0x0b1c, 0x1740, 0x2d08, 0x4550, 
- 0x3a2c, 0x0a1c, 0x1640, 0x2e08, 0x4650, 
- 0x3b2c, 0x091c, 0x1540, 0x2f08, 0x4750, 
- 0x3b30, 0x081c, 0x1440, 0x2f0c, 0x4754, 
- 0x3a30, 0x071c, 0x1340, 0x2e0c, 0x4654, 
- 0x3930, 0x061c, 0x1240, 0x2d0c, 0x4554, 
- 0x3830, 0x0620, 0x1244, 0x2c0c, 0x4454, 
- 0x3730, 0x0720, 0x1344, 0x2b0c, 0x4354, 
- 0x3630, 0x0820, 0x1444, 0x2a0c, 0x4254, 
- 0x3634, 0x0920, 0x1544, 0x2a10, 0x4258, 
- 0x3734, 0x0a20, 0x1644, 0x2b10, 0x4458, 
- 0x3834, 0x0b20, 0x1744, 0x2c10, 0x4658, 
- 0x3c24, 0x0f10, 0x1b34, 0x3000, 0x0048, 
- 0x3d24, 0x1010, 0x1c34, 0x3100, 0x0148, 
- 0x3e24, 0x1110, 0x1d34, 0x3200, 0x0248, 
- 0x3f24, 0x1114, 0x1d38, 0x3300, 0x0348, 
- 0x4024, 0x1014, 0x1c38, 0x3400, 0x0448, 
- 0x4124, 0x0f14, 0x1b38, 0x3500, 0x0548, 
- 0x4128, 0x0e14, 0x1a38, 0x3504, 0x054c, 
- 0x4028, 0x0d14, 0x1938, 0x3404, 0x044c, 
- 0x3f28, 0x0c14, 0x1838, 0x3304, 0x034c, 
- 0x3e28, 0x0c18, 0x183c, 0x3204, 0x024c, 
- 0x3d28, 0x0d18, 0x193c, 0x3104, 0x014c, 
- 0x3c28, 0x0e18, 0x1a3c, 0x3004, 0x004c, 
- 0x3c2c, 0x0f18, 0x1b3c, 0x3008, 0x0050, 
- 0x3d2c, 0x1018, 0x1c3c, 0x3108, 0x0150, 
- 0x3e2c, 0x1118, 0x1d3c, 0x3208, 0x0250, 
- 0x3f2c, 0x111c, 0x1d40, 0x3308, 0x0350, 
- 0x402c, 0x101c, 0x1c40, 0x3408, 0x0450, 
- 0x412c, 0x0f1c, 0x1b40, 0x3508, 0x0550, 
- 0x4130, 0x0e1c, 0x1a40, 0x350c, 0x0554, 
- 0x4030, 0x0d1c, 0x1940, 0x340c, 0x0454, 
- 0x3f30, 0x0c1c, 0x1840, 0x330c, 0x0354, 
- 0x3e30, 0x0c20, 0x1844, 0x320c, 0x0254, 
- 0x3d30, 0x0d20, 0x1944, 0x310c, 0x0154, 
- 0x3c30, 0x0e20, 0x1a44, 0x300c, 0x0054, 
- 0x3c34, 0x0f20, 0x1b44, 0x3010, 0x0058, 
- 0x3d34, 0x1020, 0x1c44, 0x3110, 0x0258, 
- 0x3e34, 0x1120, 0x1d44, 0x3210, 0x0458, 
- 0x4224, 0x1510, 0x2134, 0x3600, 0x0648, 
- 0x4324, 0x1610, 0x2234, 0x3700, 0x0748, 
- 0x4424, 0x1710, 0x2334, 0x3800, 0x0848, 
- 0x4524, 0x1714, 0x2338, 0x3900, 0x0948, 
- 0x4624, 0x1614, 0x2238, 0x3a00, 0x0a48, 
- 0x4724, 0x1514, 0x2138, 0x3b00, 0x0b48, 
- 0x4728, 0x1414, 0x2038, 0x3b04, 0x0b4c, 
- 0x4628, 0x1314, 0x1f38, 0x3a04, 0x0a4c, 
- 0x4528, 0x1214, 0x1e38, 0x3904, 0x094c, 
- 0x4428, 0x1218, 0x1e3c, 0x3804, 0x084c, 
- 0x4328, 0x1318, 0x1f3c, 0x3704, 0x074c, 
- 0x4228, 0x1418, 0x203c, 0x3604, 0x064c, 
- 0x422c, 0x1518, 0x213c, 0x3608, 0x0650, 
- 0x432c, 0x1618, 0x223c, 0x3708, 0x0750, 
- 0x442c, 0x1718, 0x233c, 0x3808, 0x0850, 
- 0x452c, 0x171c, 0x2340, 0x3908, 0x0950, 
- 0x462c, 0x161c, 0x2240, 0x3a08, 0x0a50, 
- 0x472c, 0x151c, 0x2140, 0x3b08, 0x0b50, 
- 0x4730, 0x141c, 0x2040, 0x3b0c, 0x0b54, 
- 0x4630, 0x131c, 0x1f40, 0x3a0c, 0x0a54, 
- 0x4530, 0x121c, 0x1e40, 0x390c, 0x0954, 
- 0x4430, 0x1220, 0x1e44, 0x380c, 0x0854, 
- 0x4330, 0x1320, 0x1f44, 0x370c, 0x0754, 
- 0x4230, 0x1420, 0x2044, 0x360c, 0x0654, 
- 0x4234, 0x1520, 0x2144, 0x3610, 0x0658, 
- 0x4334, 0x1620, 0x2244, 0x3710, 0x0858, 
- 0x4434, 0x1720, 0x2344, 0x3810, 0x0a58, 
- 0x0024, 0x1b10, 0x2734, 0x3c00, 0x0c48, 
- 0x0124, 0x1c10, 0x2834, 0x3d00, 0x0d48, 
- 0x0224, 0x1d10, 0x2934, 0x3e00, 0x0e48, 
- 0x0324, 0x1d14, 0x2938, 0x3f00, 0x0f48, 
- 0x0424, 0x1c14, 0x2838, 0x4000, 0x1048, 
- 0x0524, 0x1b14, 0x2738, 0x4100, 0x1148, 
- 0x0528, 0x1a14, 0x2638, 0x4104, 0x114c, 
- 0x0428, 0x1914, 0x2538, 0x4004, 0x104c, 
- 0x0328, 0x1814, 0x2438, 0x3f04, 0x0f4c, 
- 0x0228, 0x1818, 0x243c, 0x3e04, 0x0e4c, 
- 0x0128, 0x1918, 0x253c, 0x3d04, 0x0d4c, 
- 0x0028, 0x1a18, 0x263c, 0x3c04, 0x0c4c, 
- 0x002c, 0x1b18, 0x273c, 0x3c08, 0x0c50, 
- 0x012c, 0x1c18, 0x283c, 0x3d08, 0x0d50, 
- 0x022c, 0x1d18, 0x293c, 0x3e08, 0x0e50, 
- 0x032c, 0x1d1c, 0x2940, 0x3f08, 0x0f50, 
- 0x042c, 0x1c1c, 0x2840, 0x4008, 0x1050, 
- 0x052c, 0x1b1c, 0x2740, 0x4108, 0x1150, 
- 0x0530, 0x1a1c, 0x2640, 0x410c, 0x1154, 
- 0x0430, 0x191c, 0x2540, 0x400c, 0x1054, 
- 0x0330, 0x181c, 0x2440, 0x3f0c, 0x0f54, 
- 0x0230, 0x1820, 0x2444, 0x3e0c, 0x0e54, 
- 0x0130, 0x1920, 0x2544, 0x3d0c, 0x0d54, 
- 0x0030, 0x1a20, 0x2644, 0x3c0c, 0x0c54, 
- 0x0034, 0x1b20, 0x2744, 0x3c10, 0x0c58, 
- 0x0134, 0x1c20, 0x2844, 0x3d10, 0x0e58, 
- 0x0234, 0x1d20, 0x2944, 0x3e10, 0x1058, 
- 0x0624, 0x2110, 0x2d34, 0x4200, 0x1248, 
- 0x0724, 0x2210, 0x2e34, 0x4300, 0x1348, 
- 0x0824, 0x2310, 0x2f34, 0x4400, 0x1448, 
- 0x0924, 0x2314, 0x2f38, 0x4500, 0x1548, 
- 0x0a24, 0x2214, 0x2e38, 0x4600, 0x1648, 
- 0x0b24, 0x2114, 0x2d38, 0x4700, 0x1748, 
- 0x0b28, 0x2014, 0x2c38, 0x4704, 0x174c, 
- 0x0a28, 0x1f14, 0x2b38, 0x4604, 0x164c, 
- 0x0928, 0x1e14, 0x2a38, 0x4504, 0x154c, 
- 0x0828, 0x1e18, 0x2a3c, 0x4404, 0x144c, 
- 0x0728, 0x1f18, 0x2b3c, 0x4304, 0x134c, 
- 0x0628, 0x2018, 0x2c3c, 0x4204, 0x124c, 
- 0x062c, 0x2118, 0x2d3c, 0x4208, 0x1250, 
- 0x072c, 0x2218, 0x2e3c, 0x4308, 0x1350, 
- 0x082c, 0x2318, 0x2f3c, 0x4408, 0x1450, 
- 0x092c, 0x231c, 0x2f40, 0x4508, 0x1550, 
- 0x0a2c, 0x221c, 0x2e40, 0x4608, 0x1650, 
- 0x0b2c, 0x211c, 0x2d40, 0x4708, 0x1750, 
- 0x0b30, 0x201c, 0x2c40, 0x470c, 0x1754, 
- 0x0a30, 0x1f1c, 0x2b40, 0x460c, 0x1654, 
- 0x0930, 0x1e1c, 0x2a40, 0x450c, 0x1554, 
- 0x0830, 0x1e20, 0x2a44, 0x440c, 0x1454, 
- 0x0730, 0x1f20, 0x2b44, 0x430c, 0x1354, 
- 0x0630, 0x2020, 0x2c44, 0x420c, 0x1254, 
- 0x0634, 0x2120, 0x2d44, 0x4210, 0x1258, 
- 0x0734, 0x2220, 0x2e44, 0x4310, 0x1458, 
+ 0x0c24, 0x2710, 0x3334, 0x0000, 0x1848,
+ 0x0d24, 0x2810, 0x3434, 0x0100, 0x1948,
+ 0x0e24, 0x2910, 0x3534, 0x0200, 0x1a48,
+ 0x0f24, 0x2914, 0x3538, 0x0300, 0x1b48,
+ 0x1024, 0x2814, 0x3438, 0x0400, 0x1c48,
+ 0x1124, 0x2714, 0x3338, 0x0500, 0x1d48,
+ 0x1128, 0x2614, 0x3238, 0x0504, 0x1d4c,
+ 0x1028, 0x2514, 0x3138, 0x0404, 0x1c4c,
+ 0x0f28, 0x2414, 0x3038, 0x0304, 0x1b4c,
+ 0x0e28, 0x2418, 0x303c, 0x0204, 0x1a4c,
+ 0x0d28, 0x2518, 0x313c, 0x0104, 0x194c,
+ 0x0c28, 0x2618, 0x323c, 0x0004, 0x184c,
+ 0x0c2c, 0x2718, 0x333c, 0x0008, 0x1850,
+ 0x0d2c, 0x2818, 0x343c, 0x0108, 0x1950,
+ 0x0e2c, 0x2918, 0x353c, 0x0208, 0x1a50,
+ 0x0f2c, 0x291c, 0x3540, 0x0308, 0x1b50,
+ 0x102c, 0x281c, 0x3440, 0x0408, 0x1c50,
+ 0x112c, 0x271c, 0x3340, 0x0508, 0x1d50,
+ 0x1130, 0x261c, 0x3240, 0x050c, 0x1d54,
+ 0x1030, 0x251c, 0x3140, 0x040c, 0x1c54,
+ 0x0f30, 0x241c, 0x3040, 0x030c, 0x1b54,
+ 0x0e30, 0x2420, 0x3044, 0x020c, 0x1a54,
+ 0x0d30, 0x2520, 0x3144, 0x010c, 0x1954,
+ 0x0c30, 0x2620, 0x3244, 0x000c, 0x1854,
+ 0x0c34, 0x2720, 0x3344, 0x0010, 0x1858,
+ 0x0d34, 0x2820, 0x3444, 0x0110, 0x1a58,
+ 0x0e34, 0x2920, 0x3544, 0x0210, 0x1c58,
+ 0x1224, 0x2d10, 0x3934, 0x0600, 0x1e48,
+ 0x1324, 0x2e10, 0x3a34, 0x0700, 0x1f48,
+ 0x1424, 0x2f10, 0x3b34, 0x0800, 0x2048,
+ 0x1524, 0x2f14, 0x3b38, 0x0900, 0x2148,
+ 0x1624, 0x2e14, 0x3a38, 0x0a00, 0x2248,
+ 0x1724, 0x2d14, 0x3938, 0x0b00, 0x2348,
+ 0x1728, 0x2c14, 0x3838, 0x0b04, 0x234c,
+ 0x1628, 0x2b14, 0x3738, 0x0a04, 0x224c,
+ 0x1528, 0x2a14, 0x3638, 0x0904, 0x214c,
+ 0x1428, 0x2a18, 0x363c, 0x0804, 0x204c,
+ 0x1328, 0x2b18, 0x373c, 0x0704, 0x1f4c,
+ 0x1228, 0x2c18, 0x383c, 0x0604, 0x1e4c,
+ 0x122c, 0x2d18, 0x393c, 0x0608, 0x1e50,
+ 0x132c, 0x2e18, 0x3a3c, 0x0708, 0x1f50,
+ 0x142c, 0x2f18, 0x3b3c, 0x0808, 0x2050,
+ 0x152c, 0x2f1c, 0x3b40, 0x0908, 0x2150,
+ 0x162c, 0x2e1c, 0x3a40, 0x0a08, 0x2250,
+ 0x172c, 0x2d1c, 0x3940, 0x0b08, 0x2350,
+ 0x1730, 0x2c1c, 0x3840, 0x0b0c, 0x2354,
+ 0x1630, 0x2b1c, 0x3740, 0x0a0c, 0x2254,
+ 0x1530, 0x2a1c, 0x3640, 0x090c, 0x2154,
+ 0x1430, 0x2a20, 0x3644, 0x080c, 0x2054,
+ 0x1330, 0x2b20, 0x3744, 0x070c, 0x1f54,
+ 0x1230, 0x2c20, 0x3844, 0x060c, 0x1e54,
+ 0x1234, 0x2d20, 0x3944, 0x0610, 0x1e58,
+ 0x1334, 0x2e20, 0x3a44, 0x0710, 0x2058,
+ 0x1434, 0x2f20, 0x3b44, 0x0810, 0x2258,
+ 0x1824, 0x3310, 0x3f34, 0x0c00, 0x2448,
+ 0x1924, 0x3410, 0x4034, 0x0d00, 0x2548,
+ 0x1a24, 0x3510, 0x4134, 0x0e00, 0x2648,
+ 0x1b24, 0x3514, 0x4138, 0x0f00, 0x2748,
+ 0x1c24, 0x3414, 0x4038, 0x1000, 0x2848,
+ 0x1d24, 0x3314, 0x3f38, 0x1100, 0x2948,
+ 0x1d28, 0x3214, 0x3e38, 0x1104, 0x294c,
+ 0x1c28, 0x3114, 0x3d38, 0x1004, 0x284c,
+ 0x1b28, 0x3014, 0x3c38, 0x0f04, 0x274c,
+ 0x1a28, 0x3018, 0x3c3c, 0x0e04, 0x264c,
+ 0x1928, 0x3118, 0x3d3c, 0x0d04, 0x254c,
+ 0x1828, 0x3218, 0x3e3c, 0x0c04, 0x244c,
+ 0x182c, 0x3318, 0x3f3c, 0x0c08, 0x2450,
+ 0x192c, 0x3418, 0x403c, 0x0d08, 0x2550,
+ 0x1a2c, 0x3518, 0x413c, 0x0e08, 0x2650,
+ 0x1b2c, 0x351c, 0x4140, 0x0f08, 0x2750,
+ 0x1c2c, 0x341c, 0x4040, 0x1008, 0x2850,
+ 0x1d2c, 0x331c, 0x3f40, 0x1108, 0x2950,
+ 0x1d30, 0x321c, 0x3e40, 0x110c, 0x2954,
+ 0x1c30, 0x311c, 0x3d40, 0x100c, 0x2854,
+ 0x1b30, 0x301c, 0x3c40, 0x0f0c, 0x2754,
+ 0x1a30, 0x3020, 0x3c44, 0x0e0c, 0x2654,
+ 0x1930, 0x3120, 0x3d44, 0x0d0c, 0x2554,
+ 0x1830, 0x3220, 0x3e44, 0x0c0c, 0x2454,
+ 0x1834, 0x3320, 0x3f44, 0x0c10, 0x2458,
+ 0x1934, 0x3420, 0x4044, 0x0d10, 0x2658,
+ 0x1a34, 0x3520, 0x4144, 0x0e10, 0x2858,
+ 0x1e24, 0x3910, 0x4534, 0x1200, 0x2a48,
+ 0x1f24, 0x3a10, 0x4634, 0x1300, 0x2b48,
+ 0x2024, 0x3b10, 0x4734, 0x1400, 0x2c48,
+ 0x2124, 0x3b14, 0x4738, 0x1500, 0x2d48,
+ 0x2224, 0x3a14, 0x4638, 0x1600, 0x2e48,
+ 0x2324, 0x3914, 0x4538, 0x1700, 0x2f48,
+ 0x2328, 0x3814, 0x4438, 0x1704, 0x2f4c,
+ 0x2228, 0x3714, 0x4338, 0x1604, 0x2e4c,
+ 0x2128, 0x3614, 0x4238, 0x1504, 0x2d4c,
+ 0x2028, 0x3618, 0x423c, 0x1404, 0x2c4c,
+ 0x1f28, 0x3718, 0x433c, 0x1304, 0x2b4c,
+ 0x1e28, 0x3818, 0x443c, 0x1204, 0x2a4c,
+ 0x1e2c, 0x3918, 0x453c, 0x1208, 0x2a50,
+ 0x1f2c, 0x3a18, 0x463c, 0x1308, 0x2b50,
+ 0x202c, 0x3b18, 0x473c, 0x1408, 0x2c50,
+ 0x212c, 0x3b1c, 0x4740, 0x1508, 0x2d50,
+ 0x222c, 0x3a1c, 0x4640, 0x1608, 0x2e50,
+ 0x232c, 0x391c, 0x4540, 0x1708, 0x2f50,
+ 0x2330, 0x381c, 0x4440, 0x170c, 0x2f54,
+ 0x2230, 0x371c, 0x4340, 0x160c, 0x2e54,
+ 0x2130, 0x361c, 0x4240, 0x150c, 0x2d54,
+ 0x2030, 0x3620, 0x4244, 0x140c, 0x2c54,
+ 0x1f30, 0x3720, 0x4344, 0x130c, 0x2b54,
+ 0x1e30, 0x3820, 0x4444, 0x120c, 0x2a54,
+ 0x1e34, 0x3920, 0x4544, 0x1210, 0x2a58,
+ 0x1f34, 0x3a20, 0x4644, 0x1310, 0x2c58,
+ 0x2034, 0x3b20, 0x4744, 0x1410, 0x2e58,
+ 0x2424, 0x3f10, 0x0334, 0x1800, 0x3048,
+ 0x2524, 0x4010, 0x0434, 0x1900, 0x3148,
+ 0x2624, 0x4110, 0x0534, 0x1a00, 0x3248,
+ 0x2724, 0x4114, 0x0538, 0x1b00, 0x3348,
+ 0x2824, 0x4014, 0x0438, 0x1c00, 0x3448,
+ 0x2924, 0x3f14, 0x0338, 0x1d00, 0x3548,
+ 0x2928, 0x3e14, 0x0238, 0x1d04, 0x354c,
+ 0x2828, 0x3d14, 0x0138, 0x1c04, 0x344c,
+ 0x2728, 0x3c14, 0x0038, 0x1b04, 0x334c,
+ 0x2628, 0x3c18, 0x003c, 0x1a04, 0x324c,
+ 0x2528, 0x3d18, 0x013c, 0x1904, 0x314c,
+ 0x2428, 0x3e18, 0x023c, 0x1804, 0x304c,
+ 0x242c, 0x3f18, 0x033c, 0x1808, 0x3050,
+ 0x252c, 0x4018, 0x043c, 0x1908, 0x3150,
+ 0x262c, 0x4118, 0x053c, 0x1a08, 0x3250,
+ 0x272c, 0x411c, 0x0540, 0x1b08, 0x3350,
+ 0x282c, 0x401c, 0x0440, 0x1c08, 0x3450,
+ 0x292c, 0x3f1c, 0x0340, 0x1d08, 0x3550,
+ 0x2930, 0x3e1c, 0x0240, 0x1d0c, 0x3554,
+ 0x2830, 0x3d1c, 0x0140, 0x1c0c, 0x3454,
+ 0x2730, 0x3c1c, 0x0040, 0x1b0c, 0x3354,
+ 0x2630, 0x3c20, 0x0044, 0x1a0c, 0x3254,
+ 0x2530, 0x3d20, 0x0144, 0x190c, 0x3154,
+ 0x2430, 0x3e20, 0x0244, 0x180c, 0x3054,
+ 0x2434, 0x3f20, 0x0344, 0x1810, 0x3058,
+ 0x2534, 0x4020, 0x0444, 0x1910, 0x3258,
+ 0x2634, 0x4120, 0x0544, 0x1a10, 0x3458,
+ 0x2a24, 0x4510, 0x0934, 0x1e00, 0x3648,
+ 0x2b24, 0x4610, 0x0a34, 0x1f00, 0x3748,
+ 0x2c24, 0x4710, 0x0b34, 0x2000, 0x3848,
+ 0x2d24, 0x4714, 0x0b38, 0x2100, 0x3948,
+ 0x2e24, 0x4614, 0x0a38, 0x2200, 0x3a48,
+ 0x2f24, 0x4514, 0x0938, 0x2300, 0x3b48,
+ 0x2f28, 0x4414, 0x0838, 0x2304, 0x3b4c,
+ 0x2e28, 0x4314, 0x0738, 0x2204, 0x3a4c,
+ 0x2d28, 0x4214, 0x0638, 0x2104, 0x394c,
+ 0x2c28, 0x4218, 0x063c, 0x2004, 0x384c,
+ 0x2b28, 0x4318, 0x073c, 0x1f04, 0x374c,
+ 0x2a28, 0x4418, 0x083c, 0x1e04, 0x364c,
+ 0x2a2c, 0x4518, 0x093c, 0x1e08, 0x3650,
+ 0x2b2c, 0x4618, 0x0a3c, 0x1f08, 0x3750,
+ 0x2c2c, 0x4718, 0x0b3c, 0x2008, 0x3850,
+ 0x2d2c, 0x471c, 0x0b40, 0x2108, 0x3950,
+ 0x2e2c, 0x461c, 0x0a40, 0x2208, 0x3a50,
+ 0x2f2c, 0x451c, 0x0940, 0x2308, 0x3b50,
+ 0x2f30, 0x441c, 0x0840, 0x230c, 0x3b54,
+ 0x2e30, 0x431c, 0x0740, 0x220c, 0x3a54,
+ 0x2d30, 0x421c, 0x0640, 0x210c, 0x3954,
+ 0x2c30, 0x4220, 0x0644, 0x200c, 0x3854,
+ 0x2b30, 0x4320, 0x0744, 0x1f0c, 0x3754,
+ 0x2a30, 0x4420, 0x0844, 0x1e0c, 0x3654,
+ 0x2a34, 0x4520, 0x0944, 0x1e10, 0x3658,
+ 0x2b34, 0x4620, 0x0a44, 0x1f10, 0x3858,
+ 0x2c34, 0x4720, 0x0b44, 0x2010, 0x3a58,
+ 0x3024, 0x0310, 0x0f34, 0x2400, 0x3c48,
+ 0x3124, 0x0410, 0x1034, 0x2500, 0x3d48,
+ 0x3224, 0x0510, 0x1134, 0x2600, 0x3e48,
+ 0x3324, 0x0514, 0x1138, 0x2700, 0x3f48,
+ 0x3424, 0x0414, 0x1038, 0x2800, 0x4048,
+ 0x3524, 0x0314, 0x0f38, 0x2900, 0x4148,
+ 0x3528, 0x0214, 0x0e38, 0x2904, 0x414c,
+ 0x3428, 0x0114, 0x0d38, 0x2804, 0x404c,
+ 0x3328, 0x0014, 0x0c38, 0x2704, 0x3f4c,
+ 0x3228, 0x0018, 0x0c3c, 0x2604, 0x3e4c,
+ 0x3128, 0x0118, 0x0d3c, 0x2504, 0x3d4c,
+ 0x3028, 0x0218, 0x0e3c, 0x2404, 0x3c4c,
+ 0x302c, 0x0318, 0x0f3c, 0x2408, 0x3c50,
+ 0x312c, 0x0418, 0x103c, 0x2508, 0x3d50,
+ 0x322c, 0x0518, 0x113c, 0x2608, 0x3e50,
+ 0x332c, 0x051c, 0x1140, 0x2708, 0x3f50,
+ 0x342c, 0x041c, 0x1040, 0x2808, 0x4050,
+ 0x352c, 0x031c, 0x0f40, 0x2908, 0x4150,
+ 0x3530, 0x021c, 0x0e40, 0x290c, 0x4154,
+ 0x3430, 0x011c, 0x0d40, 0x280c, 0x4054,
+ 0x3330, 0x001c, 0x0c40, 0x270c, 0x3f54,
+ 0x3230, 0x0020, 0x0c44, 0x260c, 0x3e54,
+ 0x3130, 0x0120, 0x0d44, 0x250c, 0x3d54,
+ 0x3030, 0x0220, 0x0e44, 0x240c, 0x3c54,
+ 0x3034, 0x0320, 0x0f44, 0x2410, 0x3c58,
+ 0x3134, 0x0420, 0x1044, 0x2510, 0x3e58,
+ 0x3234, 0x0520, 0x1144, 0x2610, 0x4058,
+ 0x3624, 0x0910, 0x1534, 0x2a00, 0x4248,
+ 0x3724, 0x0a10, 0x1634, 0x2b00, 0x4348,
+ 0x3824, 0x0b10, 0x1734, 0x2c00, 0x4448,
+ 0x3924, 0x0b14, 0x1738, 0x2d00, 0x4548,
+ 0x3a24, 0x0a14, 0x1638, 0x2e00, 0x4648,
+ 0x3b24, 0x0914, 0x1538, 0x2f00, 0x4748,
+ 0x3b28, 0x0814, 0x1438, 0x2f04, 0x474c,
+ 0x3a28, 0x0714, 0x1338, 0x2e04, 0x464c,
+ 0x3928, 0x0614, 0x1238, 0x2d04, 0x454c,
+ 0x3828, 0x0618, 0x123c, 0x2c04, 0x444c,
+ 0x3728, 0x0718, 0x133c, 0x2b04, 0x434c,
+ 0x3628, 0x0818, 0x143c, 0x2a04, 0x424c,
+ 0x362c, 0x0918, 0x153c, 0x2a08, 0x4250,
+ 0x372c, 0x0a18, 0x163c, 0x2b08, 0x4350,
+ 0x382c, 0x0b18, 0x173c, 0x2c08, 0x4450,
+ 0x392c, 0x0b1c, 0x1740, 0x2d08, 0x4550,
+ 0x3a2c, 0x0a1c, 0x1640, 0x2e08, 0x4650,
+ 0x3b2c, 0x091c, 0x1540, 0x2f08, 0x4750,
+ 0x3b30, 0x081c, 0x1440, 0x2f0c, 0x4754,
+ 0x3a30, 0x071c, 0x1340, 0x2e0c, 0x4654,
+ 0x3930, 0x061c, 0x1240, 0x2d0c, 0x4554,
+ 0x3830, 0x0620, 0x1244, 0x2c0c, 0x4454,
+ 0x3730, 0x0720, 0x1344, 0x2b0c, 0x4354,
+ 0x3630, 0x0820, 0x1444, 0x2a0c, 0x4254,
+ 0x3634, 0x0920, 0x1544, 0x2a10, 0x4258,
+ 0x3734, 0x0a20, 0x1644, 0x2b10, 0x4458,
+ 0x3834, 0x0b20, 0x1744, 0x2c10, 0x4658,
+ 0x3c24, 0x0f10, 0x1b34, 0x3000, 0x0048,
+ 0x3d24, 0x1010, 0x1c34, 0x3100, 0x0148,
+ 0x3e24, 0x1110, 0x1d34, 0x3200, 0x0248,
+ 0x3f24, 0x1114, 0x1d38, 0x3300, 0x0348,
+ 0x4024, 0x1014, 0x1c38, 0x3400, 0x0448,
+ 0x4124, 0x0f14, 0x1b38, 0x3500, 0x0548,
+ 0x4128, 0x0e14, 0x1a38, 0x3504, 0x054c,
+ 0x4028, 0x0d14, 0x1938, 0x3404, 0x044c,
+ 0x3f28, 0x0c14, 0x1838, 0x3304, 0x034c,
+ 0x3e28, 0x0c18, 0x183c, 0x3204, 0x024c,
+ 0x3d28, 0x0d18, 0x193c, 0x3104, 0x014c,
+ 0x3c28, 0x0e18, 0x1a3c, 0x3004, 0x004c,
+ 0x3c2c, 0x0f18, 0x1b3c, 0x3008, 0x0050,
+ 0x3d2c, 0x1018, 0x1c3c, 0x3108, 0x0150,
+ 0x3e2c, 0x1118, 0x1d3c, 0x3208, 0x0250,
+ 0x3f2c, 0x111c, 0x1d40, 0x3308, 0x0350,
+ 0x402c, 0x101c, 0x1c40, 0x3408, 0x0450,
+ 0x412c, 0x0f1c, 0x1b40, 0x3508, 0x0550,
+ 0x4130, 0x0e1c, 0x1a40, 0x350c, 0x0554,
+ 0x4030, 0x0d1c, 0x1940, 0x340c, 0x0454,
+ 0x3f30, 0x0c1c, 0x1840, 0x330c, 0x0354,
+ 0x3e30, 0x0c20, 0x1844, 0x320c, 0x0254,
+ 0x3d30, 0x0d20, 0x1944, 0x310c, 0x0154,
+ 0x3c30, 0x0e20, 0x1a44, 0x300c, 0x0054,
+ 0x3c34, 0x0f20, 0x1b44, 0x3010, 0x0058,
+ 0x3d34, 0x1020, 0x1c44, 0x3110, 0x0258,
+ 0x3e34, 0x1120, 0x1d44, 0x3210, 0x0458,
+ 0x4224, 0x1510, 0x2134, 0x3600, 0x0648,
+ 0x4324, 0x1610, 0x2234, 0x3700, 0x0748,
+ 0x4424, 0x1710, 0x2334, 0x3800, 0x0848,
+ 0x4524, 0x1714, 0x2338, 0x3900, 0x0948,
+ 0x4624, 0x1614, 0x2238, 0x3a00, 0x0a48,
+ 0x4724, 0x1514, 0x2138, 0x3b00, 0x0b48,
+ 0x4728, 0x1414, 0x2038, 0x3b04, 0x0b4c,
+ 0x4628, 0x1314, 0x1f38, 0x3a04, 0x0a4c,
+ 0x4528, 0x1214, 0x1e38, 0x3904, 0x094c,
+ 0x4428, 0x1218, 0x1e3c, 0x3804, 0x084c,
+ 0x4328, 0x1318, 0x1f3c, 0x3704, 0x074c,
+ 0x4228, 0x1418, 0x203c, 0x3604, 0x064c,
+ 0x422c, 0x1518, 0x213c, 0x3608, 0x0650,
+ 0x432c, 0x1618, 0x223c, 0x3708, 0x0750,
+ 0x442c, 0x1718, 0x233c, 0x3808, 0x0850,
+ 0x452c, 0x171c, 0x2340, 0x3908, 0x0950,
+ 0x462c, 0x161c, 0x2240, 0x3a08, 0x0a50,
+ 0x472c, 0x151c, 0x2140, 0x3b08, 0x0b50,
+ 0x4730, 0x141c, 0x2040, 0x3b0c, 0x0b54,
+ 0x4630, 0x131c, 0x1f40, 0x3a0c, 0x0a54,
+ 0x4530, 0x121c, 0x1e40, 0x390c, 0x0954,
+ 0x4430, 0x1220, 0x1e44, 0x380c, 0x0854,
+ 0x4330, 0x1320, 0x1f44, 0x370c, 0x0754,
+ 0x4230, 0x1420, 0x2044, 0x360c, 0x0654,
+ 0x4234, 0x1520, 0x2144, 0x3610, 0x0658,
+ 0x4334, 0x1620, 0x2244, 0x3710, 0x0858,
+ 0x4434, 0x1720, 0x2344, 0x3810, 0x0a58,
+ 0x0024, 0x1b10, 0x2734, 0x3c00, 0x0c48,
+ 0x0124, 0x1c10, 0x2834, 0x3d00, 0x0d48,
+ 0x0224, 0x1d10, 0x2934, 0x3e00, 0x0e48,
+ 0x0324, 0x1d14, 0x2938, 0x3f00, 0x0f48,
+ 0x0424, 0x1c14, 0x2838, 0x4000, 0x1048,
+ 0x0524, 0x1b14, 0x2738, 0x4100, 0x1148,
+ 0x0528, 0x1a14, 0x2638, 0x4104, 0x114c,
+ 0x0428, 0x1914, 0x2538, 0x4004, 0x104c,
+ 0x0328, 0x1814, 0x2438, 0x3f04, 0x0f4c,
+ 0x0228, 0x1818, 0x243c, 0x3e04, 0x0e4c,
+ 0x0128, 0x1918, 0x253c, 0x3d04, 0x0d4c,
+ 0x0028, 0x1a18, 0x263c, 0x3c04, 0x0c4c,
+ 0x002c, 0x1b18, 0x273c, 0x3c08, 0x0c50,
+ 0x012c, 0x1c18, 0x283c, 0x3d08, 0x0d50,
+ 0x022c, 0x1d18, 0x293c, 0x3e08, 0x0e50,
+ 0x032c, 0x1d1c, 0x2940, 0x3f08, 0x0f50,
+ 0x042c, 0x1c1c, 0x2840, 0x4008, 0x1050,
+ 0x052c, 0x1b1c, 0x2740, 0x4108, 0x1150,
+ 0x0530, 0x1a1c, 0x2640, 0x410c, 0x1154,
+ 0x0430, 0x191c, 0x2540, 0x400c, 0x1054,
+ 0x0330, 0x181c, 0x2440, 0x3f0c, 0x0f54,
+ 0x0230, 0x1820, 0x2444, 0x3e0c, 0x0e54,
+ 0x0130, 0x1920, 0x2544, 0x3d0c, 0x0d54,
+ 0x0030, 0x1a20, 0x2644, 0x3c0c, 0x0c54,
+ 0x0034, 0x1b20, 0x2744, 0x3c10, 0x0c58,
+ 0x0134, 0x1c20, 0x2844, 0x3d10, 0x0e58,
+ 0x0234, 0x1d20, 0x2944, 0x3e10, 0x1058,
+ 0x0624, 0x2110, 0x2d34, 0x4200, 0x1248,
+ 0x0724, 0x2210, 0x2e34, 0x4300, 0x1348,
+ 0x0824, 0x2310, 0x2f34, 0x4400, 0x1448,
+ 0x0924, 0x2314, 0x2f38, 0x4500, 0x1548,
+ 0x0a24, 0x2214, 0x2e38, 0x4600, 0x1648,
+ 0x0b24, 0x2114, 0x2d38, 0x4700, 0x1748,
+ 0x0b28, 0x2014, 0x2c38, 0x4704, 0x174c,
+ 0x0a28, 0x1f14, 0x2b38, 0x4604, 0x164c,
+ 0x0928, 0x1e14, 0x2a38, 0x4504, 0x154c,
+ 0x0828, 0x1e18, 0x2a3c, 0x4404, 0x144c,
+ 0x0728, 0x1f18, 0x2b3c, 0x4304, 0x134c,
+ 0x0628, 0x2018, 0x2c3c, 0x4204, 0x124c,
+ 0x062c, 0x2118, 0x2d3c, 0x4208, 0x1250,
+ 0x072c, 0x2218, 0x2e3c, 0x4308, 0x1350,
+ 0x082c, 0x2318, 0x2f3c, 0x4408, 0x1450,
+ 0x092c, 0x231c, 0x2f40, 0x4508, 0x1550,
+ 0x0a2c, 0x221c, 0x2e40, 0x4608, 0x1650,
+ 0x0b2c, 0x211c, 0x2d40, 0x4708, 0x1750,
+ 0x0b30, 0x201c, 0x2c40, 0x470c, 0x1754,
+ 0x0a30, 0x1f1c, 0x2b40, 0x460c, 0x1654,
+ 0x0930, 0x1e1c, 0x2a40, 0x450c, 0x1554,
+ 0x0830, 0x1e20, 0x2a44, 0x440c, 0x1454,
+ 0x0730, 0x1f20, 0x2b44, 0x430c, 0x1354,
+ 0x0630, 0x2020, 0x2c44, 0x420c, 0x1254,
+ 0x0634, 0x2120, 0x2d44, 0x4210, 0x1258,
+ 0x0734, 0x2220, 0x2e44, 0x4310, 0x1458,
  0x0834, 0x2320, 0x2f44, 0x4410, 0x1658,
 };
 
@@ -1262,7 +1262,7 @@ static const uint16_t dv_audio_shuffle525[10][9] = {
   { 12, 42, 72,  2, 32, 62, 22, 52, 82 },
   { 18, 48, 78,  8, 38, 68, 28, 58, 88 },
   { 24, 54, 84, 14, 44, 74,  4, 34, 64 },
-  
+
   {  1, 31, 61, 21, 51, 81, 11, 41, 71 }, /* 2nd channel */
   {  7, 37, 67, 27, 57, 87, 17, 47, 77 },
   { 13, 43, 73,  3, 33, 63, 23, 53, 83 },
@@ -1275,21 +1275,21 @@ static const uint16_t dv_audio_shuffle625[12][9] = {
   {   6,  42,  78,  32,  68, 104,  22,  58,  94},
   {  12,  48,  84,   2,  38,  74,  28,  64, 100},
   {  18,  54,  90,   8,  44,  80,  34,  70, 106},
-  {  24,  60,  96,  14,  50,  86,   4,  40,  76},  
+  {  24,  60,  96,  14,  50,  86,   4,  40,  76},
   {  30,  66, 102,  20,  56,  92,  10,  46,  82},
-	
+
   {   1,  37,  73,  27,  63,  99,  17,  53,  89}, /* 2nd channel */
   {   7,  43,  79,  33,  69, 105,  23,  59,  95},
   {  13,  49,  85,   3,  39,  75,  29,  65, 101},
   {  19,  55,  91,   9,  45,  81,  35,  71, 107},
-  {  25,  61,  97,  15,  51,  87,   5,  41,  77},  
+  {  25,  61,  97,  15,  51,  87,   5,  41,  77},
   {  31,  67, 103,  21,  57,  93,  11,  47,  83},
 };
 
 static const __attribute__((unused)) int dv_audio_frequency[3] = {
     48000, 44100, 32000,
 };
-    
+
 static const DVprofile dv_profiles[] = {
     { .dsf = 0,
       .frame_size = 120000,        /* IEC 61834, SMPTE-314M - 525/60 (NTSC) */
@@ -1306,7 +1306,7 @@ static const DVprofile dv_profiles[] = {
       .audio_min_samples = { 1580, 1452, 1053 }, /* for 48, 44.1 and 32Khz */
       .audio_samples_dist = { 1602, 1601, 1602, 1601, 1602 },
       .audio_shuffle = dv_audio_shuffle525,
-    }, 
+    },
     { .dsf = 1,
       .frame_size = 144000,        /* IEC 61834 - 625/50 (PAL) */
       .difseg_size = 12,
@@ -1349,7 +1349,7 @@ static inline const DVprofile* dv_frame_profile(uint8_t* frame)
     else if ((frame[5] & 0x07) == 0) { /* APT flag */
         return &dv_profiles[1];
     }
-    else 
+    else
         return &dv_profiles[2];
 }
 
@@ -1357,10 +1357,10 @@ static inline const DVprofile* dv_codec_profile(AVCodecContext* codec)
 {
     if (codec->width != 720) {
         return NULL;
-    } 
+    }
     else if (codec->height == 480) {
         return &dv_profiles[0];
-    } 
-    else 
+    }
+    else
         return &dv_profiles[1];
 }
diff --git a/src/libffmpeg/libavcodec/error_resilience.c b/src/libffmpeg/libavcodec/error_resilience.c
index 2bb2276cd..9912044ec 100644
--- a/src/libffmpeg/libavcodec/error_resilience.c
+++ b/src/libffmpeg/libavcodec/error_resilience.c
@@ -15,16 +15,16 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file error_resilience.c
  * Error resilience / concealment.
  */
 
 #include <limits.h>
- 
+
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
@@ -35,7 +35,7 @@ static void decode_mb(MpegEncContext *s){
     s->dest[1] = s->current_picture.data[1] + (s->mb_y * 8 * s->uvlinesize) + s->mb_x * 8;
     s->dest[2] = s->current_picture.data[2] + (s->mb_y * 8 * s->uvlinesize) + s->mb_x * 8;
 
-    MPV_decode_mb(s, s->block);    
+    MPV_decode_mb(s, s->block);
 }
 
 /**
@@ -79,8 +79,8 @@ static void filter181(int16_t *data, int width, int height, int stride){
 
         for(x=1; x<width-1; x++){
             int dc;
-            
-            dc= - prev_dc 
+
+            dc= - prev_dc
                 + data[x     + y*stride]*8
                 - data[x + 1 + y*stride];
             dc= (dc*10923 + 32768)>>16;
@@ -88,15 +88,15 @@ static void filter181(int16_t *data, int width, int height, int stride){
             data[x + y*stride]= dc;
         }
     }
-    
+
     /* vertical filter */
     for(x=1; x<width-1; x++){
         int prev_dc= data[x];
 
         for(y=1; y<height-1; y++){
             int dc;
-            
-            dc= - prev_dc 
+
+            dc= - prev_dc
                 + data[x +  y   *stride]*8
                 - data[x + (y+1)*stride];
             dc= (dc*10923 + 32768)>>16;
@@ -108,8 +108,8 @@ static void filter181(int16_t *data, int width, int height, int stride){
 
 /**
  * guess the dc of blocks which dont have a undamaged dc
- * @param w	width in 8 pixel blocks
- * @param h	height in 8 pixel blocks
+ * @param w     width in 8 pixel blocks
+ * @param h     height in 8 pixel blocks
  */
 static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, int is_luma){
     int b_x, b_y;
@@ -120,14 +120,14 @@ static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, i
             int distance[4]={9999,9999,9999,9999};
             int mb_index, error, j;
             int64_t guess, weight_sum;
-            
+
             mb_index= (b_x>>is_luma) + (b_y>>is_luma)*s->mb_stride;
-            
+
             error= s->error_status_table[mb_index];
-            
+
             if(IS_INTER(s->current_picture.mb_type[mb_index])) continue; //inter
             if(!(error&DC_ERROR)) continue;           //dc-ok
-            
+
             /* right block */
             for(j=b_x+1; j<w; j++){
                 int mb_index_j= (j>>is_luma) + (b_y>>is_luma)*s->mb_stride;
@@ -139,7 +139,7 @@ static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, i
                     break;
                 }
             }
-            
+
             /* left block */
             for(j=b_x-1; j>=0; j--){
                 int mb_index_j= (j>>is_luma) + (b_y>>is_luma)*s->mb_stride;
@@ -175,7 +175,7 @@ static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, i
                     break;
                 }
             }
-            
+
             weight_sum=0;
             guess=0;
             for(j=0; j<4; j++){
@@ -192,8 +192,8 @@ static void guess_dc(MpegEncContext *s, int16_t *dc, int w, int h, int stride, i
 
 /**
  * simple horizontal deblocking filter used for error resilience
- * @param w	width in 8 pixel blocks
- * @param h	height in 8 pixel blocks
+ * @param w     width in 8 pixel blocks
+ * @param h     height in 8 pixel blocks
  */
 static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
     int b_x, b_y;
@@ -211,28 +211,28 @@ static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int st
             int offset= b_x*8 + b_y*stride*8;
             int16_t *left_mv=  s->current_picture.motion_val[0][s->b8_stride*(b_y<<(1-is_luma)) + ( b_x   <<(1-is_luma))];
             int16_t *right_mv= s->current_picture.motion_val[0][s->b8_stride*(b_y<<(1-is_luma)) + ((b_x+1)<<(1-is_luma))];
-            
+
             if(!(left_damage||right_damage)) continue; // both undamaged
-            
-            if(   (!left_intra) && (!right_intra) 
+
+            if(   (!left_intra) && (!right_intra)
                && ABS(left_mv[0]-right_mv[0]) + ABS(left_mv[1]+right_mv[1]) < 2) continue;
-            
+
             for(y=0; y<8; y++){
                 int a,b,c,d;
-                
+
                 a= dst[offset + 7 + y*stride] - dst[offset + 6 + y*stride];
                 b= dst[offset + 8 + y*stride] - dst[offset + 7 + y*stride];
                 c= dst[offset + 9 + y*stride] - dst[offset + 8 + y*stride];
-                
+
                 d= ABS(b) - ((ABS(a) + ABS(c) + 1)>>1);
                 d= FFMAX(d, 0);
                 if(b<0) d= -d;
-                
+
                 if(d==0) continue;
 
                 if(!(left_damage && right_damage))
                     d= d*16/9;
-                
+
                 if(left_damage){
                     dst[offset + 7 + y*stride] = cm[dst[offset + 7 + y*stride] + ((d*7)>>4)];
                     dst[offset + 6 + y*stride] = cm[dst[offset + 6 + y*stride] + ((d*5)>>4)];
@@ -252,8 +252,8 @@ static void h_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int st
 
 /**
  * simple vertical deblocking filter used for error resilience
- * @param w	width in 8 pixel blocks
- * @param h	height in 8 pixel blocks
+ * @param w     width in 8 pixel blocks
+ * @param h     height in 8 pixel blocks
  */
 static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int stride, int is_luma){
     int b_x, b_y;
@@ -271,28 +271,28 @@ static void v_block_filter(MpegEncContext *s, uint8_t *dst, int w, int h, int st
             int offset= b_x*8 + b_y*stride*8;
             int16_t *top_mv=    s->current_picture.motion_val[0][s->b8_stride*( b_y   <<(1-is_luma)) + (b_x<<(1-is_luma))];
             int16_t *bottom_mv= s->current_picture.motion_val[0][s->b8_stride*((b_y+1)<<(1-is_luma)) + (b_x<<(1-is_luma))];
-            
+
             if(!(top_damage||bottom_damage)) continue; // both undamaged
-            
-            if(   (!top_intra) && (!bottom_intra) 
+
+            if(   (!top_intra) && (!bottom_intra)
                && ABS(top_mv[0]-bottom_mv[0]) + ABS(top_mv[1]+bottom_mv[1]) < 2) continue;
-            
+
             for(x=0; x<8; x++){
                 int a,b,c,d;
-                
+
                 a= dst[offset + x + 7*stride] - dst[offset + x + 6*stride];
                 b= dst[offset + x + 8*stride] - dst[offset + x + 7*stride];
                 c= dst[offset + x + 9*stride] - dst[offset + x + 8*stride];
-                
+
                 d= ABS(b) - ((ABS(a) + ABS(c)+1)>>1);
                 d= FFMAX(d, 0);
                 if(b<0) d= -d;
-                
+
                 if(d==0) continue;
 
                 if(!(top_damage && bottom_damage))
                     d= d*16/9;
-                
+
                 if(top_damage){
                     dst[offset + x +  7*stride] = cm[dst[offset + x +  7*stride] + ((d*7)>>4)];
                     dst[offset + x +  6*stride] = cm[dst[offset + x +  6*stride] + ((d*5)>>4)];
@@ -320,7 +320,7 @@ static void guess_mv(MpegEncContext *s){
     const int mb_height= s->mb_height;
     int i, depth, num_avail;
     int mb_x, mb_y;
-   
+
     num_avail=0;
     for(i=0; i<s->mb_num; i++){
         const int mb_xy= s->mb_index2xy[ i ];
@@ -329,17 +329,17 @@ static void guess_mv(MpegEncContext *s){
 
         if(IS_INTRA(s->current_picture.mb_type[mb_xy])) f=MV_FROZEN; //intra //FIXME check
         if(!(error&MV_ERROR)) f=MV_FROZEN;           //inter with undamaged MV
-        
+
         fixed[mb_xy]= f;
         if(f==MV_FROZEN)
             num_avail++;
     }
-    
+
     if((!(s->avctx->error_concealment&FF_EC_GUESS_MVS)) || num_avail <= mb_width/2){
         for(mb_y=0; mb_y<s->mb_height; mb_y++){
             for(mb_x=0; mb_x<s->mb_width; mb_x++){
                 const int mb_xy= mb_x + mb_y*s->mb_stride;
-                
+
                 if(IS_INTRA(s->current_picture.mb_type[mb_xy]))  continue;
                 if(!(s->error_status_table[mb_xy]&MV_ERROR)) continue;
 
@@ -348,7 +348,7 @@ static void guess_mv(MpegEncContext *s){
                 s->mv_type = MV_TYPE_16X16;
                 s->mb_skipped=0;
 
-		s->dsp.clear_blocks(s->block[0]);
+                s->dsp.clear_blocks(s->block[0]);
 
                 s->mb_x= mb_x;
                 s->mb_y= mb_y;
@@ -359,7 +359,7 @@ static void guess_mv(MpegEncContext *s){
         }
         return;
     }
-    
+
     for(depth=0;; depth++){
         int changed, pass, none_left;
 
@@ -368,7 +368,7 @@ static void guess_mv(MpegEncContext *s){
         for(pass=0; (changed || pass<2) && pass<10; pass++){
             int mb_x, mb_y;
 int score_sum=0;
- 
+
             changed=0;
             for(mb_y=0; mb_y<s->mb_height; mb_y++){
                 for(mb_x=0; mb_x<s->mb_width; mb_x++){
@@ -384,11 +384,11 @@ int score_sum=0;
                     int prev_y= s->current_picture.motion_val[0][mot_index][1];
 
                     if((mb_x^mb_y^pass)&1) continue;
-                    
+
                     if(fixed[mb_xy]==MV_FROZEN) continue;
                     assert(!IS_INTRA(s->current_picture.mb_type[mb_xy]));
                     assert(s->last_picture_ptr && s->last_picture_ptr->data[0]);
-                    
+
                     j=0;
                     if(mb_x>0           && fixed[mb_xy-1        ]==MV_FROZEN) j=1;
                     if(mb_x+1<mb_width  && fixed[mb_xy+1        ]==MV_FROZEN) j=1;
@@ -402,9 +402,9 @@ int score_sum=0;
                     if(mb_y>0           && fixed[mb_xy-mb_stride]==MV_CHANGED) j=1;
                     if(mb_y+1<mb_height && fixed[mb_xy+mb_stride]==MV_CHANGED) j=1;
                     if(j==0 && pass>1) continue;
-                    
+
                     none_left=0;
-                    
+
                     if(mb_x>0 && fixed[mb_xy-1]){
                         mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index - 2][0];
                         mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index - 2][1];
@@ -426,7 +426,7 @@ int score_sum=0;
                         pred_count++;
                     }
                     if(pred_count==0) continue;
-                    
+
                     if(pred_count>1){
                         int sum_x=0, sum_y=0;
                         int max_x, max_y, min_x, min_y;
@@ -435,11 +435,11 @@ int score_sum=0;
                             sum_x+= mv_predictor[j][0];
                             sum_y+= mv_predictor[j][1];
                         }
-                    
+
                         /* mean */
                         mv_predictor[pred_count][0] = sum_x/j;
                         mv_predictor[pred_count][1] = sum_y/j;
-                    
+
                         /* median */
                         if(pred_count>=3){
                             min_y= min_x= 99999;
@@ -455,28 +455,28 @@ int score_sum=0;
                         }
                         mv_predictor[pred_count+1][0] = sum_x - max_x - min_x;
                         mv_predictor[pred_count+1][1] = sum_y - max_y - min_y;
-                        
+
                         if(pred_count==4){
                             mv_predictor[pred_count+1][0] /= 2;
                             mv_predictor[pred_count+1][1] /= 2;
                         }
                         pred_count+=2;
                     }
-                    
+
                     /* zero MV */
                     pred_count++;
 
                     /* last MV */
                     mv_predictor[pred_count][0]= s->current_picture.motion_val[0][mot_index][0];
                     mv_predictor[pred_count][1]= s->current_picture.motion_val[0][mot_index][1];
-                    pred_count++;                    
-                    
+                    pred_count++;
+
                     s->mv_dir = MV_DIR_FORWARD;
                     s->mb_intra=0;
                     s->mv_type = MV_TYPE_16X16;
                     s->mb_skipped=0;
 
-		    s->dsp.clear_blocks(s->block[0]);
+                    s->dsp.clear_blocks(s->block[0]);
 
                     s->mb_x= mb_x;
                     s->mb_y= mb_y;
@@ -489,7 +489,7 @@ int score_sum=0;
                         s->current_picture.motion_val[0][mot_index][1]= s->mv[0][0][1]= mv_predictor[j][1];
 
                         decode_mb(s);
-                        
+
                         if(mb_x>0 && fixed[mb_xy-1]){
                             int k;
                             for(k=0; k<16; k++)
@@ -510,7 +510,7 @@ int score_sum=0;
                             for(k=0; k<16; k++)
                                 score += ABS(src[k+s->linesize*15]-src[k+s->linesize*16]);
                         }
-                        
+
                         if(score <= best_score){ // <= will favor the last MV
                             best_score= score;
                             best_pred= j;
@@ -523,7 +523,7 @@ score_sum+= best_score;
 
                     decode_mb(s);
 
-                    
+
                     if(s->mv[0][0][0] != prev_x || s->mv[0][0][1] != prev_y){
                         fixed[mb_xy]=MV_CHANGED;
                         changed++;
@@ -534,10 +534,10 @@ score_sum+= best_score;
 
 //            printf(".%d/%d", changed, score_sum); fflush(stdout);
         }
-        
-        if(none_left) 
+
+        if(none_left)
             return;
-            
+
         for(i=0; i<s->mb_num; i++){
             int mb_xy= s->mb_index2xy[i];
             if(fixed[mb_xy])
@@ -546,10 +546,10 @@ score_sum+= best_score;
 //        printf(":"); fflush(stdout);
     }
 }
-    
+
 static int is_intra_more_likely(MpegEncContext *s){
     int is_intra_likely, i, j, undamaged_count, skip_amount, mb_x, mb_y;
-    
+
     if(s->last_picture_ptr==NULL) return 1; //no previous frame available -> use spatial prediction
 
     undamaged_count=0;
@@ -559,10 +559,10 @@ static int is_intra_more_likely(MpegEncContext *s){
         if(!((error&DC_ERROR) && (error&MV_ERROR)))
             undamaged_count++;
     }
-    
+
     if(undamaged_count < 5) return 0; //allmost all MBs damaged -> use temporal prediction
-    
-    skip_amount= FFMAX(undamaged_count/50, 1); //check only upto 50 MBs 
+
+    skip_amount= FFMAX(undamaged_count/50, 1); //check only upto 50 MBs
     is_intra_likely=0;
 
     j=0;
@@ -574,15 +574,15 @@ static int is_intra_more_likely(MpegEncContext *s){
             error= s->error_status_table[mb_xy];
             if((error&DC_ERROR) && (error&MV_ERROR))
                 continue; //skip damaged
-        
-            j++;    
+
+            j++;
             if((j%skip_amount) != 0) continue; //skip a few to speed things up
-    
+
             if(s->pict_type==I_TYPE){
                 uint8_t *mb_ptr     = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
                 uint8_t *last_mb_ptr= s->last_picture.data   [0] + mb_x*16 + mb_y*16*s->linesize;
-    
-		is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr                    , s->linesize, 16);
+
+                is_intra_likely += s->dsp.sad[0](NULL, last_mb_ptr, mb_ptr                    , s->linesize, 16);
                 is_intra_likely -= s->dsp.sad[0](NULL, last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize, 16);
             }else{
                 if(IS_INTRA(s->current_picture.mb_type[mb_xy]))
@@ -593,7 +593,7 @@ static int is_intra_more_likely(MpegEncContext *s){
         }
     }
 //printf("is_intra_likely: %d type:%d\n", is_intra_likely, s->pict_type);
-    return is_intra_likely > 0;    
+    return is_intra_likely > 0;
 }
 
 void ff_er_frame_start(MpegEncContext *s){
@@ -615,7 +615,7 @@ void ff_er_add_slice(MpegEncContext *s, int startx, int starty, int endx, int en
     const int start_xy= s->mb_index2xy[start_i];
     const int end_xy  = s->mb_index2xy[end_i];
     int mask= -1;
-    
+
     if(!s->error_resilience) return;
 
     mask &= ~VP_START;
@@ -643,18 +643,18 @@ void ff_er_add_slice(MpegEncContext *s, int startx, int starty, int endx, int en
         }
     }
 
-    if(end_i == s->mb_num) 
+    if(end_i == s->mb_num)
         s->error_count= INT_MAX;
     else{
         s->error_status_table[end_xy] &= mask;
         s->error_status_table[end_xy] |= status;
     }
- 
+
     s->error_status_table[start_xy] |= VP_START;
 
     if(start_xy > 0 && s->avctx->thread_count <= 1 && s->avctx->skip_top*s->mb_width < start_i){
         int prev_status= s->error_status_table[ s->mb_index2xy[start_i - 1] ];
-        
+
         prev_status &= ~ VP_START;
         if(prev_status != (MV_END|DC_END|AC_END)) s->error_count= INT_MAX;
     }
@@ -668,13 +668,13 @@ void ff_er_frame_end(MpegEncContext *s){
     int is_intra_likely;
     int size = s->b8_stride * 2 * s->mb_height;
     Picture *pic= s->current_picture_ptr;
-    
-    if(!s->error_resilience || s->error_count==0 || 
+
+    if(!s->error_resilience || s->error_count==0 ||
        s->error_count==3*s->mb_width*(s->avctx->skip_top + s->avctx->skip_bottom)) return;
 
     if(s->current_picture.motion_val[0] == NULL){
         av_log(s->avctx, AV_LOG_ERROR, "Warning MVs not available\n");
-            
+
         for(i=0; i<2; i++){
             pic->ref_index[i]= av_mallocz(size * sizeof(uint8_t));
             pic->motion_val_base[i]= av_mallocz((size+4) * 2 * sizeof(uint16_t));
@@ -683,7 +683,7 @@ void ff_er_frame_end(MpegEncContext *s){
         pic->motion_subsample_log2= 3;
         s->current_picture= *s->current_picture_ptr;
     }
-    
+
     for(i=0; i<2; i++){
         if(pic->ref_index[i])
             memset(pic->ref_index[i], 0, size * sizeof(uint8_t));
@@ -693,13 +693,13 @@ void ff_er_frame_end(MpegEncContext *s){
         for(mb_y=0; mb_y<s->mb_height; mb_y++){
             for(mb_x=0; mb_x<s->mb_width; mb_x++){
                 int status= s->error_status_table[mb_x + mb_y*s->mb_stride];
-            
-                av_log(s->avctx, AV_LOG_DEBUG, "%2X ", status); 
+
+                av_log(s->avctx, AV_LOG_DEBUG, "%2X ", status);
             }
             av_log(s->avctx, AV_LOG_DEBUG, "\n");
         }
     }
-    
+
 #if 1
     /* handle overlapping slices */
     for(error_type=1; error_type<=3; error_type++){
@@ -708,7 +708,7 @@ void ff_er_frame_end(MpegEncContext *s){
         for(i=s->mb_num-1; i>=0; i--){
             const int mb_xy= s->mb_index2xy[i];
             int error= s->error_status_table[mb_xy];
-        
+
             if(error&(1<<error_type))
                 end_ok=1;
             if(error&(8<<error_type))
@@ -730,7 +730,7 @@ void ff_er_frame_end(MpegEncContext *s){
         for(i=s->mb_num-1; i>=0; i--){
             const int mb_xy= s->mb_index2xy[i];
             int error= s->error_status_table[mb_xy];
-        
+
             if(error&AC_END)
                 end_ok=0;
             if((error&MV_END) || (error&DC_END) || (error&AC_ERROR))
@@ -747,26 +747,26 @@ void ff_er_frame_end(MpegEncContext *s){
     /* handle missing slices */
     if(s->error_resilience>=4){
         int end_ok=1;
-                
+
         for(i=s->mb_num-2; i>=s->mb_width+100; i--){ //FIXME +100 hack
             const int mb_xy= s->mb_index2xy[i];
             int error1= s->error_status_table[mb_xy  ];
             int error2= s->error_status_table[s->mb_index2xy[i+1]];
-        
+
             if(error1&VP_START)
                 end_ok=1;
-             
+
             if(   error2==(VP_START|DC_ERROR|AC_ERROR|MV_ERROR|AC_END|DC_END|MV_END)
-               && error1!=(VP_START|DC_ERROR|AC_ERROR|MV_ERROR|AC_END|DC_END|MV_END) 
+               && error1!=(VP_START|DC_ERROR|AC_ERROR|MV_ERROR|AC_END|DC_END|MV_END)
                && ((error1&AC_END) || (error1&DC_END) || (error1&MV_END))){ //end & uninited
                 end_ok=0;
             }
-        
+
             if(!end_ok)
                 s->error_status_table[mb_xy]|= DC_ERROR|AC_ERROR|MV_ERROR;
         }
     }
-    
+
 #if 1
     /* backward mark errors */
     distance=9999999;
@@ -774,9 +774,9 @@ void ff_er_frame_end(MpegEncContext *s){
         for(i=s->mb_num-1; i>=0; i--){
             const int mb_xy= s->mb_index2xy[i];
             int error= s->error_status_table[mb_xy];
-            
+
             if(!s->mbskip_table[mb_xy]) //FIXME partition specific
-                distance++;            
+                distance++;
             if(error&(1<<error_type))
                 distance= 0;
 
@@ -799,7 +799,7 @@ void ff_er_frame_end(MpegEncContext *s){
     for(i=0; i<s->mb_num; i++){
         const int mb_xy= s->mb_index2xy[i];
         int old_error= s->error_status_table[mb_xy];
-        
+
         if(old_error&VP_START)
             error= old_error& (DC_ERROR|AC_ERROR|MV_ERROR);
         else{
@@ -844,7 +844,7 @@ void ff_er_frame_end(MpegEncContext *s){
         else
             s->current_picture.mb_type[mb_xy]= MB_TYPE_16x16 | MB_TYPE_L0;
     }
-    
+
     /* handle inter blocks with damaged AC */
     for(mb_y=0; mb_y<s->mb_height; mb_y++){
         for(mb_x=0; mb_x<s->mb_width; mb_x++){
@@ -855,7 +855,7 @@ void ff_er_frame_end(MpegEncContext *s){
             if(IS_INTRA(mb_type)) continue; //intra
             if(error&MV_ERROR) continue;              //inter with damaged MV
             if(!(error&AC_ERROR)) continue;           //undamaged inter
-            
+
             s->mv_dir = MV_DIR_FORWARD;
             s->mb_intra=0;
             s->mb_skipped=0;
@@ -872,8 +872,8 @@ void ff_er_frame_end(MpegEncContext *s){
                 s->mv[0][0][0] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][0];
                 s->mv[0][0][1] = s->current_picture.motion_val[0][ mb_x*2 + mb_y*2*s->b8_stride ][1];
             }
-        
-	    s->dsp.clear_blocks(s->block[0]);
+
+            s->dsp.clear_blocks(s->block[0]);
 
             s->mb_x= mb_x;
             s->mb_y= mb_y;
@@ -893,16 +893,16 @@ void ff_er_frame_end(MpegEncContext *s){
                 if(IS_INTRA(mb_type)) continue;
                 if(!(error&MV_ERROR)) continue;           //inter with undamaged MV
                 if(!(error&AC_ERROR)) continue;           //undamaged inter
-            
+
                 s->mv_dir = MV_DIR_FORWARD|MV_DIR_BACKWARD;
                 s->mb_intra=0;
                 s->mv_type = MV_TYPE_16X16;
                 s->mb_skipped=0;
-                
+
                 if(s->pp_time){
                     int time_pp= s->pp_time;
                     int time_pb= s->pb_time;
-            
+
                     s->mv[0][0][0] = s->next_picture.motion_val[0][xy][0]*time_pb/time_pp;
                     s->mv[0][0][1] = s->next_picture.motion_val[0][xy][1]*time_pb/time_pp;
                     s->mv[1][0][0] = s->next_picture.motion_val[0][xy][0]*(time_pb - time_pp)/time_pp;
@@ -935,16 +935,16 @@ void ff_er_frame_end(MpegEncContext *s){
             uint8_t *dest_y, *dest_cb, *dest_cr;
             const int mb_xy= mb_x + mb_y * s->mb_stride;
             const int mb_type= s->current_picture.mb_type[mb_xy];
-           
+
             error= s->error_status_table[mb_xy];
 
             if(IS_INTRA(mb_type) && s->partitioned_frame) continue;
 //            if(error&MV_ERROR) continue; //inter data damaged FIXME is this good?
-            
+
             dest_y = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
             dest_cb= s->current_picture.data[1] + mb_x*8  + mb_y*8 *s->uvlinesize;
             dest_cr= s->current_picture.data[2] + mb_x*8  + mb_y*8 *s->uvlinesize;
-           
+
             dc_ptr= &s->dc_val[0][mb_x*2 + mb_y*2*s->b8_stride];
             for(n=0; n<4; n++){
                 dc=0;
@@ -966,7 +966,7 @@ void ff_er_frame_end(MpegEncContext *s){
                 }
             }
             s->dc_val[1][mb_x + mb_y*s->mb_stride]= (dcu+4)>>3;
-            s->dc_val[2][mb_x + mb_y*s->mb_stride]= (dcv+4)>>3;   
+            s->dc_val[2][mb_x + mb_y*s->mb_stride]= (dcv+4)>>3;
         }
     }
 #if 1
@@ -974,10 +974,10 @@ void ff_er_frame_end(MpegEncContext *s){
     guess_dc(s, s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride, 1);
     guess_dc(s, s->dc_val[1], s->mb_width  , s->mb_height  , s->mb_stride, 0);
     guess_dc(s, s->dc_val[2], s->mb_width  , s->mb_height  , s->mb_stride, 0);
-#endif   
+#endif
     /* filter luma DC */
     filter181(s->dc_val[0], s->mb_width*2, s->mb_height*2, s->b8_stride);
-    
+
 #if 1
     /* render DC only intra */
     for(mb_y=0; mb_y<s->mb_height; mb_y++){
@@ -990,16 +990,16 @@ void ff_er_frame_end(MpegEncContext *s){
 
             if(IS_INTER(mb_type)) continue;
             if(!(error&AC_ERROR)) continue;              //undamaged
-            
+
             dest_y = s->current_picture.data[0] + mb_x*16 + mb_y*16*s->linesize;
             dest_cb= s->current_picture.data[1] + mb_x*8  + mb_y*8 *s->uvlinesize;
             dest_cr= s->current_picture.data[2] + mb_x*8  + mb_y*8 *s->uvlinesize;
-            
+
             put_dc(s, dest_y, dest_cb, dest_cr, mb_x, mb_y);
         }
     }
 #endif
-    
+
     if(s->avctx->error_concealment&FF_EC_DEBLOCK){
         /* filter horizontal block boundaries */
         h_block_filter(s, s->current_picture.data[0], s->mb_width*2, s->mb_height*2, s->linesize  , 1);
@@ -1019,10 +1019,10 @@ ec_clean:
     for(i=0; i<s->mb_num; i++){
         const int mb_xy= s->mb_index2xy[i];
         int error= s->error_status_table[mb_xy];
-        
+
         if(s->pict_type!=B_TYPE && (error&(DC_ERROR|MV_ERROR|AC_ERROR))){
             s->mbskip_table[mb_xy]=0;
         }
         s->mbintra_table[mb_xy]=1;
-    }    
+    }
 }
diff --git a/src/libffmpeg/libavcodec/eval.c b/src/libffmpeg/libavcodec/eval.c
index 330781581..5b0e51d62 100644
--- a/src/libffmpeg/libavcodec/eval.c
+++ b/src/libffmpeg/libavcodec/eval.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -75,7 +75,7 @@ static double evalPrimary(Parser *p){
         p->s= next;
         return d;
     }
-    
+
     /* named constants */
     for(i=0; p->const_name && p->const_name[i]; i++){
         if(strmatch(p->s, p->const_name[i])){
@@ -83,7 +83,7 @@ static double evalPrimary(Parser *p){
             return p->const_value[i];
         }
     }
-    
+
     p->s= strchr(p->s, '(');
     if(p->s==NULL){
         av_log(NULL, AV_LOG_ERROR, "Parser: missing ( in \"%s\"\n", next);
@@ -100,7 +100,7 @@ static double evalPrimary(Parser *p){
         return NAN;
     }
     p->s++; // ")"
-    
+
          if( strmatch(next, "sinh"  ) ) d= sinh(d);
     else if( strmatch(next, "cosh"  ) ) d= cosh(d);
     else if( strmatch(next, "tanh"  ) ) d= tanh(d);
@@ -140,7 +140,7 @@ static double evalPrimary(Parser *p){
     }
 
     return d;
-}      
+}
 
 static double evalPow(Parser *p){
     int sign= (*p->s == '+') - (*p->s == '-');
@@ -187,7 +187,7 @@ double ff_eval(char *s, double *const_value, const char **const_name,
                double (**func2)(void *, double, double), char **func2_name,
                void *opaque){
     Parser p;
-    
+
     p.stack_index=100;
     p.s= s;
     p.const_value= const_value;
@@ -197,12 +197,12 @@ double ff_eval(char *s, double *const_value, const char **const_name,
     p.func2      = func2;
     p.func2_name = func2_name;
     p.opaque     = opaque;
-    
+
     return evalExpression(&p);
 }
 
 #ifdef TEST
-#undef printf 
+#undef printf
 static double const_values[]={
     M_PI,
     M_E,
@@ -216,7 +216,7 @@ static const char *const_names[]={
 main(){
     int i;
     printf("%f == 12.7\n", ff_eval("1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)", const_values, const_names, NULL, NULL, NULL, NULL, NULL));
-    
+
     for(i=0; i<1050; i++){
         START_TIMER
             ff_eval("1+(5-2)^(3-1)+1/2+sin(PI)-max(-2.2,-3.1)", const_values, const_names, NULL, NULL, NULL, NULL, NULL);
diff --git a/src/libffmpeg/libavcodec/faandct.c b/src/libffmpeg/libavcodec/faandct.c
index 0462cee61..cd7ef7c6b 100644
--- a/src/libffmpeg/libavcodec/faandct.c
+++ b/src/libffmpeg/libavcodec/faandct.c
@@ -14,14 +14,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * this implementation is based upon the IJG integer AAN DCT (see jfdctfst.c)
  */
 
 /**
  * @file faandct.c
- * @brief 
+ * @brief
  *     Floating point AAN DCT
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
@@ -84,19 +84,19 @@ static always_inline void row_fdct(FLOAT temp[64], DCTELEM * data)
         tmp5= data[2 + i] - data[5 + i];
         tmp3= data[3 + i] + data[4 + i];
         tmp4= data[3 + i] - data[4 + i];
-        
+
         tmp10= tmp0 + tmp3;
         tmp13= tmp0 - tmp3;
         tmp11= tmp1 + tmp2;
         tmp12= tmp1 - tmp2;
-        
+
         temp[0 + i]= tmp10 + tmp11;
         temp[4 + i]= tmp10 - tmp11;
-        
+
         z1= (tmp12 + tmp13)*A1;
         temp[2 + i]= tmp13 + z1;
         temp[6 + i]= tmp13 - z1;
-        
+
         tmp10= tmp4 + tmp5;
         tmp11= tmp5 + tmp6;
         tmp12= tmp6 + tmp7;
@@ -113,7 +113,7 @@ static always_inline void row_fdct(FLOAT temp[64], DCTELEM * data)
         temp[3 + i]= z13 - z2;
         temp[1 + i]= z11 + z4;
         temp[7 + i]= z11 - z4;
-    }    
+    }
 }
 
 void ff_faandct(DCTELEM * data)
@@ -137,19 +137,19 @@ void ff_faandct(DCTELEM * data)
         tmp5= temp[8*2 + i] - temp[8*5 + i];
         tmp3= temp[8*3 + i] + temp[8*4 + i];
         tmp4= temp[8*3 + i] - temp[8*4 + i];
-        
+
         tmp10= tmp0 + tmp3;
         tmp13= tmp0 - tmp3;
         tmp11= tmp1 + tmp2;
         tmp12= tmp1 - tmp2;
-        
+
         data[8*0 + i]= lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
         data[8*4 + i]= lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
-        
+
         z1= (tmp12 + tmp13)* A1;
         data[8*2 + i]= lrintf(SCALE(8*2 + i) * (tmp13 + z1));
         data[8*6 + i]= lrintf(SCALE(8*6 + i) * (tmp13 - z1));
-        
+
         tmp10= tmp4 + tmp5;
         tmp11= tmp5 + tmp6;
         tmp12= tmp6 + tmp7;
@@ -190,29 +190,29 @@ void ff_faandct248(DCTELEM * data)
         tmp5 = temp[8*2 + i] - temp[8*3 + i];
         tmp6 = temp[8*4 + i] - temp[8*5 + i];
         tmp7 = temp[8*6 + i] - temp[8*7 + i];
-        
+
         tmp10 = tmp0 + tmp3;
         tmp11 = tmp1 + tmp2;
         tmp12 = tmp1 - tmp2;
         tmp13 = tmp0 - tmp3;
-        
+
         data[8*0 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
         data[8*4 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
-        
+
         z1 = (tmp12 + tmp13)* A1;
         data[8*2 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1));
         data[8*6 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
-        
+
         tmp10 = tmp4 + tmp7;
-	tmp11 = tmp5 + tmp6;
-	tmp12 = tmp5 - tmp6;
-	tmp13 = tmp4 - tmp7;
+        tmp11 = tmp5 + tmp6;
+        tmp12 = tmp5 - tmp6;
+        tmp13 = tmp4 - tmp7;
 
-	data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
-	data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
+        data[8*1 + i] = lrintf(SCALE(8*0 + i) * (tmp10 + tmp11));
+        data[8*5 + i] = lrintf(SCALE(8*4 + i) * (tmp10 - tmp11));
 
-	z1 = (tmp12 + tmp13)* A1;
-	data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1));
-	data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
+        z1 = (tmp12 + tmp13)* A1;
+        data[8*3 + i] = lrintf(SCALE(8*2 + i) * (tmp13 + z1));
+        data[8*7 + i] = lrintf(SCALE(8*6 + i) * (tmp13 - z1));
     }
 }
diff --git a/src/libffmpeg/libavcodec/faandct.h b/src/libffmpeg/libavcodec/faandct.h
index c40f8016b..677594c04 100644
--- a/src/libffmpeg/libavcodec/faandct.h
+++ b/src/libffmpeg/libavcodec/faandct.h
@@ -14,18 +14,18 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
 /**
  * @file faandct.h
- * @brief 
+ * @brief
  *     Floating point AAN DCT
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
- 
+
 #define FAAN_POSTSCALE
- 
+
 void ff_faandct(DCTELEM * data);
 void ff_faandct248(DCTELEM * data);
diff --git a/src/libffmpeg/libavcodec/fdctref.c b/src/libffmpeg/libavcodec/fdctref.c
index d728727ce..5eff36849 100644
--- a/src/libffmpeg/libavcodec/fdctref.c
+++ b/src/libffmpeg/libavcodec/fdctref.c
@@ -64,51 +64,51 @@ void init_fdct()
 void fdct(block)
 short *block;
 {
-	register int i, j;
-	double s;
-	double tmp[64];
+        register int i, j;
+        double s;
+        double tmp[64];
 
-	for(i = 0; i < 8; i++)
-    	for(j = 0; j < 8; j++)
-    	{
-    		s = 0.0;
+        for(i = 0; i < 8; i++)
+            for(j = 0; j < 8; j++)
+            {
+                    s = 0.0;
 
 /*
- *     		for(k = 0; k < 8; k++)
- *         		s += c[j][k] * block[8 * i + k];
+ *                     for(k = 0; k < 8; k++)
+ *                         s += c[j][k] * block[8 * i + k];
  */
-        	s += c[j][0] * block[8 * i + 0];
-        	s += c[j][1] * block[8 * i + 1];
-        	s += c[j][2] * block[8 * i + 2];
-        	s += c[j][3] * block[8 * i + 3];
-        	s += c[j][4] * block[8 * i + 4];
-        	s += c[j][5] * block[8 * i + 5];
-        	s += c[j][6] * block[8 * i + 6];
-        	s += c[j][7] * block[8 * i + 7];
-
-    		tmp[8 * i + j] = s;
-    	}
-
-	for(j = 0; j < 8; j++)
-    	for(i = 0; i < 8; i++)
-    	{
-    		s = 0.0;
+                s += c[j][0] * block[8 * i + 0];
+                s += c[j][1] * block[8 * i + 1];
+                s += c[j][2] * block[8 * i + 2];
+                s += c[j][3] * block[8 * i + 3];
+                s += c[j][4] * block[8 * i + 4];
+                s += c[j][5] * block[8 * i + 5];
+                s += c[j][6] * block[8 * i + 6];
+                s += c[j][7] * block[8 * i + 7];
+
+                    tmp[8 * i + j] = s;
+            }
+
+        for(j = 0; j < 8; j++)
+            for(i = 0; i < 8; i++)
+            {
+                    s = 0.0;
 
 /*
- *     	  	for(k = 0; k < 8; k++)
- *        	    s += c[i][k] * tmp[8 * k + j];
+ *                       for(k = 0; k < 8; k++)
+ *                    s += c[i][k] * tmp[8 * k + j];
  */
-        	s += c[i][0] * tmp[8 * 0 + j];
-        	s += c[i][1] * tmp[8 * 1 + j];
-        	s += c[i][2] * tmp[8 * 2 + j];
-        	s += c[i][3] * tmp[8 * 3 + j];
-        	s += c[i][4] * tmp[8 * 4 + j];
-        	s += c[i][5] * tmp[8 * 5 + j];
-        	s += c[i][6] * tmp[8 * 6 + j];
-        	s += c[i][7] * tmp[8 * 7 + j];
-		s*=8.0;
-
-    		block[8 * i + j] = (short)floor(s + 0.499999);
+                s += c[i][0] * tmp[8 * 0 + j];
+                s += c[i][1] * tmp[8 * 1 + j];
+                s += c[i][2] * tmp[8 * 2 + j];
+                s += c[i][3] * tmp[8 * 3 + j];
+                s += c[i][4] * tmp[8 * 4 + j];
+                s += c[i][5] * tmp[8 * 5 + j];
+                s += c[i][6] * tmp[8 * 6 + j];
+                s += c[i][7] * tmp[8 * 7 + j];
+                s*=8.0;
+
+                    block[8 * i + j] = (short)floor(s + 0.499999);
 /*
  * reason for adding 0.499999 instead of 0.5:
  * s is quite often x.5 (at least for i and/or j = 0 or 4)
@@ -141,7 +141,7 @@ short *block;
       tmp[8*i+j] = partial_product;
     }
 
-  /* Transpose operation is integrated into address mapping by switching 
+  /* Transpose operation is integrated into address mapping by switching
      loop order of i and j */
 
   for (j=0; j<8; j++)
diff --git a/src/libffmpeg/libavcodec/fft.c b/src/libffmpeg/libavcodec/fft.c
index 912a2edd6..81b6843e9 100644
--- a/src/libffmpeg/libavcodec/fft.c
+++ b/src/libffmpeg/libavcodec/fft.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
@@ -26,13 +26,13 @@
 
 /**
  * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is
- * done 
+ * done
  */
 int ff_fft_init(FFTContext *s, int nbits, int inverse)
 {
     int i, j, m, n;
     float alpha, c1, s1, s2;
-    
+
     s->nbits = nbits;
     n = 1 << nbits;
 
@@ -45,7 +45,7 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse)
     s->inverse = inverse;
 
     s2 = inverse ? 1.0 : -1.0;
-        
+
     for(i=0;i<(n/2);i++) {
         alpha = 2 * M_PI * (float)i / (float)n;
         c1 = cos(alpha);
@@ -70,7 +70,7 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse)
         if (has_vectors) {
             int np, nblocks, np2, l;
             FFTComplex *q;
-            
+
             np = 1 << nbits;
             nblocks = np >> 3;
             np2 = np >> 1;
@@ -144,13 +144,13 @@ int ff_fft_init(FFTContext *s, int nbits, int inverse)
 /**
  * Do a complex FFT with the parameters defined in ff_fft_init(). The
  * input data must be permuted before with s->revtab table. No
- * 1.0/sqrt(n) normalization is done.  
+ * 1.0/sqrt(n) normalization is done.
  */
 void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
 {
     int ln = s->nbits;
-    int	j, np, np2;
-    int	nblocks, nloops;
+    int j, np, np2;
+    int nblocks, nloops;
     register FFTComplex *p, *q;
     FFTComplex *exptab = s->exptab;
     int l;
@@ -163,29 +163,29 @@ void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
     p=&z[0];
     j=(np >> 1);
     do {
-        BF(p[0].re, p[0].im, p[1].re, p[1].im, 
+        BF(p[0].re, p[0].im, p[1].re, p[1].im,
            p[0].re, p[0].im, p[1].re, p[1].im);
         p+=2;
     } while (--j != 0);
 
     /* pass 1 */
 
-    
+
     p=&z[0];
     j=np >> 2;
     if (s->inverse) {
         do {
-            BF(p[0].re, p[0].im, p[2].re, p[2].im, 
+            BF(p[0].re, p[0].im, p[2].re, p[2].im,
                p[0].re, p[0].im, p[2].re, p[2].im);
-            BF(p[1].re, p[1].im, p[3].re, p[3].im, 
+            BF(p[1].re, p[1].im, p[3].re, p[3].im,
                p[1].re, p[1].im, -p[3].im, p[3].re);
             p+=4;
         } while (--j != 0);
     } else {
         do {
-            BF(p[0].re, p[0].im, p[2].re, p[2].im, 
+            BF(p[0].re, p[0].im, p[2].re, p[2].im,
                p[0].re, p[0].im, p[2].re, p[2].im);
-            BF(p[1].re, p[1].im, p[3].re, p[3].im, 
+            BF(p[1].re, p[1].im, p[3].re, p[3].im,
                p[1].re, p[1].im, p[3].im, -p[3].re);
             p+=4;
         } while (--j != 0);
@@ -201,7 +201,7 @@ void ff_fft_calc_c(FFTContext *s, FFTComplex *z)
         for (j = 0; j < nblocks; ++j) {
             BF(p->re, p->im, q->re, q->im,
                p->re, p->im, q->re, q->im);
-            
+
             p++;
             q++;
             for(l = nblocks; l < np2; l += nblocks) {
@@ -228,7 +228,7 @@ void ff_fft_permute(FFTContext *s, FFTComplex *z)
     int j, k, np;
     FFTComplex tmp;
     const uint16_t *revtab = s->revtab;
-    
+
     /* reverse */
     np = 1 << s->nbits;
     for(j=0;j<np;j++) {
diff --git a/src/libffmpeg/libavcodec/ffv1.c b/src/libffmpeg/libavcodec/ffv1.c
index 57ed9adb5..10ba21b4c 100644
--- a/src/libffmpeg/libavcodec/ffv1.c
+++ b/src/libffmpeg/libavcodec/ffv1.c
@@ -15,10 +15,10 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file ffv1.c
  * FF Video Codec 1 (an experimental lossless codec)
@@ -144,8 +144,8 @@ static const int8_t quant13[256]={
 };
 
 static const uint8_t log2_run[32]={
- 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 
- 4, 4, 5, 5, 6, 6, 7, 7, 
+ 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7,
  8, 9,10,11,12,13,14,15,
 };
 
@@ -180,8 +180,8 @@ typedef struct FFV1Context{
     int16_t quant_table[5][256];
     int run_index;
     int colorspace;
-    
-    DSPContext dsp; 
+
+    DSPContext dsp;
 }FFV1Context;
 
 static always_inline int fold(int diff, int bits){
@@ -226,7 +226,7 @@ static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signe
         const int a= ABS(v);
         const int e= av_log2(a);
         put_rac(c, state+0, 0);
-        
+
         assert(e<=9);
 
         for(i=0; i<e; i++){
@@ -283,15 +283,15 @@ static inline void update_vlc_state(VlcState * const state, const int v){
 
     if(drift <= -count){
         if(state->bias > -128) state->bias--;
-        
+
         drift += count;
         if(drift <= -count)
             drift= -count + 1;
     }else if(drift > 0){
         if(state->bias <  127) state->bias++;
-        
+
         drift -= count;
-        if(drift > 0) 
+        if(drift > 0)
             drift= 0;
     }
 
@@ -319,7 +319,7 @@ static inline void put_vlc_symbol(PutBitContext *pb, VlcState * const state, int
 #else
      code= v ^ ((2*state->drift + state->count)>>31);
 #endif
-    
+
 //printf("v:%d/%d bias:%d error:%d drift:%d count:%d k:%d\n", v, code, state->bias, state->error_sum, state->drift, state->count, k);
     set_sr_golomb(pb, code, k, 12, bits);
 
@@ -348,7 +348,7 @@ static inline int get_vlc_symbol(GetBitContext *gb, VlcState * const state, int
 #endif
 
     ret= fold(v + state->bias, bits);
-    
+
     update_vlc_state(state, v);
 //printf("final: %d\n", ret);
     return ret;
@@ -376,7 +376,7 @@ static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], in
 
     for(x=0; x<w; x++){
         int diff, context;
-        
+
         context= get_context(s, sample[0]+x, sample[1]+x, sample[2]+x);
         diff= sample[0][x] - predict(sample[0]+x, sample[1]+x);
 
@@ -386,12 +386,12 @@ static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], in
         }
 
         diff= fold(diff, bits);
-        
+
         if(s->ac){
             put_symbol(c, p->state[context], diff, 1);
         }else{
             if(context == 0) run_mode=1;
-            
+
             if(run_mode){
 
                 if(diff){
@@ -400,7 +400,7 @@ static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], in
                         run_index++;
                         put_bits(&s->pb, 1, 1);
                     }
-                    
+
                     put_bits(&s->pb, 1 + log2_run[run_index], run_count);
                     if(run_index) run_index--;
                     run_count=0;
@@ -410,7 +410,7 @@ static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], in
                     run_count++;
                 }
             }
-            
+
 //            printf("count:%d index:%d, mode:%d, x:%d y:%d pos:%d\n", run_count, run_index, run_mode, x, y, (int)put_bits_count(&s->pb));
 
             if(run_mode == 0)
@@ -428,7 +428,7 @@ static inline int encode_line(FFV1Context *s, int w, int_fast16_t *sample[2], in
             put_bits(&s->pb, 1, 1);
     }
     s->run_index= run_index;
-    
+
     return 0;
 }
 
@@ -437,13 +437,13 @@ static void encode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride,
     const int ring_size= s->avctx->context_model ? 3 : 2;
     int_fast16_t sample_buffer[ring_size][w+6], *sample[ring_size];
     s->run_index=0;
-    
+
     memset(sample_buffer, 0, sizeof(sample_buffer));
-    
+
     for(y=0; y<h; y++){
         for(i=0; i<ring_size; i++)
             sample[i]= sample_buffer[(h+i-y)%ring_size]+3;
-        
+
         sample[0][-1]= sample[1][0  ];
         sample[1][ w]= sample[1][w-1];
 //{START_TIMER
@@ -460,9 +460,9 @@ static void encode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int st
     const int ring_size= s->avctx->context_model ? 3 : 2;
     int_fast16_t sample_buffer[3][ring_size][w+6], *sample[3][ring_size];
     s->run_index=0;
-    
+
     memset(sample_buffer, 0, sizeof(sample_buffer));
-    
+
     for(y=0; y<h; y++){
         for(i=0; i<ring_size; i++)
             for(p=0; p<3; p++)
@@ -473,13 +473,13 @@ static void encode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int st
             int b= v&0xFF;
             int g= (v>>8)&0xFF;
             int r= (v>>16)&0xFF;
-            
+
             b -= g;
             r -= g;
             g += (b + r)>>2;
             b += 0x100;
             r += 0x100;
-            
+
 //            assert(g>=0 && b>=0 && r>=0);
 //            assert(g<256 && b<512 && r<512);
             sample[0][0][x]= g;
@@ -515,10 +515,10 @@ static void write_header(FFV1Context *f){
     RangeCoder * const c= &f->c;
 
     memset(state, 128, sizeof(state));
-    
+
     put_symbol(c, state, f->version, 0);
     put_symbol(c, state, f->avctx->coder_type, 0);
-    put_symbol(c, state, f->colorspace, 0); //YUV cs type 
+    put_symbol(c, state, f->colorspace, 0); //YUV cs type
     put_rac(c, state, 1); //chroma planes
         put_symbol(c, state, f->chroma_h_shift, 0);
         put_symbol(c, state, f->chroma_v_shift, 0);
@@ -534,12 +534,12 @@ static int common_init(AVCodecContext *avctx){
 
     s->avctx= avctx;
     s->flags= avctx->flags;
-        
+
     dsputil_init(&s->dsp, avctx);
-    
+
     width= s->width= avctx->width;
     height= s->height= avctx->height;
-    
+
     assert(width && height);
 
     return 0;
@@ -555,12 +555,12 @@ static int encode_init(AVCodecContext *avctx)
                "use vstrict=-2 / -strict -2 to use it anyway\n");
         return -1;
     }
-        
+
     common_init(avctx);
- 
+
     s->version=0;
     s->ac= avctx->coder_type;
-    
+
     s->plane_count=2;
     for(i=0; i<256; i++){
         s->quant_table[0][i]=           quant11[i];
@@ -578,10 +578,10 @@ static int encode_init(AVCodecContext *avctx)
 
     for(i=0; i<s->plane_count; i++){
         PlaneContext * const p= &s->plane[i];
-               
+
         if(avctx->context_model==0){
             p->context_count= (11*11*11+1)/2;
-        }else{        
+        }else{
             p->context_count= (11*11*5*5*5+1)/2;
         }
 
@@ -611,7 +611,7 @@ static int encode_init(AVCodecContext *avctx)
     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
 
     s->picture_number=0;
-    
+
     return 0;
 }
 
@@ -624,7 +624,7 @@ static void clear_state(FFV1Context *f){
 
         p->interlace_bit_state[0]= 128;
         p->interlace_bit_state[1]= 128;
-        
+
         for(j=0; j<p->context_count; j++){
             if(f->ac){
                 memset(p->state[j], 128, sizeof(uint8_t)*CONTEXT_SIZE);
@@ -654,7 +654,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
 
     *p = *pict;
     p->pict_type= FF_I_TYPE;
-    
+
     if(avctx->gop_size==0 || f->picture_number % avctx->gop_size == 0){
         put_rac(c, &keystate, 1);
         p->key_frame= 1;
@@ -670,7 +670,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
 //printf("pos=%d\n", used_count);
         init_put_bits(&f->pb, buf + used_count, buf_size - used_count);
     }
-    
+
     if(f->colorspace==0){
         const int chroma_width = -((-width )>>f->chroma_h_shift);
         const int chroma_height= -((-height)>>f->chroma_v_shift);
@@ -683,7 +683,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
         encode_rgb_frame(f, (uint32_t*)(p->data[0]), width, height, p->linesize[0]/4);
     }
     emms_c();
-    
+
     f->picture_number++;
 
     if(f->ac){
@@ -695,7 +695,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
 }
 
 static void common_end(FFV1Context *s){
-    int i; 
+    int i;
 
     for(i=0; i<s->plane_count; i++){
         PlaneContext *p= &s->plane[i];
@@ -723,20 +723,20 @@ static inline void decode_line(FFV1Context *s, int w, int_fast16_t *sample[2], i
 
     for(x=0; x<w; x++){
         int diff, context, sign;
-         
+
         context= get_context(s, sample[1] + x, sample[0] + x, sample[1] + x);
         if(context < 0){
             context= -context;
             sign=1;
         }else
             sign=0;
-        
+
 
         if(s->ac){
             diff= get_symbol(c, p->state[context], 1);
         }else{
             if(context == 0 && run_mode==0) run_mode=1;
-            
+
             if(run_mode){
                 if(run_count==0 && run_mode==1){
                     if(get_bits1(&s->gb)){
@@ -759,7 +759,7 @@ static inline void decode_line(FFV1Context *s, int w, int_fast16_t *sample[2], i
                     diff=0;
             }else
                 diff= get_vlc_symbol(&s->gb, &p->vlc_state[context], bits);
-            
+
 //            printf("count:%d index:%d, mode:%d, x:%d y:%d pos:%d\n", run_count, run_index, run_mode, x, y, get_bits_count(&s->gb));
         }
 
@@ -767,7 +767,7 @@ static inline void decode_line(FFV1Context *s, int w, int_fast16_t *sample[2], i
 
         sample[1][x]= (predict(sample[1] + x, sample[0] + x) + diff) & ((1<<bits)-1);
     }
-    s->run_index= run_index;        
+    s->run_index= run_index;
 }
 
 static void decode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride, int plane_index){
@@ -776,9 +776,9 @@ static void decode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride,
     int_fast16_t *sample[2]= {sample_buffer[0]+3, sample_buffer[1]+3};
 
     s->run_index=0;
-    
+
     memset(sample_buffer, 0, sizeof(sample_buffer));
-    
+
     for(y=0; y<h; y++){
         int_fast16_t *temp= sample[0]; //FIXME try a normal buffer
 
@@ -787,7 +787,7 @@ static void decode_plane(FFV1Context *s, uint8_t *src, int w, int h, int stride,
 
         sample[1][-1]= sample[0][0  ];
         sample[0][ w]= sample[0][w-1];
-        
+
 //{START_TIMER
         decode_line(s, w, sample, plane_index, 8);
         for(x=0; x<w; x++){
@@ -806,9 +806,9 @@ static void decode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int st
         {sample_buffer[2][0]+3, sample_buffer[2][1]+3}};
 
     s->run_index=0;
-    
+
     memset(sample_buffer, 0, sizeof(sample_buffer));
-    
+
     for(y=0; y<h; y++){
         for(p=0; p<3; p++){
             int_fast16_t *temp= sample[p][0]; //FIXME try a normal buffer
@@ -827,13 +827,13 @@ static void decode_rgb_frame(FFV1Context *s, uint32_t *src, int w, int h, int st
 
 //            assert(g>=0 && b>=0 && r>=0);
 //            assert(g<256 && b<512 && r<512);
-            
+
             b -= 0x100;
             r -= 0x100;
             g -= (b + r)>>2;
             b += g;
             r += g;
-            
+
             src[x + stride*y]= b + (g<<8) + (r<<16);
         }
     }
@@ -850,7 +850,7 @@ static int read_quant_table(RangeCoder *c, int16_t *quant_table, int scale){
         int len= get_symbol(c, state, 0) + 1;
 
         if(len + i > 128) return -1;
-        
+
         while(len--){
             quant_table[i] = scale*v;
             i++;
@@ -863,7 +863,7 @@ static int read_quant_table(RangeCoder *c, int16_t *quant_table, int scale){
         quant_table[256-i]= -quant_table[i];
     }
     quant_table[128]= -quant_table[127];
-    
+
     return 2*v - 1;
 }
 
@@ -871,7 +871,7 @@ static int read_header(FFV1Context *f){
     uint8_t state[CONTEXT_SIZE];
     int i, context_count;
     RangeCoder * const c= &f->c;
-    
+
     memset(state, 128, sizeof(state));
 
     f->version= get_symbol(c, state, 0);
@@ -916,7 +916,7 @@ static int read_header(FFV1Context *f){
         }
     }
     context_count= (context_count+1)/2;
-    
+
     for(i=0; i<f->plane_count; i++){
         PlaneContext * const p= &f->plane[i];
 
@@ -928,7 +928,7 @@ static int read_header(FFV1Context *f){
             if(!p->vlc_state) p->vlc_state= av_malloc(p->context_count*sizeof(VlcState));
         }
     }
-    
+
     return 0;
 }
 
@@ -937,7 +937,7 @@ static int decode_init(AVCodecContext *avctx)
 //    FFV1Context *s = avctx->priv_data;
 
     common_init(avctx);
-    
+
     return 0;
 }
 
@@ -973,7 +973,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
 
     if(avctx->debug&FF_DEBUG_PICT_INFO)
         av_log(avctx, AV_LOG_ERROR, "keyframe:%d coder:%d\n", p->key_frame, f->ac);
-    
+
     if(!f->ac){
         bytes_read = c->bytestream - c->bytestream_start - 1;
         if(bytes_read ==0) av_log(avctx, AV_LOG_ERROR, "error at end of AC stream\n"); //FIXME
@@ -982,28 +982,28 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
     } else {
         bytes_read = 0; /* avoid warning */
     }
-    
+
     if(f->colorspace==0){
         const int chroma_width = -((-width )>>f->chroma_h_shift);
         const int chroma_height= -((-height)>>f->chroma_v_shift);
         decode_plane(f, p->data[0], width, height, p->linesize[0], 0);
-        
+
         decode_plane(f, p->data[1], chroma_width, chroma_height, p->linesize[1], 1);
         decode_plane(f, p->data[2], chroma_width, chroma_height, p->linesize[2], 1);
     }else{
         decode_rgb_frame(f, (uint32_t*)p->data[0], width, height, p->linesize[0]/4);
     }
-        
+
     emms_c();
 
     f->picture_number++;
 
     *picture= *p;
-    
+
     avctx->release_buffer(avctx, p); //FIXME
 
     *data_size = sizeof(AVFrame);
-    
+
     if(f->ac){
         bytes_read= c->bytestream - c->bytestream_start - 1;
         if(bytes_read ==0) av_log(f->avctx, AV_LOG_ERROR, "error at end of frame\n");
diff --git a/src/libffmpeg/libavcodec/flac.c b/src/libffmpeg/libavcodec/flac.c
index 9be1ac0de..97ac53745 100644
--- a/src/libffmpeg/libavcodec/flac.c
+++ b/src/libffmpeg/libavcodec/flac.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
@@ -30,9 +30,9 @@
  * 34-byte streaminfo structure through avctx->extradata[_size] followed
  * by data starting with the 0xFFF8 marker.
  */
- 
+
 #include <limits.h>
- 
+
 #include "avcodec.h"
 #include "bitstream.h"
 #include "golomb.h"
@@ -66,7 +66,7 @@ typedef struct FLACContext {
     uint8_t *bitstream;
     int bitstream_size;
     int bitstream_index;
-    int allocated_bitstream_size;
+    unsigned int allocated_bitstream_size;
 } FLACContext;
 
 #define METADATA_TYPE_STREAMINFO 0
@@ -74,14 +74,14 @@ typedef struct FLACContext {
 static int sample_rate_table[] =
 { 0, 0, 0, 0,
   8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000,
-  0, 0, 0, 0 }; 
+  0, 0, 0, 0 };
 
-static int sample_size_table[] = 
+static int sample_size_table[] =
 { 0, 8, 12, 0, 16, 20, 24, 0 };
 
 static int blocksize_table[] = {
-     0,    192, 576<<0, 576<<1, 576<<2, 576<<3,      0,      0, 
-256<<0, 256<<1, 256<<2, 256<<3, 256<<4, 256<<5, 256<<6, 256<<7 
+     0,    192, 576<<0, 576<<1, 576<<2, 576<<3,      0,      0,
+256<<0, 256<<1, 256<<2, 256<<3, 256<<4, 256<<5, 256<<6, 256<<7
 };
 
 static const uint8_t table_crc8[256] = {
@@ -123,18 +123,18 @@ static int64_t get_utf8(GetBitContext *gb)
 {
     uint64_t val;
     int ones=0, bytes;
-    
+
     while(get_bits1(gb))
         ones++;
 
     if     (ones==0) bytes=0;
     else if(ones==1) return -1;
     else             bytes= ones - 1;
-    
+
     val= get_bits(gb, 7-ones);
     while(bytes--){
         const int tmp = get_bits(gb, 8);
-        
+
         if((tmp>>6) != 2)
             return -1;
         val<<=6;
@@ -147,18 +147,18 @@ static int64_t get_utf8(GetBitContext *gb)
 static int skip_utf8(GetBitContext *gb)
 {
     int ones=0, bytes;
-    
+
     while(get_bits1(gb))
         ones++;
 
     if     (ones==0) bytes=0;
     else if(ones==1) return -1;
     else             bytes= ones - 1;
-    
+
     skip_bits(gb, 7-ones);
     while(bytes--){
         const int tmp = get_bits(gb, 8);
-        
+
         if((tmp>>6) != 2)
             return -1;
     }
@@ -169,7 +169,7 @@ static int skip_utf8(GetBitContext *gb)
 static int get_crc8(const uint8_t *buf, int count){
     int crc=0;
     int i;
-    
+
     for(i=0; i<count; i++){
         crc = table_crc8[crc ^ buf[i]];
     }
@@ -229,19 +229,19 @@ static void metadata_streaminfo(FLACContext *s)
 
     s->min_framesize = get_bits_long(&s->gb, 24);
     s->max_framesize = get_bits_long(&s->gb, 24);
-    
+
     s->samplerate = get_bits_long(&s->gb, 20);
     s->channels = get_bits(&s->gb, 3) + 1;
     s->bps = get_bits(&s->gb, 5) + 1;
-    
+
     s->avctx->channels = s->channels;
     s->avctx->sample_rate = s->samplerate;
 
     skip_bits(&s->gb, 36); /* total num of samples */
-    
+
     skip_bits(&s->gb, 64); /* md5 sum */
     skip_bits(&s->gb, 64); /* md5 sum */
-    
+
     allocate_buffers(s);
 }
 
@@ -255,12 +255,12 @@ static int decode_residuals(FLACContext *s, int channel, int pred_order)
         av_log(s->avctx, AV_LOG_DEBUG, "illegal residual coding method %d\n", method_type);
         return -1;
     }
-    
+
     rice_order = get_bits(&s->gb, 4);
 
     samples= s->blocksize >> rice_order;
 
-    sample= 
+    sample=
     i= pred_order;
     for (partition = 0; partition < (1 << rice_order); partition++)
     {
@@ -285,23 +285,23 @@ static int decode_residuals(FLACContext *s, int channel, int pred_order)
 //    av_log(s->avctx, AV_LOG_DEBUG, "partitions: %d, samples: %d\n", 1 << rice_order, sample);
 
     return 0;
-}    
+}
 
 static int decode_subframe_fixed(FLACContext *s, int channel, int pred_order)
 {
     int i;
-        
+
 //    av_log(s->avctx, AV_LOG_DEBUG, "  SUBFRAME FIXED\n");
-        
+
     /* warm up samples */
 //    av_log(s->avctx, AV_LOG_DEBUG, "   warm up samples: %d\n", pred_order);
-        
+
     for (i = 0; i < pred_order; i++)
     {
         s->decoded[channel][i] = get_sbits(&s->gb, s->curr_bps);
 //        av_log(s->avctx, AV_LOG_DEBUG, "    %d: %d\n", i, s->decoded[channel][i]);
     }
-    
+
     if (decode_residuals(s, channel, pred_order) < 0)
         return -1;
 
@@ -320,13 +320,13 @@ static int decode_subframe_fixed(FLACContext *s, int channel, int pred_order)
             break;
         case 3:
             for (i = pred_order; i < s->blocksize; i++)
-                s->decoded[channel][i] += 3*s->decoded[channel][i-1] 
+                s->decoded[channel][i] += 3*s->decoded[channel][i-1]
                                         - 3*s->decoded[channel][i-2]
                                         +   s->decoded[channel][i-3];
             break;
         case 4:
             for (i = pred_order; i < s->blocksize; i++)
-                s->decoded[channel][i] += 4*s->decoded[channel][i-1] 
+                s->decoded[channel][i] += 4*s->decoded[channel][i-1]
                                         - 6*s->decoded[channel][i-2]
                                         + 4*s->decoded[channel][i-3]
                                         -   s->decoded[channel][i-4];
@@ -344,18 +344,18 @@ static int decode_subframe_lpc(FLACContext *s, int channel, int pred_order)
     int sum, i, j;
     int coeff_prec, qlevel;
     int coeffs[pred_order];
-        
+
 //    av_log(s->avctx, AV_LOG_DEBUG, "  SUBFRAME LPC\n");
-        
+
     /* warm up samples */
 //    av_log(s->avctx, AV_LOG_DEBUG, "   warm up samples: %d\n", pred_order);
-        
+
     for (i = 0; i < pred_order; i++)
     {
         s->decoded[channel][i] = get_sbits(&s->gb, s->curr_bps);
 //        av_log(s->avctx, AV_LOG_DEBUG, "    %d: %d\n", i, s->decoded[channel][i]);
     }
-    
+
     coeff_prec = get_bits(&s->gb, 4) + 1;
     if (coeff_prec == 16)
     {
@@ -375,7 +375,7 @@ static int decode_subframe_lpc(FLACContext *s, int channel, int pred_order)
         coeffs[i] = get_sbits(&s->gb, coeff_prec);
 //        av_log(s->avctx, AV_LOG_DEBUG, "    %d: %d\n", i, coeffs[i]);
     }
-    
+
     if (decode_residuals(s, channel, pred_order) < 0)
         return -1;
 
@@ -386,7 +386,7 @@ static int decode_subframe_lpc(FLACContext *s, int channel, int pred_order)
             sum += coeffs[j] * s->decoded[channel][i-j-1];
         s->decoded[channel][i] += sum >> qlevel;
     }
-    
+
     return 0;
 }
 
@@ -394,7 +394,7 @@ static inline int decode_subframe(FLACContext *s, int channel)
 {
     int type, wasted = 0;
     int i, tmp;
-    
+
     s->curr_bps = s->bps;
     if(channel == 0){
         if(s->decorrelation == RIGHT_SIDE)
@@ -411,7 +411,7 @@ static inline int decode_subframe(FLACContext *s, int channel)
     }
     type = get_bits(&s->gb, 6);
 //    wasted = get_bits1(&s->gb);
-    
+
 //    if (wasted)
 //    {
 //        while (!get_bits1(&s->gb))
@@ -465,7 +465,7 @@ static inline int decode_subframe(FLACContext *s, int channel)
         av_log(s->avctx, AV_LOG_ERROR, "invalid coding type\n");
         return -1;
     }
-        
+
     if (wasted)
     {
         int i;
@@ -480,11 +480,11 @@ static int decode_frame(FLACContext *s)
 {
     int blocksize_code, sample_rate_code, sample_size_code, assignment, i, crc8;
     int decorrelation, bps, blocksize, samplerate;
-    
+
     blocksize_code = get_bits(&s->gb, 4);
 
     sample_rate_code = get_bits(&s->gb, 4);
-    
+
     assignment = get_bits(&s->gb, 4); /* channel assignment */
     if (assignment < 8 && s->channels == assignment+1)
         decorrelation = INDEPENDENT;
@@ -495,13 +495,13 @@ static int decode_frame(FLACContext *s)
         av_log(s->avctx, AV_LOG_ERROR, "unsupported channel assignment %d (channels=%d)\n", assignment, s->channels);
         return -1;
     }
-        
+
     sample_size_code = get_bits(&s->gb, 3);
     if(sample_size_code == 0)
         bps= s->bps;
     else if((sample_size_code != 3) && (sample_size_code != 7))
         bps = sample_size_table[sample_size_code];
-    else 
+    else
     {
         av_log(s->avctx, AV_LOG_ERROR, "invalid sample size code (%d)\n", sample_size_code);
         return -1;
@@ -512,25 +512,25 @@ static int decode_frame(FLACContext *s)
         av_log(s->avctx, AV_LOG_ERROR, "broken stream, invalid padding\n");
         return -1;
     }
-    
+
     if(get_utf8(&s->gb) < 0){
         av_log(s->avctx, AV_LOG_ERROR, "utf8 fscked\n");
         return -1;
     }
-#if 0    
+#if 0
     if (/*((blocksize_code == 6) || (blocksize_code == 7)) &&*/
         (s->min_blocksize != s->max_blocksize)){
     }else{
     }
 #endif
-    
+
     if (blocksize_code == 0)
         blocksize = s->min_blocksize;
     else if (blocksize_code == 6)
         blocksize = get_bits(&s->gb, 8)+1;
     else if (blocksize_code == 7)
         blocksize = get_bits(&s->gb, 16)+1;
-    else 
+    else
         blocksize = blocksize_table[blocksize_code];
 
     if(blocksize > s->max_blocksize){
@@ -559,7 +559,7 @@ static int decode_frame(FLACContext *s)
         av_log(s->avctx, AV_LOG_ERROR, "header crc mismatch crc=%2X\n", crc8);
         return -1;
     }
-    
+
     s->blocksize    = blocksize;
     s->samplerate   = samplerate;
     s->bps          = bps;
@@ -574,7 +574,7 @@ static int decode_frame(FLACContext *s)
         if (decode_subframe(s, i) < 0)
             return -1;
     }
-    
+
     align_get_bits(&s->gb);
 
     /* frame footer */
@@ -610,7 +610,7 @@ static int flac_decode_frame(AVCodecContext *avctx,
             buf= &s->bitstream[s->bitstream_index];
             buf_size += s->bitstream_size;
             s->bitstream_size= buf_size;
-            
+
             if(buf_size < s->max_framesize){
 //                printf("wanna more data ...\n");
                 return input_buf_size;
@@ -618,7 +618,7 @@ static int flac_decode_frame(AVCodecContext *avctx,
     }
 
     init_get_bits(&s->gb, buf, buf_size*8);
-    
+
     /* fLaC signature (be) */
     if (show_bits_long(&s->gb, 32) == bswap_32(ff_get_fourcc("fLaC")))
     {
@@ -629,7 +629,7 @@ static int flac_decode_frame(AVCodecContext *avctx,
             metadata_last = get_bits(&s->gb, 1);
             metadata_type = get_bits(&s->gb, 7);
             metadata_size = get_bits_long(&s->gb, 24);
-            
+
             av_log(s->avctx, AV_LOG_DEBUG, " metadata block: flag = %d, type = %d, size = %d\n",
                 metadata_last, metadata_type,
                 metadata_size);
@@ -647,7 +647,7 @@ static int flac_decode_frame(AVCodecContext *avctx,
                         init_get_bits(&s->gb, buf, buf_size*8);
                         skip_bits(&s->gb, bits_count);
                     }
- 
+
                     dump_headers(s);
                     break;}
                 default:
@@ -659,7 +659,7 @@ static int flac_decode_frame(AVCodecContext *avctx,
     }
     else
     {
-        
+
         tmp = show_bits(&s->gb, 16);
         if(tmp != 0xFFF8){
             av_log(s->avctx, AV_LOG_ERROR, "FRAME HEADER not here\n");
@@ -676,7 +676,7 @@ static int flac_decode_frame(AVCodecContext *avctx,
         }
     }
 
-    
+
 #if 0
     /* fix the channel order here */
     if (s->order == MID_SIDE)
@@ -757,7 +757,7 @@ static int flac_decode_frame(AVCodecContext *avctx,
                 *(samples++) = mid + side;
                 *(samples++) = mid;
 #else
-                
+
                 mid <<= 1;
                 if (side & 1)
                     mid++;
@@ -786,7 +786,7 @@ end:
         s->bitstream_index += i;
         s->bitstream_size  -= i;
         return input_buf_size;
-    }else 
+    }else
         return i;
 }
 
@@ -794,13 +794,13 @@ static int flac_decode_close(AVCodecContext *avctx)
 {
     FLACContext *s = avctx->priv_data;
     int i;
-    
+
     for (i = 0; i < s->channels; i++)
     {
         av_freep(&s->decoded[i]);
     }
     av_freep(&s->bitstream);
-    
+
     return 0;
 }
 
@@ -820,5 +820,5 @@ AVCodec flac_decoder = {
     NULL,
     flac_decode_close,
     flac_decode_frame,
-    .flush= flac_flush,    
+    .flush= flac_flush,
 };
diff --git a/src/libffmpeg/libavcodec/flicvideo.c b/src/libffmpeg/libavcodec/flicvideo.c
index 60d1849ef..fa128d0d3 100644
--- a/src/libffmpeg/libavcodec/flicvideo.c
+++ b/src/libffmpeg/libavcodec/flicvideo.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -87,7 +87,7 @@ static int flic_decode_init(AVCodecContext *avctx)
 
     s->fli_type = LE_16(&fli_header[4]); /* Might be overridden if a Magic Carpet FLC */
     depth       = LE_16(&fli_header[12]);
-    
+
     if (depth == 0) {
       depth = 8; /* Some FLC generators set depth to zero, when they mean 8Bpp. Fix up here */
     }
@@ -115,7 +115,7 @@ static int flic_decode_init(AVCodecContext *avctx)
         default :
                   av_log(avctx, AV_LOG_ERROR, "Unkown FLC/FLX depth of %d Bpp is unsupported.\n",depth);
                   return -1;
-    }             
+    }
 
     s->frame.data[0] = NULL;
     s->new_palette = 0;
@@ -159,7 +159,7 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
     int pixel_countdown;
     unsigned char *pixels;
     int pixel_limit;
-    
+
     s->frame.reference = 1;
     s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | FF_BUFFER_HINTS_REUSABLE;
     if (avctx->reget_buffer(avctx, &s->frame) < 0) {
@@ -190,8 +190,8 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
             stream_ptr_after_color_chunk = stream_ptr + chunk_size - 6;
             s->new_palette = 1;
 
-            /* check special case: If this file is from the Magic Carpet 
-             * game and uses 6-bit colors even though it reports 256-color 
+            /* check special case: If this file is from the Magic Carpet
+             * game and uses 6-bit colors even though it reports 256-color
              * chunks in a 0xAF12-type file (fli_type is set to 0xAF13 during
              * initialization) */
             if ((chunk_type == FLI_256_COLOR) && (s->fli_type != FLC_MAGIC_CARPET_SYNTHETIC_TYPE_CODE))
@@ -415,7 +415,7 @@ static int flic_decode_frame_8BPP(AVCodecContext *avctx,
     return buf_size;
 }
 
-int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
+static int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                                       void *data, int *data_size,
                                       uint8_t *buf, int buf_size)
 {
@@ -543,7 +543,7 @@ int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                  * pixels on a row */
                 stream_ptr++;
                 pixel_countdown = (s->avctx->width * 2);
-                
+
                 while (pixel_countdown > 0) {
                     byte_run = buf[stream_ptr++];
                     if (byte_run > 0) {
@@ -572,10 +572,10 @@ int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
 
                 /* Now FLX is strange, in that it is "byte" as opposed to "pixel" run length compressed.
                  * This doesnt give us any good oportunity to perform word endian conversion
-                 * during decompression. So if its requried (ie, this isnt a LE target, we do 
+                 * during decompression. So if its requried (ie, this isnt a LE target, we do
                  * a second pass over the line here, swapping the bytes.
                  */
-                pixel = 0xFF00; 
+                pixel = 0xFF00;
                 if (0xFF00 != LE_16(&pixel)) /* Check if its not an LE Target */
                 {
                   pixel_ptr = y_ptr;
@@ -584,7 +584,7 @@ int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                     *((signed short*)(&pixels[pixel_ptr])) = LE_16(&buf[pixel_ptr]);
                     pixel_ptr += 2;
                   }
-                }  
+                }
                 y_ptr += s->frame.linesize[0];
             }
             break;
@@ -597,7 +597,7 @@ int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                  * pixels on a row */
                 stream_ptr++;
                 pixel_countdown = s->avctx->width; /* Width is in pixels, not bytes */
-                
+
                 while (pixel_countdown > 0) {
                     byte_run = buf[stream_ptr++];
                     if (byte_run > 0) {
@@ -606,7 +606,7 @@ int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                         CHECK_PIXEL_PTR(byte_run);
                         for (j = 0; j < byte_run; j++) {
                             *((signed short*)(&pixels[pixel_ptr])) = pixel;
-                            pixel_ptr += 2;                            
+                            pixel_ptr += 2;
                             pixel_countdown--;
                             if (pixel_countdown < 0)
                                 av_log(avctx, AV_LOG_ERROR, "pixel_countdown < 0 (%d)\n",
@@ -639,7 +639,7 @@ int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                        "bigger than image, skipping chunk\n", chunk_size - 6);
                 stream_ptr += chunk_size - 6;
             } else {
-                
+
                 for (y_ptr = 0; y_ptr < s->frame.linesize[0] * s->avctx->height;
                      y_ptr += s->frame.linesize[0]) {
 
@@ -649,7 +649,7 @@ int flic_decode_frame_15_16BPP(AVCodecContext *avctx,
                       *((signed short*)(&pixels[y_ptr + pixel_ptr])) = LE_16(&buf[stream_ptr+pixel_ptr]);
                       pixel_ptr += 2;
                       pixel_countdown--;
-                    }  
+                    }
                     stream_ptr += s->avctx->width*2;
                 }
             }
@@ -702,7 +702,7 @@ static int flic_decode_frame(AVCodecContext *avctx,
              (avctx->pix_fmt == PIX_FMT_RGB565)) {
       return flic_decode_frame_15_16BPP(avctx, data, data_size,
                                         buf, buf_size);
-    }                                        
+    }
     else if (avctx->pix_fmt == PIX_FMT_BGR24) {
       return flic_decode_frame_24BPP(avctx, data, data_size,
                                      buf, buf_size);
@@ -711,10 +711,10 @@ static int flic_decode_frame(AVCodecContext *avctx,
     /* Shouldnt get  here, ever as the pix_fmt is processed */
     /* in flic_decode_init and the above if should deal with */
     /* the finite set of possibilites allowable by here. */
-    /* but in case we do, just error out. */    
+    /* but in case we do, just error out. */
     av_log(avctx, AV_LOG_ERROR, "Unknown Format of FLC. My Science cant explain how this happened\n");
     return -1;
-}                             
+}
 
 
 static int flic_decode_end(AVCodecContext *avctx)
diff --git a/src/libffmpeg/libavcodec/fraps.c b/src/libffmpeg/libavcodec/fraps.c
index 3b18c7249..d107e47b1 100644
--- a/src/libffmpeg/libavcodec/fraps.c
+++ b/src/libffmpeg/libavcodec/fraps.c
@@ -14,22 +14,22 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file fraps.c
  * Lossless Fraps 'FPS1' decoder
  * @author Roine Gustafsson <roine at users sf net>
- * 
+ *
  * Only decodes version 0 and 1 files.
  * Codec algorithm for version 0 is taken from Transcode <www.transcoding.org>
  *
  * Version 2 files, which are the most commonly found Fraps files, cannot be
  * decoded yet.
  */
- 
+
 #include "avcodec.h"
 
 #define FPS_TAG MKTAG('F', 'P', 'S', 'x')
@@ -57,7 +57,7 @@ static int decode_init(AVCodecContext *avctx)
     avctx->pix_fmt= PIX_FMT_NONE; /* set in decode_frame */
 
     s->avctx = avctx;
-    s->frame.data[0] = NULL;    
+    s->frame.data[0] = NULL;
 
     return 0;
 }
@@ -72,7 +72,7 @@ static int decode_init(AVCodecContext *avctx)
  * @param buf_size size of input data frame
  * @return number of consumed bytes on success or negative if decode fails
  */
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -91,7 +91,7 @@ static int decode_frame(AVCodecContext *avctx,
     header_size = (header & (1<<30))? 8 : 4; /* bit 30 means pad to 8 bytes */
 
     if (version > 1) {
-        av_log(avctx, AV_LOG_ERROR, 
+        av_log(avctx, AV_LOG_ERROR,
                "This file is encoded with Fraps version %d. " \
                "This codec can only decode version 0 and 1.\n", version);
         return -1;
@@ -100,40 +100,40 @@ static int decode_frame(AVCodecContext *avctx,
     buf+=4;
     if (header_size == 8)
         buf+=4;
-        
+
     switch(version) {
     case 0:
     default:
         /* Fraps v0 is a reordered YUV420 */
         avctx->pix_fmt = PIX_FMT_YUV420P;
 
-        if ( (buf_size != avctx->width*avctx->height*3/2+header_size) && 
+        if ( (buf_size != avctx->width*avctx->height*3/2+header_size) &&
              (buf_size != header_size) ) {
             av_log(avctx, AV_LOG_ERROR,
-                   "Invalid frame length %d (should be %d)\n", 
+                   "Invalid frame length %d (should be %d)\n",
                    buf_size, avctx->width*avctx->height*3/2+header_size);
             return -1;
         }
-        
+
         if (( (avctx->width % 8) != 0) || ( (avctx->height % 2) != 0 )) {
-            av_log(avctx, AV_LOG_ERROR, "Invalid frame size %dx%d\n", 
+            av_log(avctx, AV_LOG_ERROR, "Invalid frame size %dx%d\n",
                    avctx->width, avctx->height);
             return -1;
         }
 
-        f->reference = 1; 
-        f->buffer_hints = FF_BUFFER_HINTS_VALID | 
-                          FF_BUFFER_HINTS_PRESERVE | 
+        f->reference = 1;
+        f->buffer_hints = FF_BUFFER_HINTS_VALID |
+                          FF_BUFFER_HINTS_PRESERVE |
                           FF_BUFFER_HINTS_REUSABLE;
         if (avctx->reget_buffer(avctx, f)) {
             av_log(avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
             return -1;
-        }        
+        }
         /* bit 31 means same as previous pic */
-        f->pict_type = (header & (1<<31))? FF_P_TYPE : FF_I_TYPE; 
+        f->pict_type = (header & (1<<31))? FF_P_TYPE : FF_I_TYPE;
         f->key_frame = f->pict_type == FF_I_TYPE;
 
-        if (f->pict_type == FF_I_TYPE) { 
+        if (f->pict_type == FF_I_TYPE) {
             buf32=(uint32_t*)buf;
             for(y=0; y<avctx->height/2; y++){
                 luma1=(uint32_t*)&f->data[0][ y*2*f->linesize[0] ];
@@ -156,9 +156,9 @@ static int decode_frame(AVCodecContext *avctx,
         /* Fraps v1 is an upside-down BGR24 */
         avctx->pix_fmt = PIX_FMT_BGR24;
 
-        if ( (buf_size != avctx->width*avctx->height*3+header_size) && 
+        if ( (buf_size != avctx->width*avctx->height*3+header_size) &&
              (buf_size != header_size) ) {
-            av_log(avctx, AV_LOG_ERROR, 
+            av_log(avctx, AV_LOG_ERROR,
                    "Invalid frame length %d (should be %d)\n",
                    buf_size, avctx->width*avctx->height*3+header_size);
             return -1;
diff --git a/src/libffmpeg/libavcodec/g726.c b/src/libffmpeg/libavcodec/g726.c
index efc3c5fae..8114fe0f3 100644
--- a/src/libffmpeg/libavcodec/g726.c
+++ b/src/libffmpeg/libavcodec/g726.c
@@ -1,9 +1,9 @@
 /*
- * G.726 ADPCM audio codec 
+ * G.726 ADPCM audio codec
  * Copyright (c) 2004 Roman Shaposhnik.
  *
  * This is a very straightforward rendition of the G.726
- * Section 4 "Computational Details". 
+ * Section 4 "Computational Details".
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -17,7 +17,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include <limits.h>
 #include "avcodec.h"
@@ -26,35 +26,35 @@
 
 /**
  * G.726 11bit float.
- * G.726 Standard uses rather odd 11bit floating point arithmentic for 
+ * G.726 Standard uses rather odd 11bit floating point arithmentic for
  * numerous occasions. It's a mistery to me why they did it this way
  * instead of simply using 32bit integer arithmetic.
  */
 typedef struct Float11 {
-	int sign;   /**< 1bit sign */
-	int exp;    /**< 4bit exponent */
-	int mant;   /**< 6bit mantissa */
+        int sign;   /**< 1bit sign */
+        int exp;    /**< 4bit exponent */
+        int mant;   /**< 6bit mantissa */
 } Float11;
 
 static inline Float11* i2f(int16_t i, Float11* f)
 {
-	f->sign = (i < 0);
-	if (f->sign)
-		i = -i;
-	f->exp = av_log2_16bit(i) + !!i;
-	f->mant = i? (i<<6) >> f->exp : 
-		         1<<5;
-	return f;
+        f->sign = (i < 0);
+        if (f->sign)
+                i = -i;
+        f->exp = av_log2_16bit(i) + !!i;
+        f->mant = i? (i<<6) >> f->exp :
+                         1<<5;
+        return f;
 }
 
 static inline int16_t mult(Float11* f1, Float11* f2)
 {
-	int res, exp;
+        int res, exp;
 
-	exp = f1->exp + f2->exp;
-	res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7;
-	res = exp > 26 ? res << (exp - 26) : res >> (26 - exp);
-	return (f1->sign ^ f2->sign) ? -res : res;
+        exp = f1->exp + f2->exp;
+        res = (((f1->mant * f2->mant) + 0x30) >> 4) << 7;
+        res = exp > 26 ? res << (exp - 26) : res >> (26 - exp);
+        return (f1->sign ^ f2->sign) ? -res : res;
 }
 
 static inline int sgn(int value)
@@ -63,94 +63,94 @@ static inline int sgn(int value)
 }
 
 typedef struct G726Tables {
-	int  bits;            /**< bits per sample */
-	int* quant;           /**< quantization table */
-	int* iquant;          /**< inverse quantization table */
-	int* W;               /**< special table #1 ;-) */
-	int* F;               /**< special table #2 */
+        int  bits;            /**< bits per sample */
+        int* quant;           /**< quantization table */
+        int* iquant;          /**< inverse quantization table */
+        int* W;               /**< special table #1 ;-) */
+        int* F;               /**< special table #2 */
 } G726Tables;
 
 typedef struct G726Context {
-	 G726Tables* tbls;    /**< static tables needed for computation */
-	 
-	 Float11 sr[2];       /**< prev. reconstructed samples */
-	 Float11 dq[6];       /**< prev. difference */
-	 int a[2];            /**< second order predictor coeffs */
-	 int b[6];            /**< sixth order predictor coeffs */
-	 int pk[2];           /**< signs of prev. 2 sez + dq */
-	 
-	 int ap;              /**< scale factor control */
-	 int yu;              /**< fast scale factor */
-	 int yl;              /**< slow scale factor */
-	 int dms;             /**< short average magnitude of F[i] */
-	 int dml;             /**< long average magnitude of F[i] */
-	 int td;              /**< tone detect */
-
-	 int se;              /**< estimated signal for the next iteration */
-	 int sez;             /**< estimated second order prediction */
-	 int y;               /**< quantizer scaling factor for the next iteration */
+         G726Tables* tbls;    /**< static tables needed for computation */
+
+         Float11 sr[2];       /**< prev. reconstructed samples */
+         Float11 dq[6];       /**< prev. difference */
+         int a[2];            /**< second order predictor coeffs */
+         int b[6];            /**< sixth order predictor coeffs */
+         int pk[2];           /**< signs of prev. 2 sez + dq */
+
+         int ap;              /**< scale factor control */
+         int yu;              /**< fast scale factor */
+         int yl;              /**< slow scale factor */
+         int dms;             /**< short average magnitude of F[i] */
+         int dml;             /**< long average magnitude of F[i] */
+         int td;              /**< tone detect */
+
+         int se;              /**< estimated signal for the next iteration */
+         int sez;             /**< estimated second order prediction */
+         int y;               /**< quantizer scaling factor for the next iteration */
 } G726Context;
 
 static int quant_tbl16[] =                       /**< 16kbit/s 2bits per sample */
-           { 260, INT_MAX }; 
+           { 260, INT_MAX };
 static int iquant_tbl16[] =
            { 116, 365, 365, 116 };
-static int W_tbl16[] = 
+static int W_tbl16[] =
            { -22, 439, 439, -22 };
 static int F_tbl16[] =
            { 0, 7, 7, 0 };
-	   
+
 static int quant_tbl24[] =                       /**< 24kbit/s 3bits per sample */
            {  7, 217, 330, INT_MAX };
 static int iquant_tbl24[] =
            { INT_MIN, 135, 273, 373, 373, 273, 135, INT_MIN };
-static int W_tbl24[] = 
-           { -4,  30, 137, 582, 582, 137,  30, -4 }; 
+static int W_tbl24[] =
+           { -4,  30, 137, 582, 582, 137,  30, -4 };
 static int F_tbl24[] =
            { 0, 1, 2, 7, 7, 2, 1, 0 };
-	   
+
 static int quant_tbl32[] =                       /**< 32kbit/s 4bits per sample */
            { -125,  79, 177, 245, 299, 348, 399, INT_MAX };
 static int iquant_tbl32[] =
-           { INT_MIN,   4, 135, 213, 273, 323, 373, 425,  
-	         425, 373, 323, 273, 213, 135,   4, INT_MIN };
-static int W_tbl32[] = 
+           { INT_MIN,   4, 135, 213, 273, 323, 373, 425,
+                 425, 373, 323, 273, 213, 135,   4, INT_MIN };
+static int W_tbl32[] =
            { -12,  18,  41,  64, 112, 198, 355, 1122,
-	    1122, 355, 198, 112,  64,  41,  18, -12};
-static int F_tbl32[] = 
+            1122, 355, 198, 112,  64,  41,  18, -12};
+static int F_tbl32[] =
            { 0, 0, 0, 1, 1, 1, 3, 7, 7, 3, 1, 1, 1, 0, 0, 0 };
-	   
+
 static int quant_tbl40[] =                      /**< 40kbit/s 5bits per sample */
            { -122, -16,  67, 138, 197, 249, 297, 338,
-	      377, 412, 444, 474, 501, 527, 552, INT_MAX };
+              377, 412, 444, 474, 501, 527, 552, INT_MAX };
 static int iquant_tbl40[] =
-           { INT_MIN, -66,  28, 104, 169, 224, 274, 318,  
-	         358, 395, 429, 459, 488, 514, 539, 566,
-	         566, 539, 514, 488, 459, 429, 395, 358,
-	         318, 274, 224, 169, 104,  28, -66, INT_MIN };
-static int W_tbl40[] = 
+           { INT_MIN, -66,  28, 104, 169, 224, 274, 318,
+                 358, 395, 429, 459, 488, 514, 539, 566,
+                 566, 539, 514, 488, 459, 429, 395, 358,
+                 318, 274, 224, 169, 104,  28, -66, INT_MIN };
+static int W_tbl40[] =
            {   14,  14,  24,  39,  40,  41,   58,  100,
-	      141, 179, 219, 280, 358, 440,  529,  696, 
-	      696, 529, 440, 358, 280, 219,  179,  141,
-	      100,  58,  41,  40,  39,  24,   14,   14 };
-static int F_tbl40[] = 
+              141, 179, 219, 280, 358, 440,  529,  696,
+              696, 529, 440, 358, 280, 219,  179,  141,
+              100,  58,  41,  40,  39,  24,   14,   14 };
+static int F_tbl40[] =
            { 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 3, 4, 5, 6, 6,
-	     6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
+             6, 6, 5, 4, 3, 2, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 };
 
-static G726Tables G726Tables_pool[] = 
+static G726Tables G726Tables_pool[] =
            {{ 2, quant_tbl16, iquant_tbl16, W_tbl16, F_tbl16 },
-	    { 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 },
-	    { 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 },
+            { 3, quant_tbl24, iquant_tbl24, W_tbl24, F_tbl24 },
+            { 4, quant_tbl32, iquant_tbl32, W_tbl32, F_tbl32 },
             { 5, quant_tbl40, iquant_tbl40, W_tbl40, F_tbl40 }};
-					       
+
 
 /**
- * Para 4.2.2 page 18: Adaptive quantizer. 
+ * Para 4.2.2 page 18: Adaptive quantizer.
  */
 static inline uint8_t quant(G726Context* c, int d)
 {
    int sign, exp, i, dln;
-   
+
    sign = i = 0;
    if (d < 0) {
        sign = 1;
@@ -158,16 +158,16 @@ static inline uint8_t quant(G726Context* c, int d)
    }
    exp = av_log2_16bit(d);
    dln = ((exp<<7) + (((d<<7)>>exp)&0x7f)) - (c->y>>2);
-   
+
    while (c->tbls->quant[i] < INT_MAX && c->tbls->quant[i] < dln)
         ++i;
-   
+
    if (sign)
        i = ~i;
    if (c->tbls->bits != 2 && i == 0) /* I'm not sure this is a good idea */
        i = 0xff;
 
-   return i; 
+   return i;
 }
 
 /**
@@ -176,18 +176,18 @@ static inline uint8_t quant(G726Context* c, int d)
 static inline int16_t inverse_quant(G726Context* c, int i)
 {
     int dql, dex, dqt;
-	
+
     dql = c->tbls->iquant[i] + (c->y >> 2);
     dex = (dql>>7) & 0xf;        /* 4bit exponent */
     dqt = (1<<7) + (dql & 0x7f); /* log2 -> linear */
-    return (dql < 0) ? 0 : ((dqt<<7) >> (14-dex)); 
+    return (dql < 0) ? 0 : ((dqt<<7) >> (14-dex));
 }
 
 static inline int16_t g726_iterate(G726Context* c, int16_t I)
 {
     int dq, re_signal, pk0, fa1, i, tr, ylint, ylfrac, thr2, al, dq0;
     Float11 f;
-    
+
     dq = inverse_quant(c, I);
     if (I >> (c->tbls->bits - 1))  /* get the sign */
         dq = -dq;
@@ -201,26 +201,26 @@ static inline int16_t g726_iterate(G726Context* c, int16_t I)
         tr = 1;
     else
         tr = 0;
-    
+
     /* Update second order predictor coefficient A2 and A1 */
     pk0 = (c->sez + dq) ? sgn(c->sez + dq) : 0;
     dq0 = dq ? sgn(dq) : 0;
     if (tr) {
         c->a[0] = 0;
-	c->a[1] = 0;
+        c->a[1] = 0;
         for (i=0; i<6; i++)
-	   c->b[i] = 0;
+           c->b[i] = 0;
     } else {
-	/* This is a bit crazy, but it really is +255 not +256 */
-	fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255);
-	
-	c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7);
-	c->a[1] = clip(c->a[1], -12288, 12288);
+        /* This is a bit crazy, but it really is +255 not +256 */
+        fa1 = clip((-c->a[0]*c->pk[0]*pk0)>>5, -256, 255);
+
+        c->a[1] += 128*pk0*c->pk[1] + fa1 - (c->a[1]>>7);
+        c->a[1] = clip(c->a[1], -12288, 12288);
         c->a[0] += 64*3*pk0*c->pk[0] - (c->a[0] >> 8);
-	c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]);
+        c->a[0] = clip(c->a[0], -(15360 - c->a[1]), 15360 - c->a[1]);
 
         for (i=0; i<6; i++)
-	     c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8);
+             c->b[i] += 128*dq0*sgn(-c->dq[i].sign) - (c->b[i]>>8);
     }
 
     /* Update Dq and Sr and Pk */
@@ -232,28 +232,28 @@ static inline int16_t g726_iterate(G726Context* c, int16_t I)
        c->dq[i] = c->dq[i-1];
     i2f(dq, &c->dq[0]);
     c->dq[0].sign = I >> (c->tbls->bits - 1); /* Isn't it crazy ?!?! */
-    
+
     /* Update tone detect [I'm not sure 'tr == 0' is really needed] */
-    c->td = (tr == 0 && c->a[1] < -11776); 
-       
+    c->td = (tr == 0 && c->a[1] < -11776);
+
     /* Update Ap */
     c->dms += ((c->tbls->F[I]<<9) - c->dms) >> 5;
     c->dml += ((c->tbls->F[I]<<11) - c->dml) >> 7;
-    if (tr) 
+    if (tr)
        c->ap = 256;
     else if (c->y > 1535 && !c->td && (abs((c->dms << 2) - c->dml) < (c->dml >> 3)))
        c->ap += (-c->ap) >> 4;
     else
-       c->ap += (0x200 - c->ap) >> 4; 
+       c->ap += (0x200 - c->ap) >> 4;
 
     /* Update Yu and Yl */
     c->yu = clip(c->y + (((c->tbls->W[I] << 5) - c->y) >> 5), 544, 5120);
     c->yl += c->yu + ((-c->yl)>>6);
- 
+
     /* Next iteration for Y */
     al = (c->ap >= 256) ? 1<<6 : c->ap >> 2;
     c->y = (c->yl + (c->yu - (c->yl>>6))*al) >> 6;
-	
+
     /* Next iteration for SE and SEZ */
     c->se = 0;
     for (i=0; i<6; i++)
@@ -302,7 +302,7 @@ static int16_t g726_decode(G726Context* c, int16_t i)
 static int16_t g726_encode(G726Context* c, int16_t sig)
 {
    uint8_t i;
-   
+
    i = quant(c, sig/4 - c->se) & ((1<<c->tbls->bits) - 1);
    g726_iterate(c, i);
    return i;
@@ -320,16 +320,16 @@ typedef struct AVG726Context {
 static int g726_init(AVCodecContext * avctx)
 {
     AVG726Context* c = (AVG726Context*)avctx->priv_data;
-    
+
     if (avctx->channels != 1 ||
         (avctx->bit_rate != 16000 && avctx->bit_rate != 24000 &&
-	 avctx->bit_rate != 32000 && avctx->bit_rate != 40000)) {
+         avctx->bit_rate != 32000 && avctx->bit_rate != 40000)) {
         av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n");
-	return -1;
+        return -1;
     }
     if (avctx->sample_rate != 8000 && avctx->strict_std_compliance>FF_COMPLIANCE_INOFFICIAL) {
         av_log(avctx, AV_LOG_ERROR, "G726: unsupported audio format\n");
-	return -1;
+        return -1;
     }
     g726_reset(&c->c, avctx->bit_rate);
     c->code_size = c->c.tbls->bits;
@@ -364,8 +364,8 @@ static int g726_encode_frame(AVCodecContext *avctx,
 
     flush_put_bits(&pb);
 
-    return put_bits_count(&pb)>>3; 
-}		
+    return put_bits_count(&pb)>>3;
+}
 
 static int g726_decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
@@ -375,25 +375,25 @@ static int g726_decode_frame(AVCodecContext *avctx,
     short *samples = data;
     uint8_t code;
     uint8_t mask;
-    GetBitContext gb; 
-   
+    GetBitContext gb;
+
     if (!buf_size)
         goto out;
-    
+
     mask = (1<<c->code_size) - 1;
     init_get_bits(&gb, buf, buf_size * 8);
     if (c->bits_left) {
         int s = c->code_size - c->bits_left;;
-	code = (c->bit_buffer << s) | get_bits(&gb, s);
-	*samples++ = g726_decode(&c->c, code & mask);
+        code = (c->bit_buffer << s) | get_bits(&gb, s);
+        *samples++ = g726_decode(&c->c, code & mask);
     }
-    
+
     while (get_bits_count(&gb) + c->code_size <= buf_size*8)
-	*samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask);
-    
+        *samples++ = g726_decode(&c->c, get_bits(&gb, c->code_size) & mask);
+
     c->bits_left = buf_size*8 - get_bits_count(&gb);
     c->bit_buffer = get_bits(&gb, c->bits_left);
-    
+
 out:
     *data_size = (uint8_t*)samples - (uint8_t*)data;
     return buf_size;
diff --git a/src/libffmpeg/libavcodec/golomb.c b/src/libffmpeg/libavcodec/golomb.c
index a63f82280..c140b8b07 100644
--- a/src/libffmpeg/libavcodec/golomb.c
+++ b/src/libffmpeg/libavcodec/golomb.c
@@ -14,19 +14,19 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file golomb.c
- * @brief 
+ * @brief
  *     exp golomb vlc stuff
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
 
 #include "common.h"
- 
+
 const uint8_t ff_golomb_vlc_len[512]={
 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
@@ -46,7 +46,7 @@ const uint8_t ff_golomb_vlc_len[512]={
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1
 };
 
-const uint8_t ff_ue_golomb_vlc_code[512]={ 
+const uint8_t ff_ue_golomb_vlc_code[512]={
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,
  7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9,10,10,10,10,11,11,11,11,12,12,12,12,13,13,13,13,14,14,14,14,
  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
@@ -65,7 +65,7 @@ const uint8_t ff_ue_golomb_vlc_code[512]={
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
 };
 
-const int8_t ff_se_golomb_vlc_code[512]={ 
+const int8_t ff_se_golomb_vlc_code[512]={
   0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  8, -8,  9, -9, 10,-10, 11,-11, 12,-12, 13,-13, 14,-14, 15,-15,
   4,  4,  4,  4, -4, -4, -4, -4,  5,  5,  5,  5, -5, -5, -5, -5,  6,  6,  6,  6, -6, -6, -6, -6,  7,  7,  7,  7, -7, -7, -7, -7,
   2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2,  2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
@@ -85,7 +85,7 @@ const int8_t ff_se_golomb_vlc_code[512]={
 };
 
 
-const uint8_t ff_ue_golomb_len[256]={ 
+const uint8_t ff_ue_golomb_len[256]={
  1, 3, 3, 5, 5, 5, 5, 7, 7, 7, 7, 7, 7, 7, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,11,
 11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,11,13,
 13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,13,
@@ -115,12 +115,12 @@ const uint8_t ff_interleaved_golomb_vlc_len[256]={
 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
 };
 
-const uint8_t ff_interleaved_ue_golomb_vlc_code[256]={ 
+const uint8_t ff_interleaved_ue_golomb_vlc_code[256]={
  15,16,7, 7, 17,18,8, 8, 3, 3, 3, 3, 3, 3, 3, 3,
- 19,20,9, 9, 21,22,10,10,4, 4, 4, 4, 4, 4, 4, 4, 
+ 19,20,9, 9, 21,22,10,10,4, 4, 4, 4, 4, 4, 4, 4,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 23,24,11,11,25,26,12,12,5, 5, 5, 5, 5, 5, 5, 5, 
+ 23,24,11,11,25,26,12,12,5, 5, 5, 5, 5, 5, 5, 5,
  27,28,13,13,29,30,14,14,6, 6, 6, 6, 6, 6, 6, 6,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -134,7 +134,7 @@ const uint8_t ff_interleaved_ue_golomb_vlc_code[256]={
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
 };
 
-const int8_t ff_interleaved_se_golomb_vlc_code[256]={ 
+const int8_t ff_interleaved_se_golomb_vlc_code[256]={
   8, -8,  4,  4,  9, -9, -4, -4,  2,  2,  2,  2,  2,  2,  2,  2,
  10,-10,  5,  5, 11,-11, -5, -5, -2, -2, -2, -2, -2, -2, -2, -2,
   1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
diff --git a/src/libffmpeg/libavcodec/golomb.h b/src/libffmpeg/libavcodec/golomb.h
index 4ac74639a..ef74f15c6 100644
--- a/src/libffmpeg/libavcodec/golomb.h
+++ b/src/libffmpeg/libavcodec/golomb.h
@@ -15,13 +15,13 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file golomb.h
- * @brief 
+ * @brief
  *     exp golomb vlc stuff
  * @author Michael Niedermayer <michaelni@gmx.at> and Alex Beregszaszi
  */
@@ -37,23 +37,23 @@ extern const uint8_t ff_interleaved_golomb_vlc_len[256];
 extern const uint8_t ff_interleaved_ue_golomb_vlc_code[256];
 extern const  int8_t ff_interleaved_se_golomb_vlc_code[256];
 
- 
+
  /**
  * read unsigned exp golomb code.
  */
 static inline int get_ue_golomb(GetBitContext *gb){
     unsigned int buf;
     int log;
-    
+
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf=GET_CACHE(re, gb);
-    
+
     if(buf >= (1<<27)){
         buf >>= 32 - 9;
         LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
         CLOSE_READER(re, gb);
-    
+
         return ff_ue_golomb_vlc_code[buf];
     }else{
         log= 2*av_log2(buf) - 31;
@@ -61,7 +61,7 @@ static inline int get_ue_golomb(GetBitContext *gb){
         buf--;
         LAST_SKIP_BITS(re, gb, 32 - log);
         CLOSE_READER(re, gb);
-    
+
         return buf;
     }
 }
@@ -73,12 +73,12 @@ static inline int svq3_get_ue_golomb(GetBitContext *gb){
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf=GET_CACHE(re, gb);
-    
+
     if(buf&0xAA800000){
         buf >>= 32 - 8;
         LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
         CLOSE_READER(re, gb);
-        
+
         return ff_interleaved_ue_golomb_vlc_code[buf];
     }else{
         LAST_SKIP_BITS(re, gb, 8);
@@ -104,7 +104,7 @@ static inline int svq3_get_ue_golomb(GetBitContext *gb){
  */
 static inline int get_te0_golomb(GetBitContext *gb, int range){
     assert(range >= 1);
-    
+
     if(range==1)      return 0;
     else if(range==2) return get_bits1(gb)^1;
     else              return get_ue_golomb(gb);
@@ -115,7 +115,7 @@ static inline int get_te0_golomb(GetBitContext *gb, int range){
  */
 static inline int get_te_golomb(GetBitContext *gb, int range){
     assert(range >= 1);
-    
+
     if(range==2) return get_bits1(gb)^1;
     else         return get_ue_golomb(gb);
 }
@@ -127,24 +127,24 @@ static inline int get_te_golomb(GetBitContext *gb, int range){
 static inline int get_se_golomb(GetBitContext *gb){
     unsigned int buf;
     int log;
-    
+
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf=GET_CACHE(re, gb);
-    
+
     if(buf >= (1<<27)){
         buf >>= 32 - 9;
         LAST_SKIP_BITS(re, gb, ff_golomb_vlc_len[buf]);
         CLOSE_READER(re, gb);
-    
+
         return ff_se_golomb_vlc_code[buf];
     }else{
         log= 2*av_log2(buf) - 31;
         buf>>= log;
-        
+
         LAST_SKIP_BITS(re, gb, 32 - log);
         CLOSE_READER(re, gb);
-    
+
         if(buf&1) buf= -(buf>>1);
         else      buf=  (buf>>1);
 
@@ -164,7 +164,7 @@ static inline int svq3_get_se_golomb(GetBitContext *gb){
         buf >>= 32 - 8;
         LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
         CLOSE_READER(re, gb);
-        
+
         return ff_interleaved_se_golomb_vlc_code[buf];
     }else{
         LAST_SKIP_BITS(re, gb, 8);
@@ -191,7 +191,7 @@ static inline int svq3_get_se_golomb(GetBitContext *gb){
 static inline int get_ur_golomb(GetBitContext *gb, int k, int limit, int esc_len){
     unsigned int buf;
     int log;
-    
+
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf=GET_CACHE(re, gb);
@@ -203,13 +203,13 @@ static inline int get_ur_golomb(GetBitContext *gb, int k, int limit, int esc_len
         buf += (30-log)<<k;
         LAST_SKIP_BITS(re, gb, 32 + k - log);
         CLOSE_READER(re, gb);
-    
+
         return buf;
     }else{
         buf >>= 32 - limit - esc_len;
         LAST_SKIP_BITS(re, gb, esc_len + limit);
         CLOSE_READER(re, gb);
-    
+
         return buf + limit - 1;
     }
 }
@@ -220,19 +220,19 @@ static inline int get_ur_golomb(GetBitContext *gb, int k, int limit, int esc_len
 static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit, int esc_len){
     unsigned int buf;
     int log;
-    
+
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf=GET_CACHE(re, gb);
 
     log= av_log2(buf);
-    
+
     if(log > 31-11){
         buf >>= log - k;
         buf += (30-log)<<k;
         LAST_SKIP_BITS(re, gb, 32 + k - log);
         CLOSE_READER(re, gb);
-    
+
         return buf;
     }else{
         int i;
@@ -256,7 +256,7 @@ static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit, int
             buf = SHOW_UBITS(re, gb, esc_len);
             LAST_SKIP_BITS(re, gb, esc_len);
             CLOSE_READER(re, gb);
-    
+
             return buf + 1;
         }else
             return -1;
@@ -268,11 +268,11 @@ static inline int get_ur_golomb_jpegls(GetBitContext *gb, int k, int limit, int
  */
 static inline int get_sr_golomb(GetBitContext *gb, int k, int limit, int esc_len){
     int v= get_ur_golomb(gb, k, limit, esc_len);
-    
+
     v++;
     if (v&1) return v>>1;
     else return -(v>>1);
-    
+
 //    return (v>>1) ^ -(v&1);
 }
 
@@ -288,7 +288,7 @@ static inline int get_sr_golomb_flac(GetBitContext *gb, int k, int limit, int es
  * read unsigned golomb rice code (shorten).
  */
 static inline unsigned int get_ur_golomb_shorten(GetBitContext *gb, int k){
-	return get_ur_golomb_jpegls(gb, k, INT_MAX, 0);
+        return get_ur_golomb_jpegls(gb, k, INT_MAX, 0);
 }
 
 /**
@@ -313,11 +313,11 @@ static inline int get_ue(GetBitContext *s, char *file, const char *func, int lin
     int i= get_ue_golomb(s);
     int len= get_bits_count(s) - pos;
     int bits= show>>(24-len);
-    
+
     print_bin(bits, len);
-    
+
     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d ue  @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line);
-    
+
     return i;
 }
 
@@ -327,11 +327,11 @@ static inline int get_se(GetBitContext *s, char *file, const char *func, int lin
     int i= get_se_golomb(s);
     int len= get_bits_count(s) - pos;
     int bits= show>>(24-len);
-    
+
     print_bin(bits, len);
-    
+
     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d se  @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line);
-    
+
     return i;
 }
 
@@ -341,11 +341,11 @@ static inline int get_te(GetBitContext *s, int r, char *file, const char *func,
     int i= get_te0_golomb(s, r);
     int len= get_bits_count(s) - pos;
     int bits= show>>(24-len);
-    
+
     print_bin(bits, len);
-    
+
     av_log(NULL, AV_LOG_DEBUG, "%5d %2d %3d te  @%5d in %s %s:%d\n", bits, len, i, pos, file, func, line);
-    
+
     return i;
 }
 
@@ -361,7 +361,7 @@ static inline int get_te(GetBitContext *s, int r, char *file, const char *func,
  */
 static inline void set_ue_golomb(PutBitContext *pb, int i){
     int e;
-    
+
     assert(i>=0);
 
 #if 0
@@ -374,7 +374,7 @@ static inline void set_ue_golomb(PutBitContext *pb, int i){
         put_bits(pb, ff_ue_golomb_len[i], i+1);
     else{
         e= av_log2(i+1);
-    
+
         put_bits(pb, 2*e+1, i+1);
     }
 }
@@ -395,8 +395,8 @@ static inline void set_te_golomb(PutBitContext *pb, int i, int range){
  */
 static inline void set_se_golomb(PutBitContext *pb, int i){
 //    if (i>32767 || i<-32767)
-//	av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i);
-#if 0 
+//        av_log(NULL,AV_LOG_ERROR,"value out of range %d\n", i);
+#if 0
     if(i<=0) i= -2*i;
     else     i=  2*i-1;
 #elif 1
@@ -414,9 +414,9 @@ static inline void set_se_golomb(PutBitContext *pb, int i){
  */
 static inline void set_ur_golomb(PutBitContext *pb, int i, int k, int limit, int esc_len){
     int e;
-    
+
     assert(i>=0);
-    
+
     e= i>>k;
     if(e<limit){
         put_bits(pb, e + k + 1, (1<<k) + (i&((1<<k)-1)));
@@ -430,9 +430,9 @@ static inline void set_ur_golomb(PutBitContext *pb, int i, int k, int limit, int
  */
 static inline void set_ur_golomb_jpegls(PutBitContext *pb, int i, int k, int limit, int esc_len){
     int e;
-    
+
     assert(i>=0);
-    
+
     e= (i>>k) + 1;
     if(e<limit){
         put_bits(pb, e, 1);
diff --git a/src/libffmpeg/libavcodec/h261.c b/src/libffmpeg/libavcodec/h261.c
index a823cc39b..c6218c8b9 100644
--- a/src/libffmpeg/libavcodec/h261.c
+++ b/src/libffmpeg/libavcodec/h261.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
@@ -103,22 +103,22 @@ void ff_h261_encode_picture_header(MpegEncContext * s, int picture_number){
 
     put_bits(&s->pb, 20, 0x10); /* PSC */
 
-    temp_ref= s->picture_number * (int64_t)30000 * s->avctx->time_base.num / 
+    temp_ref= s->picture_number * (int64_t)30000 * s->avctx->time_base.num /
                          (1001 * (int64_t)s->avctx->time_base.den); //FIXME maybe this should use a timestamp
     put_bits(&s->pb, 5, temp_ref & 0x1f); /* TemporalReference */
 
     put_bits(&s->pb, 1, 0); /* split screen off */
     put_bits(&s->pb, 1, 0); /* camera  off */
     put_bits(&s->pb, 1, 0); /* freeze picture release off */
-    
+
     format = ff_h261_get_picture_format(s->width, s->height);
-    
+
     put_bits(&s->pb, 1, format); /* 0 == QCIF, 1 == CIF */
 
     put_bits(&s->pb, 1, 0); /* still image mode */
     put_bits(&s->pb, 1, 0); /* reserved */
 
-    put_bits(&s->pb, 1, 0); /* no PEI */    
+    put_bits(&s->pb, 1, 0); /* no PEI */
     if(format == 0)
         h->gob_number = -1;
     else
@@ -160,7 +160,7 @@ void ff_h261_reorder_mb_index(MpegEncContext* s){
         s->mb_y =     index %  3 ; index /=  3;
         s->mb_x+= 11*(index %  2); index /=  2;
         s->mb_y+=  3*index;
-        
+
         ff_init_block_index(s);
         ff_update_block_index(s);
     }
@@ -172,14 +172,14 @@ static void h261_encode_motion(H261Context * h, int val){
     if(val==0){
         code = 0;
         put_bits(&s->pb,h261_mv_tab[code][1],h261_mv_tab[code][0]);
-    } 
+    }
     else{
         if(val > 15)
             val -=32;
         if(val < -16)
             val+=32;
         sign = val < 0;
-        code = sign ? -val : val; 
+        code = sign ? -val : val;
         put_bits(&s->pb,h261_mv_tab[code][1],h261_mv_tab[code][0]);
         put_bits(&s->pb,1,sign);
     }
@@ -204,14 +204,14 @@ void ff_h261_encode_mb(MpegEncContext * s,
     int mvd, mv_diff_x, mv_diff_y, i, cbp;
     cbp = 63; // avoid warning
     mvd = 0;
- 
+
     h->current_mba++;
     h->mtype = 0;
- 
+
     if (!s->mb_intra){
         /* compute cbp */
         cbp= get_cbp(s, block);
-   
+
         /* mvd indicates if this block is motion compensated */
         mvd = motion_x | motion_y;
 
@@ -226,11 +226,11 @@ void ff_h261_encode_mb(MpegEncContext * s,
 
     /* MB is not skipped, encode MBA */
     put_bits(&s->pb, h261_mba_bits[(h->current_mba-h->previous_mba)-1], h261_mba_code[(h->current_mba-h->previous_mba)-1]);
- 
+
     /* calculate MTYPE */
     if(!s->mb_intra){
         h->mtype++;
-        
+
         if(mvd || s->loop_filter)
             h->mtype+=3;
         if(s->loop_filter)
@@ -240,18 +240,18 @@ void ff_h261_encode_mb(MpegEncContext * s,
         assert(h->mtype > 1);
     }
 
-    if(s->dquant) 
+    if(s->dquant)
         h->mtype++;
 
     put_bits(&s->pb, h261_mtype_bits[h->mtype], h261_mtype_code[h->mtype]);
- 
+
     h->mtype = h261_mtype_map[h->mtype];
- 
+
     if(IS_QUANT(h->mtype)){
         ff_set_qscale(s,s->qscale+s->dquant);
         put_bits(&s->pb, 5, s->qscale);
     }
- 
+
     if(IS_16X16(h->mtype)){
         mv_diff_x = (motion_x >> 1) - h->current_mv_x;
         mv_diff_y = (motion_y >> 1) - h->current_mv_y;
@@ -260,11 +260,11 @@ void ff_h261_encode_mb(MpegEncContext * s,
         h261_encode_motion(h,mv_diff_x);
         h261_encode_motion(h,mv_diff_y);
     }
- 
+
     h->previous_mba = h->current_mba;
- 
+
     if(HAS_CBP(h->mtype)){
-        put_bits(&s->pb,h261_cbp_tab[cbp-1][1],h261_cbp_tab[cbp-1][0]); 
+        put_bits(&s->pb,h261_cbp_tab[cbp-1][1],h261_cbp_tab[cbp-1][0]);
     }
     for(i=0; i<6; i++) {
         /* encode each block */
@@ -279,7 +279,7 @@ void ff_h261_encode_mb(MpegEncContext * s,
 
 void ff_h261_encode_init(MpegEncContext *s){
     static int done = 0;
-    
+
     if (!done) {
         done = 1;
         init_rl(&h261_rl_tcoeff, 1);
@@ -328,7 +328,7 @@ static void h261_encode_block(H261Context * h, DCTELEM * block, int n){
     } else {
         i = 0;
     }
-   
+
     /* AC coefs */
     last_index = s->block_last_index[n];
     last_non_zero = i - 1;
@@ -417,7 +417,7 @@ static int h261_decode_init(AVCodecContext *avctx){
     h261_decode_init_vlc(h);
 
     h->gob_start_code_skipped = 0;
-    
+
     return 0;
 }
 
@@ -428,7 +428,7 @@ static int h261_decode_init(AVCodecContext *avctx){
 static int h261_decode_gob_header(H261Context *h){
     unsigned int val;
     MpegEncContext * const s = &h->s;
-    
+
     if ( !h->gob_start_code_skipped ){
         /* Check for GOB Start Code */
         val = show_bits(&s->gb, 15);
@@ -520,7 +520,7 @@ static int h261_decode_mb_skipped(H261Context *h, int mba1, int mba2 )
 {
     MpegEncContext * const s = &h->s;
     int i;
-    
+
     s->mb_intra = 0;
 
     for(i=mba1; i<mba2; i++){
@@ -560,7 +560,7 @@ static int decode_mv_component(GetBitContext *gb, int v){
 
     if(mv_diff && !get_bits1(gb))
         mv_diff= -mv_diff;
-    
+
     v += mv_diff;
     if     (v <=-16) v+= 32;
     else if(v >= 16) v-= 32;
@@ -599,7 +599,7 @@ static int h261_decode_mb(H261Context *h){
 
     if ( h->current_mba > MBA_STUFFING )
         return SLICE_ERROR;
-    
+
     s->mb_x= ((h->gob_number-1) % 2) * 11 + ((h->current_mba-1) % 11);
     s->mb_y= ((h->gob_number-1) / 2) * 3 + ((h->current_mba-1) / 11);
     xy = s->mb_x + s->mb_y * s->mb_stride;
@@ -687,7 +687,7 @@ static int h261_decode_block(H261Context * h, DCTELEM * block,
     int code, level, i, j, run;
     RLTable *rl = &h261_rl_tcoeff;
     const uint8_t *scan_table;
-    
+
     // For the variable length encoding there are two code tables, one being used for
     // the first transmitted LEVEL in INTER, INTER+MC and INTER+MC+FIL blocks, the second
     // for all other LEVELs except the first one in INTRA blocks which is fixed length
@@ -762,7 +762,7 @@ static int h261_decode_block(H261Context * h, DCTELEM * block,
  * decodes the H261 picture header.
  * @return <0 if no startcode found
  */
-int h261_decode_picture_header(H261Context *h){
+static int h261_decode_picture_header(H261Context *h){
     MpegEncContext * const s = &h->s;
     int format, i;
     uint32_t startcode= 0;
@@ -812,7 +812,7 @@ int h261_decode_picture_header(H261Context *h){
         skip_bits(&s->gb, 8);
     }
 
-    // h261 has no I-FRAMES, but if we pass I_TYPE for the first frame, the codec crashes if it does 
+    // h261 has no I-FRAMES, but if we pass I_TYPE for the first frame, the codec crashes if it does
     // not contain all I-blocks (e.g. when a packet is lost)
     s->pict_type = P_TYPE;
 
@@ -822,7 +822,7 @@ int h261_decode_picture_header(H261Context *h){
 
 static int h261_decode_gob(H261Context *h){
     MpegEncContext * const s = &h->s;
-    
+
     ff_set_qscale(s, s->qscale);
 
     /* decode mb's */
@@ -833,16 +833,16 @@ static int h261_decode_gob(H261Context *h){
         ret= h261_decode_mb(h);
         if(ret<0){
             if(ret==SLICE_END){
-                h261_decode_mb_skipped(h, h->current_mba, 33);                
+                h261_decode_mb_skipped(h, h->current_mba, 33);
                 return 0;
             }
             av_log(s->avctx, AV_LOG_ERROR, "Error at MB: %d\n", s->mb_x + s->mb_y*s->mb_stride);
             return -1;
         }
-        
+
         h261_decode_mb_skipped(h, h->current_mba-h->mba_diff, h->current_mba-1);
     }
-    
+
     return -1;
 }
 
@@ -852,7 +852,7 @@ static int h261_find_frame_end(ParseContext *pc, AVCodecContext* avctx, const ui
 
     vop_found= pc->frame_start_found;
     state= pc->state;
-   
+
     for(i=0; i<buf_size && !vop_found; i++){
         state= (state<<8) | buf[i];
         for(j=0; j<8; j++){
@@ -883,12 +883,12 @@ static int h261_find_frame_end(ParseContext *pc, AVCodecContext* avctx, const ui
 
 static int h261_parse(AVCodecParserContext *s,
                       AVCodecContext *avctx,
-                      uint8_t **poutbuf, int *poutbuf_size, 
+                      uint8_t **poutbuf, int *poutbuf_size,
                       const uint8_t *buf, int buf_size)
 {
     ParseContext *pc = s->priv_data;
     int next;
-    
+
     next= h261_find_frame_end(pc,avctx, buf, buf_size);
     if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
         *poutbuf = NULL;
diff --git a/src/libffmpeg/libavcodec/h263.c b/src/libffmpeg/libavcodec/h263.c
index 8d15461f6..f7369c18d 100644
--- a/src/libffmpeg/libavcodec/h263.c
+++ b/src/libffmpeg/libavcodec/h263.c
@@ -17,10 +17,10 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * ac prediction encoding, b-frame support, error resilience, optimizations,
- * qpel decoding, gmc decoding, interlaced decoding, 
+ * qpel decoding, gmc decoding, interlaced decoding,
  * by Michael Niedermayer <michaelni@gmx.at>
  */
 
@@ -28,7 +28,7 @@
  * @file h263.c
  * h263/mpeg4 codec.
  */
- 
+
 //#define DEBUG
 #include <limits.h>
 
@@ -58,7 +58,7 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block,
                               int n);
 static void h263p_encode_umotion(MpegEncContext * s, int val);
 static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block,
-                               int n, int dc, uint8_t *scan_table, 
+                               int n, int dc, uint8_t *scan_table,
                                PutBitContext *dc_pb, PutBitContext *ac_pb);
 #endif
 
@@ -69,7 +69,7 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
 static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
 static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                               int n, int coded, int intra, int rvlc);
-static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, 
+static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
                                uint8_t *scan_table);
 static int h263_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr);
 #ifdef CONFIG_ENCODERS
@@ -112,7 +112,7 @@ max run: 29/41
 
 #if 0 //3IV1 is quite rare and it slows things down a tiny bit
 #define IS_3IV1 s->avctx->codec_tag == ff_get_fourcc("3IV1")
-#else 
+#else
 #define IS_3IV1 0
 #endif
 
@@ -148,7 +148,7 @@ static void aspect_to_info(MpegEncContext * s, AVRational aspect){
             return;
         }
     }
-    
+
     s->aspect_ratio_info= FF_ASPECT_EXTENDED;
 }
 
@@ -190,7 +190,7 @@ void ff_flv_encode_picture_header(MpegEncContext * s, int picture_number)
       put_bits(&s->pb, 1, 0); /* ExtraInformation */
 
       if(s->h263_aic){
-        s->y_dc_scale_table= 
+        s->y_dc_scale_table=
           s->c_dc_scale_table= ff_aic_dc_scale_table;
       }else{
         s->y_dc_scale_table=
@@ -204,7 +204,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
     int best_clock_code=1;
     int best_divisor=60;
     int best_error= INT_MAX;
-   
+
     if(s->h263_plus){
         for(i=0; i<2; i++){
             int div, error;
@@ -231,12 +231,12 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
                          (coded_frame_rate_base * (int64_t)s->avctx->time_base.den);
     put_bits(&s->pb, 8, temp_ref & 0xff); /* TemporalReference */
 
-    put_bits(&s->pb, 1, 1);	/* marker */
-    put_bits(&s->pb, 1, 0);	/* h263 id */
-    put_bits(&s->pb, 1, 0);	/* split screen off */
-    put_bits(&s->pb, 1, 0);	/* camera  off */
-    put_bits(&s->pb, 1, 0);	/* freeze picture release off */
-    
+    put_bits(&s->pb, 1, 1);     /* marker */
+    put_bits(&s->pb, 1, 0);     /* h263 id */
+    put_bits(&s->pb, 1, 0);     /* split screen off */
+    put_bits(&s->pb, 1, 0);     /* camera  off */
+    put_bits(&s->pb, 1, 0);     /* freeze picture release off */
+
     format = h263_get_picture_format(s->width, s->height);
     if (!s->h263_plus) {
         /* H.263v1 */
@@ -245,24 +245,24 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
         /* By now UMV IS DISABLED ON H.263v1, since the restrictions
         of H.263v1 UMV implies to check the predicted MV after
         calculation of the current MB to see if we're on the limits */
-        put_bits(&s->pb, 1, 0);	/* Unrestricted Motion Vector: off */
-        put_bits(&s->pb, 1, 0);	/* SAC: off */
-        put_bits(&s->pb, 1, s->obmc);	/* Advanced Prediction */
-        put_bits(&s->pb, 1, 0);	/* only I/P frames, no PB frame */
+        put_bits(&s->pb, 1, 0);         /* Unrestricted Motion Vector: off */
+        put_bits(&s->pb, 1, 0);         /* SAC: off */
+        put_bits(&s->pb, 1, s->obmc);   /* Advanced Prediction */
+        put_bits(&s->pb, 1, 0);         /* only I/P frames, no PB frame */
         put_bits(&s->pb, 5, s->qscale);
-        put_bits(&s->pb, 1, 0);	/* Continuous Presence Multipoint mode: off */
+        put_bits(&s->pb, 1, 0);         /* Continuous Presence Multipoint mode: off */
     } else {
         int ufep=1;
         /* H.263v2 */
         /* H.263 Plus PTYPE */
-        
+
         put_bits(&s->pb, 3, 7);
         put_bits(&s->pb,3,ufep); /* Update Full Extended PTYPE */
         if (format == 7)
             put_bits(&s->pb,3,6); /* Custom Source Format */
         else
             put_bits(&s->pb, 3, format);
-            
+
         put_bits(&s->pb,1, s->custom_pcf);
         put_bits(&s->pb,1, s->umvplus); /* Unrestricted Motion Vector */
         put_bits(&s->pb,1,0); /* SAC: off */
@@ -276,19 +276,19 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
         put_bits(&s->pb,1,s->modified_quant); /* Modified Quantization: */
         put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
         put_bits(&s->pb,3,0); /* Reserved */
-		
+
         put_bits(&s->pb, 3, s->pict_type == P_TYPE);
-		
+
         put_bits(&s->pb,1,0); /* Reference Picture Resampling: off */
         put_bits(&s->pb,1,0); /* Reduced-Resolution Update: off */
         put_bits(&s->pb,1,s->no_rounding); /* Rounding Type */
         put_bits(&s->pb,2,0); /* Reserved */
         put_bits(&s->pb,1,1); /* "1" to prevent start code emulation */
-		
+
         /* This should be here if PLUSPTYPE */
-        put_bits(&s->pb, 1, 0);	/* Continuous Presence Multipoint mode: off */
-		
-		if (format == 7) {
+        put_bits(&s->pb, 1, 0); /* Continuous Presence Multipoint mode: off */
+
+                if (format == 7) {
             /* Custom Picture Format (CPFMT) */
             aspect_to_info(s, s->avctx->sample_aspect_ratio);
 
@@ -299,7 +299,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
             if (s->aspect_ratio_info == FF_ASPECT_EXTENDED){
                 put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.num);
                 put_bits(&s->pb, 8, s->avctx->sample_aspect_ratio.den);
-	    }
+            }
         }
         if(s->custom_pcf){
             if(ufep){
@@ -308,7 +308,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
             }
             put_bits(&s->pb, 2, (temp_ref>>8)&3);
         }
-        
+
         /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */
         if (s->umvplus)
 //            put_bits(&s->pb,1,1); /* Limited according tables of Annex D */
@@ -320,11 +320,11 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
         put_bits(&s->pb, 5, s->qscale);
     }
 
-    put_bits(&s->pb, 1, 0);	/* no PEI */
+    put_bits(&s->pb, 1, 0);     /* no PEI */
 
     if(s->h263_slice_structured){
         put_bits(&s->pb, 1, 1);
-        
+
         assert(s->mb_x == 0 && s->mb_y == 0);
         ff_h263_encode_mba(s);
 
@@ -332,7 +332,7 @@ void h263_encode_picture_header(MpegEncContext * s, int picture_number)
     }
 
     if(s->h263_aic){
-         s->y_dc_scale_table= 
+         s->y_dc_scale_table=
          s->c_dc_scale_table= ff_aic_dc_scale_table;
     }else{
         s->y_dc_scale_table=
@@ -386,7 +386,7 @@ static inline int get_block_rate(MpegEncContext * s, DCTELEM block[64], int bloc
             last= j;
         }
     }
-    
+
     return rate;
 }
 
@@ -397,10 +397,10 @@ static inline int decide_ac_pred(MpegEncContext * s, DCTELEM block[6][64], int d
     int8_t * const qscale_table= s->current_picture.qscale_table;
 
     memcpy(zigzag_last_index, s->block_last_index, sizeof(int)*6);
-    
+
     for(n=0; n<6; n++){
         int16_t *ac_val, *ac_val1;
-        
+
         score -= get_block_rate(s, block[n], s->block_last_index[n], s->intra_scantable.permutated);
 
         ac_val = s->ac_val[0][0] + s->block_index[n] * 16;
@@ -490,7 +490,7 @@ static inline void restore_ac_coeffs(MpegEncContext * s, DCTELEM block[6][64], i
 void ff_clean_h263_qscales(MpegEncContext *s){
     int i;
     int8_t * const qscale_table= s->current_picture.qscale_table;
-    
+
     for(i=1; i<s->mb_num; i++){
         if(qscale_table[ s->mb_index2xy[i] ] - qscale_table[ s->mb_index2xy[i-1] ] >2)
             qscale_table[ s->mb_index2xy[i] ]= qscale_table[ s->mb_index2xy[i-1] ]+2;
@@ -503,7 +503,7 @@ void ff_clean_h263_qscales(MpegEncContext *s){
     if(s->codec_id != CODEC_ID_H263P){
         for(i=1; i<s->mb_num; i++){
             int mb_xy= s->mb_index2xy[i];
-        
+
             if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTER4V)){
                 s->mb_type[mb_xy]&= ~CANDIDATE_MB_TYPE_INTER4V;
                 s->mb_type[mb_xy]|= CANDIDATE_MB_TYPE_INTER;
@@ -520,27 +520,27 @@ void ff_clean_mpeg4_qscales(MpegEncContext *s){
     int8_t * const qscale_table= s->current_picture.qscale_table;
 
     ff_clean_h263_qscales(s);
-    
+
     if(s->pict_type== B_TYPE){
         int odd=0;
         /* ok, come on, this isn't funny anymore, there's more code for handling this mpeg4 mess than for the actual adaptive quantization */
-        
+
         for(i=0; i<s->mb_num; i++){
             int mb_xy= s->mb_index2xy[i];
             odd += qscale_table[mb_xy]&1;
         }
-        
+
         if(2*odd > s->mb_num) odd=1;
         else                  odd=0;
-        
+
         for(i=0; i<s->mb_num; i++){
             int mb_xy= s->mb_index2xy[i];
             if((qscale_table[mb_xy]&1) != odd)
                 qscale_table[mb_xy]++;
             if(qscale_table[mb_xy] > 31)
                 qscale_table[mb_xy]= 31;
-        }            
-    
+        }
+
         for(i=1; i<s->mb_num; i++){
             int mb_xy= s->mb_index2xy[i];
             if(qscale_table[mb_xy] != qscale_table[s->mb_index2xy[i-1]] && (s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_DIRECT)){
@@ -563,9 +563,9 @@ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
     uint16_t time_pp= s->pp_time;
     uint16_t time_pb= s->pb_time;
     int i;
-    
+
     //FIXME avoid divides
-    
+
     if(IS_8X8(colocated_mb_type)){
         s->mv_type = MV_TYPE_8X8;
         for(i=0; i<4; i++){
@@ -574,7 +574,7 @@ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
             s->mv[0][i][1] = s->next_picture.motion_val[0][xy][1]*time_pb/time_pp + my;
             s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->next_picture.motion_val[0][xy][0]
                                 : s->next_picture.motion_val[0][xy][0]*(time_pb - time_pp)/time_pp;
-            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->next_picture.motion_val[0][xy][1] 
+            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->next_picture.motion_val[0][xy][1]
                                 : s->next_picture.motion_val[0][xy][1]*(time_pb - time_pp)/time_pp;
         }
         return MB_TYPE_DIRECT2 | MB_TYPE_8x8 | MB_TYPE_L0L1;
@@ -593,7 +593,7 @@ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
             s->mv[0][i][1] = s->p_field_mv_table[i][0][mb_index][1]*time_pb/time_pp + my;
             s->mv[1][i][0] = mx ? s->mv[0][i][0] - s->p_field_mv_table[i][0][mb_index][0]
                                 : s->p_field_mv_table[i][0][mb_index][0]*(time_pb - time_pp)/time_pp;
-            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->p_field_mv_table[i][0][mb_index][1] 
+            s->mv[1][i][1] = my ? s->mv[0][i][1] - s->p_field_mv_table[i][0][mb_index][1]
                                 : s->p_field_mv_table[i][0][mb_index][1]*(time_pb - time_pp)/time_pp;
         }
         return MB_TYPE_DIRECT2 | MB_TYPE_16x8 | MB_TYPE_L0L1 | MB_TYPE_INTERLACED;
@@ -602,7 +602,7 @@ int ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my){
         s->mv[0][0][1] = s->mv[0][1][1] = s->mv[0][2][1] = s->mv[0][3][1] = s->next_picture.motion_val[0][xy][1]*time_pb/time_pp + my;
         s->mv[1][0][0] = s->mv[1][1][0] = s->mv[1][2][0] = s->mv[1][3][0] = mx ? s->mv[0][0][0] - s->next_picture.motion_val[0][xy][0]
                             : s->next_picture.motion_val[0][xy][0]*(time_pb - time_pp)/time_pp;
-        s->mv[1][0][1] = s->mv[1][1][1] = s->mv[1][2][1] = s->mv[1][3][1] = my ? s->mv[0][0][1] - s->next_picture.motion_val[0][xy][1] 
+        s->mv[1][0][1] = s->mv[1][1][1] = s->mv[1][2][1] = s->mv[1][3][1] = my ? s->mv[0][0][1] - s->next_picture.motion_val[0][xy][1]
                             : s->next_picture.motion_val[0][xy][1]*(time_pb - time_pp)/time_pp;
         if((s->avctx->workaround_bugs & FF_BUG_DIRECT_BLOCKSIZE) || !s->quarter_sample)
             s->mv_type= MV_TYPE_16X16;
@@ -617,8 +617,8 @@ void ff_h263_update_motion_val(MpegEncContext * s){
                //FIXME a lot of that is only needed for !low_delay
     const int wrap = s->b8_stride;
     const int xy = s->block_index[0];
-    
-    s->current_picture.mbskip_table[mb_xy]= s->mb_skipped; 
+
+    s->current_picture.mbskip_table[mb_xy]= s->mb_skipped;
 
     if(s->mv_type != MV_TYPE_8X8){
         int motion_x, motion_y;
@@ -655,7 +655,7 @@ void ff_h263_update_motion_val(MpegEncContext * s){
     }
 
     if(s->encoding){ //FIXME encoding MUST be cleaned up
-        if (s->mv_type == MV_TYPE_8X8) 
+        if (s->mv_type == MV_TYPE_8X8)
             s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_8x8;
         else if(s->mb_intra)
             s->current_picture.mb_type[mb_xy]= MB_TYPE_INTRA;
@@ -685,7 +685,7 @@ static inline int h263_get_motion_length(MpegEncContext * s, int val, int f_code
 
 static inline void ff_h263_encode_motion_vector(MpegEncContext * s, int x, int y, int f_code){
     if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){
-        skip_put_bits(&s->pb, 
+        skip_put_bits(&s->pb,
             h263_get_motion_length(s, x, f_code)
            +h263_get_motion_length(s, y, f_code));
     }else{
@@ -758,14 +758,14 @@ static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64],
     if(s->flags & CODEC_FLAG_CBP_RD){
         int score=0;
         const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
-        
+
         for(i=0; i<6; i++){
             if(s->coded_score[i] < 0){
                 score += s->coded_score[i];
                 cbp |= 1 << (5 - i);
             }
         }
-        
+
         if(cbp){
             int zero_score= -6;
             if ((motion_x | motion_y | s->dquant | mb_type) == 0){
@@ -793,10 +793,10 @@ static inline int get_b_cbp(MpegEncContext * s, DCTELEM block[6][64],
     return cbp;
 }
 
-static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64], int intra_dc[6], 
+static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64], int intra_dc[6],
                                uint8_t **scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb){
     int i;
-    
+
     if(scan_table){
         if(s->flags2 & CODEC_FLAG2_NO_OUTPUT){
             for (i = 0; i < 6; i++) {
@@ -823,8 +823,8 @@ static inline void mpeg4_encode_blocks(MpegEncContext * s, DCTELEM block[6][64],
 }
 
 void mpeg4_encode_mb(MpegEncContext * s,
-		    DCTELEM block[6][64],
-		    int motion_x, int motion_y)
+                    DCTELEM block[6][64],
+                    int motion_x, int motion_y)
 {
     int cbpc, cbpy, pred_x, pred_y;
     PutBitContext * const pb2    = s->data_partitioning                         ? &s->pb2    : &s->pb;
@@ -832,24 +832,24 @@ void mpeg4_encode_mb(MpegEncContext * s,
     PutBitContext * const dc_pb  = s->data_partitioning && s->pict_type!=I_TYPE ? &s->pb2    : &s->pb;
     const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1) && !s->data_partitioning ? 1 : 0;
     const int dquant_code[5]= {1,0,9,2,3};
-    
+
     //    printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
     if (!s->mb_intra) {
         int i, cbp;
-        
+
         if(s->pict_type==B_TYPE){
             static const int mb_type_table[8]= {-1, 2, 3, 1,-1,-1,-1, 0}; /* convert from mv_dir to type */
             int mb_type=  mb_type_table[s->mv_dir];
 
             if(s->mb_x==0){
                 for(i=0; i<2; i++){
-                    s->last_mv[i][0][0]= 
-                    s->last_mv[i][0][1]= 
-                    s->last_mv[i][1][0]= 
+                    s->last_mv[i][0][0]=
+                    s->last_mv[i][0][1]=
+                    s->last_mv[i][1][0]=
                     s->last_mv[i][1][1]= 0;
                 }
             }
-            
+
             assert(s->dquant>=-2 && s->dquant<=2);
             assert((s->dquant&1)==0);
             assert(mb_type>=0);
@@ -857,9 +857,9 @@ void mpeg4_encode_mb(MpegEncContext * s,
             /* nothing to do if this MB was skipped in the next P Frame */
             if(s->next_picture.mbskip_table[s->mb_y * s->mb_stride + s->mb_x]){ //FIXME avoid DCT & ...
                 s->skip_count++;
-                s->mv[0][0][0]= 
-                s->mv[0][0][1]= 
-                s->mv[1][0][0]= 
+                s->mv[0][0][0]=
+                s->mv[0][0][1]=
+                s->mv[1][0][0]=
                 s->mv[1][0][1]= 0;
                 s->mv_dir= MV_DIR_FORWARD; //doesn't matter
                 s->qscale -= s->dquant;
@@ -867,13 +867,13 @@ void mpeg4_encode_mb(MpegEncContext * s,
 
                 return;
             }
-            
+
             cbp= get_b_cbp(s, block, motion_x, motion_y, mb_type);
-            
+
             if ((cbp | motion_x | motion_y | mb_type) ==0) {
                 /* direct MB with MV={0,0} */
                 assert(s->dquant==0);
-                
+
                 put_bits(&s->pb, 1, 1); /* mb not coded modb1=1 */
 
                 if(interleaved_stats){
@@ -883,12 +883,12 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 s->skip_count++;
                 return;
             }
-            
-            put_bits(&s->pb, 1, 0);	/* mb coded modb1=0 */
+
+            put_bits(&s->pb, 1, 0);     /* mb coded modb1=0 */
             put_bits(&s->pb, 1, cbp ? 0 : 1); /* modb2 */ //FIXME merge
             put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we don't need it :)
             if(cbp) put_bits(&s->pb, 6, cbp);
-            
+
             if(cbp && mb_type){
                 if(s->dquant)
                     put_bits(&s->pb, 2, (s->dquant>>2)+3);
@@ -896,7 +896,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
                     put_bits(&s->pb, 1, 0);
             }else
                 s->qscale -= s->dquant;
-            
+
             if(!s->progressive_sequence){
                 if(cbp)
                     put_bits(&s->pb, 1, s->interlaced_dct);
@@ -972,9 +972,9 @@ void mpeg4_encode_mb(MpegEncContext * s,
 
         }else{ /* s->pict_type==B_TYPE */
             cbp= get_p_cbp(s, block, motion_x, motion_y);
-        
+
             if ((cbp | motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16) {
-                /* check if the B frames can skip it too, as we must skip it if we skip here 
+                /* check if the B frames can skip it too, as we must skip it if we skip here
                    why didn't they just compress the skip-mb bits instead of reusing them ?! */
                 if(s->max_b_frames>0){
                     int i;
@@ -988,7 +988,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
 
                     offset= x + y*s->linesize;
                     p_pic= s->new_picture.data[0] + offset;
-                    
+
                     s->mb_skipped=1;
                     for(i=0; i<s->max_b_frames; i++){
                         uint8_t *b_pic;
@@ -997,15 +997,17 @@ void mpeg4_encode_mb(MpegEncContext * s,
 
                         if(pic==NULL || pic->pict_type!=B_TYPE) break;
 
-                        b_pic= pic->data[0] + offset + 16; //FIXME +16
-			diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
+                        b_pic= pic->data[0] + offset;
+                        if(pic->type != FF_BUFFER_TYPE_SHARED)
+                            b_pic+= INPLACE_OFFSET;
+                        diff= s->dsp.sad[0](NULL, p_pic, b_pic, s->linesize, 16);
                         if(diff>s->qscale*70){ //FIXME check that 70 is optimal
                             s->mb_skipped=0;
                             break;
                         }
                     }
                 }else
-                    s->mb_skipped=1; 
+                    s->mb_skipped=1;
 
                 if(s->mb_skipped==1){
                     /* skip macroblock */
@@ -1016,12 +1018,12 @@ void mpeg4_encode_mb(MpegEncContext * s,
                         s->last_bits++;
                     }
                     s->skip_count++;
-                    
+
                     return;
                 }
             }
 
-            put_bits(&s->pb, 1, 0);	/* mb coded */
+            put_bits(&s->pb, 1, 0);     /* mb coded */
             cbpc = cbp & 3;
             cbpy = cbp >> 2;
             cbpy ^= 0xf;
@@ -1040,14 +1042,14 @@ void mpeg4_encode_mb(MpegEncContext * s,
                         put_bits(pb2, 1, s->interlaced_dct);
                     put_bits(pb2, 1, 0);
                 }
-                    
+
                 if(interleaved_stats){
                     s->misc_bits+= get_bits_diff(s);
                 }
 
                 /* motion vectors: 16x16 mode */
                 h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
-            
+
                 ff_h263_encode_motion_vector(s, motion_x - pred_x,
                                                 motion_y - pred_y, s->f_code);
             }else if(s->mv_type==MV_TYPE_FIELD){
@@ -1064,7 +1066,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 if(cbp)
                     put_bits(pb2, 1, s->interlaced_dct);
                 put_bits(pb2, 1, 1);
-                    
+
                 if(interleaved_stats){
                     s->misc_bits+= get_bits_diff(s);
                 }
@@ -1072,10 +1074,10 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 /* motion vectors: 16x8 interlaced mode */
                 h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
                 pred_y /=2;
-                
+
                 put_bits(&s->pb, 1, s->field_select[0][0]);
                 put_bits(&s->pb, 1, s->field_select[0][1]);
-            
+
                 ff_h263_encode_motion_vector(s, s->mv[0][0][0] - pred_x,
                                                 s->mv[0][0][1] - pred_y, s->f_code);
                 ff_h263_encode_motion_vector(s, s->mv[0][1][0] - pred_x,
@@ -1091,7 +1093,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
                     if(cbp)
                         put_bits(pb2, 1, s->interlaced_dct);
                 }
-    
+
                 if(interleaved_stats){
                     s->misc_bits+= get_bits_diff(s);
                 }
@@ -1105,7 +1107,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 }
             }
 
-            if(interleaved_stats){ 
+            if(interleaved_stats){
                 s->mv_bits+= get_bits_diff(s);
             }
 
@@ -1118,10 +1120,10 @@ void mpeg4_encode_mb(MpegEncContext * s,
         }
     } else {
         int cbp;
-        int dc_diff[6];   //dc values with the dc prediction subtracted 
+        int dc_diff[6];   //dc values with the dc prediction subtracted
         int dir[6];  //prediction direction
         int zigzag_last_index[6];
-	uint8_t *scan_table[6];
+        uint8_t *scan_table[6];
         int i;
 
         for(i=0; i<6; i++){
@@ -1152,7 +1154,7 @@ void mpeg4_encode_mb(MpegEncContext * s,
                 intra_MCBPC_code[cbpc]);
         } else {
             if(s->dquant) cbpc+=8;
-            put_bits(&s->pb, 1, 0);	/* mb coded */
+            put_bits(&s->pb, 1, 0);     /* mb coded */
             put_bits(&s->pb,
                 inter_MCBPC_bits[cbpc + 4],
                 inter_MCBPC_code[cbpc + 4]);
@@ -1185,8 +1187,8 @@ void mpeg4_encode_mb(MpegEncContext * s,
 }
 
 void h263_encode_mb(MpegEncContext * s,
-		    DCTELEM block[6][64],
-		    int motion_x, int motion_y)
+                    DCTELEM block[6][64],
+                    int motion_x, int motion_y)
 {
     int cbpc, cbpy, i, cbp, pred_x, pred_y;
     int16_t pred_dc;
@@ -1194,7 +1196,7 @@ void h263_encode_mb(MpegEncContext * s,
     uint16_t *dc_ptr[6];
     const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1);
     const int dquant_code[5]= {1,0,9,2,3};
-           
+
     //printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
     if (!s->mb_intra) {
         /* compute cbp */
@@ -1211,8 +1213,8 @@ void h263_encode_mb(MpegEncContext * s,
 
             return;
         }
-        put_bits(&s->pb, 1, 0);	/* mb coded */
-        
+        put_bits(&s->pb, 1, 0);         /* mb coded */
+
         cbpc = cbp & 3;
         cbpy = cbp >> 2;
         if(s->alt_inter_vlc==0 || cbpc!=3)
@@ -1226,15 +1228,15 @@ void h263_encode_mb(MpegEncContext * s,
             put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
             if(s->dquant)
                 put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
-                
+
             if(interleaved_stats){
                 s->misc_bits+= get_bits_diff(s);
             }
 
             /* motion vectors: 16x16 mode */
             h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
-            
-            if (!s->umvplus) {  
+
+            if (!s->umvplus) {
                 ff_h263_encode_motion_vector(s, motion_x - pred_x,
                                                 motion_y - pred_y, 1);
             }
@@ -1263,7 +1265,7 @@ void h263_encode_mb(MpegEncContext * s,
 
                 motion_x= s->current_picture.motion_val[0][ s->block_index[i] ][0];
                 motion_y= s->current_picture.motion_val[0][ s->block_index[i] ][1];
-                if (!s->umvplus) {  
+                if (!s->umvplus) {
                     ff_h263_encode_motion_vector(s, motion_x - pred_x,
                                                     motion_y - pred_y, 1);
                 }
@@ -1282,14 +1284,14 @@ void h263_encode_mb(MpegEncContext * s,
         }
     } else {
         assert(s->mb_intra);
-        
+
         cbp = 0;
         if (s->h263_aic) {
             /* Predict DC */
             for(i=0; i<6; i++) {
                 int16_t level = block[i][0];
                 int scale;
-                
+
                 if(i<4) scale= s->y_dc_scale;
                 else    scale= s->c_dc_scale;
 
@@ -1300,7 +1302,7 @@ void h263_encode_mb(MpegEncContext * s,
                     level = (level + (scale>>1))/scale;
                 else
                     level = (level - (scale>>1))/scale;
-                    
+
                 /* AIC can change CBP */
                 if (level == 0 && s->block_last_index[i] == 0)
                     s->block_last_index[i] = -1;
@@ -1313,7 +1315,7 @@ void h263_encode_mb(MpegEncContext * s,
                 }
 
                 block[i][0] = level;
-                /* Reconstruction */ 
+                /* Reconstruction */
                 rec_intradc[i] = scale*level + pred_dc;
                 /* Oddify */
                 rec_intradc[i] |= 1;
@@ -1324,7 +1326,7 @@ void h263_encode_mb(MpegEncContext * s,
                     rec_intradc[i] = 0;
                 else if (rec_intradc[i] > 2047)
                     rec_intradc[i] = 2047;
-                                
+
                 /* Update AC/DC tables */
                 *dc_ptr[i] = rec_intradc[i];
                 if (s->block_last_index[i] >= 0)
@@ -1346,14 +1348,14 @@ void h263_encode_mb(MpegEncContext * s,
                 intra_MCBPC_code[cbpc]);
         } else {
             if(s->dquant) cbpc+=8;
-            put_bits(&s->pb, 1, 0);	/* mb coded */
+            put_bits(&s->pb, 1, 0);     /* mb coded */
             put_bits(&s->pb,
                 inter_MCBPC_bits[cbpc + 4],
                 inter_MCBPC_code[cbpc + 4]);
         }
         if (s->h263_aic) {
             /* XXX: currently, we do not try to use ac prediction */
-            put_bits(&s->pb, 1, 0);	/* no AC prediction */
+            put_bits(&s->pb, 1, 0);     /* no AC prediction */
         }
         cbpy = cbp >> 2;
         put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
@@ -1368,11 +1370,11 @@ void h263_encode_mb(MpegEncContext * s,
     for(i=0; i<6; i++) {
         /* encode each block */
         h263_encode_block(s, block[i], i);
-    
+
         /* Update INTRADC for decoding */
         if (s->h263_aic && s->mb_intra) {
             block[i][0] = rec_intradc[i];
-            
+
         }
     }
 
@@ -1396,7 +1398,7 @@ void ff_h263_loop_filter(MpegEncContext * s){
     uint8_t *dest_y = s->dest[0];
     uint8_t *dest_cb= s->dest[1];
     uint8_t *dest_cr= s->dest[2];
-    
+
 //    if(s->pict_type==B_TYPE && !s->readable) return;
 
     /*
@@ -1415,32 +1417,32 @@ void ff_h263_loop_filter(MpegEncContext * s){
 
         if(IS_SKIP(s->current_picture.mb_type[xy-s->mb_stride]))
             qp_t=0;
-        else 
+        else
             qp_t= s->current_picture.qscale_table[xy-s->mb_stride];
 
-        if(qp_c) 
+        if(qp_c)
             qp_tc= qp_c;
         else
             qp_tc= qp_t;
-            
+
         if(qp_tc){
             const int chroma_qp= s->chroma_qscale_table[qp_tc];
             s->dsp.h263_v_loop_filter(dest_y  ,   linesize, qp_tc);
             s->dsp.h263_v_loop_filter(dest_y+8,   linesize, qp_tc);
-        
+
             s->dsp.h263_v_loop_filter(dest_cb , uvlinesize, chroma_qp);
             s->dsp.h263_v_loop_filter(dest_cr , uvlinesize, chroma_qp);
         }
-        
+
         if(qp_t)
             s->dsp.h263_h_loop_filter(dest_y-8*linesize+8  ,   linesize, qp_t);
-        
+
         if(s->mb_x){
             if(qp_t || IS_SKIP(s->current_picture.mb_type[xy-1-s->mb_stride]))
                 qp_dt= qp_t;
             else
                 qp_dt= s->current_picture.qscale_table[xy-1-s->mb_stride];
-            
+
             if(qp_dt){
                 const int chroma_qp= s->chroma_qscale_table[qp_dt];
                 s->dsp.h263_h_loop_filter(dest_y -8*linesize  ,   linesize, qp_dt);
@@ -1455,14 +1457,14 @@ void ff_h263_loop_filter(MpegEncContext * s){
         if(s->mb_y + 1 == s->mb_height)
             s->dsp.h263_h_loop_filter(dest_y+8*linesize+8,   linesize, qp_c);
     }
-    
+
     if(s->mb_x){
         int qp_lc;
         if(qp_c || IS_SKIP(s->current_picture.mb_type[xy-1]))
             qp_lc= qp_c;
         else
             qp_lc= s->current_picture.qscale_table[xy-1];
-        
+
         if(qp_lc){
             s->dsp.h263_h_loop_filter(dest_y,   linesize, qp_lc);
             if(s->mb_y + 1 == s->mb_height){
@@ -1497,11 +1499,11 @@ static int h263_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr)
         scale = s->c_dc_scale;
     }
     /* B C
-     * A X 
+     * A X
      */
     a = dc_val[(x - 1) + (y) * wrap];
     c = dc_val[(x) + (y - 1) * wrap];
-    
+
     /* No prediction outside GOB boundary */
     if(s->first_slice_line && n!=3){
         if(n!=2) c= 1024;
@@ -1515,7 +1517,7 @@ static int h263_pred_dc(MpegEncContext * s, int n, uint16_t **dc_val_ptr)
         pred_dc = a;
     else
         pred_dc = c;
-    
+
     /* we assume pred is positive */
     //pred_dc = (pred_dc + (scale >> 1)) / scale;
     *dc_val_ptr = &dc_val[x + y * wrap];
@@ -1543,22 +1545,22 @@ static void h263_pred_acdc(MpegEncContext * s, DCTELEM *block, int n)
         ac_val = s->ac_val[n - 4 + 1][0];
         scale = s->c_dc_scale;
     }
-    
+
     ac_val += ((y) * wrap + (x)) * 16;
     ac_val1 = ac_val;
-    
+
     /* B C
-     * A X 
+     * A X
      */
     a = dc_val[(x - 1) + (y) * wrap];
     c = dc_val[(x) + (y - 1) * wrap];
-    
+
     /* No prediction outside GOB boundary */
     if(s->first_slice_line && n!=3){
         if(n!=2) c= 1024;
         if(n!=1 && s->mb_x == s->resync_mb_x) a= 1024;
     }
-    
+
     if (s->ac_pred) {
         pred_dc = 1024;
         if (s->h263_aic_dir) {
@@ -1589,18 +1591,18 @@ static void h263_pred_acdc(MpegEncContext * s, DCTELEM *block, int n)
         else
             pred_dc = c;
     }
-    
+
     /* we assume pred is positive */
     block[0]=block[0]*scale + pred_dc;
-    
+
     if (block[0] < 0)
         block[0] = 0;
-    else 
+    else
         block[0] |= 1;
-    
+
     /* Update AC/DC tables */
     dc_val[(x) + (y) * wrap] = block[0];
-    
+
     /* left copy */
     for(i=1;i<8;i++)
         ac_val1[i    ] = block[s->dsp.idct_permutation[i<<3]];
@@ -1654,7 +1656,7 @@ int16_t *h263_pred_motion(MpegEncContext * s, int block, int dir,
             C = mot_val[off[block] - wrap];
             if(s->mb_x == s->resync_mb_x) //rare
                 A[0]=A[1]=0;
-    
+
             *px = mid_pred(A[0], B[0], C[0]);
             *py = mid_pred(A[1], B[1], C[1]);
         }
@@ -1690,7 +1692,7 @@ void ff_h263_encode_motion(MpegEncContext * s, int val, int f_code)
         code = (val >> bit_size) + 1;
         bits = val & (range - 1);
 
-        put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign); 
+        put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign);
         if (bit_size > 0) {
             put_bits(&s->pb, bit_size, bits);
         }
@@ -1700,13 +1702,13 @@ void ff_h263_encode_motion(MpegEncContext * s, int val, int f_code)
 /* Encode MV differences on H.263+ with Unrestricted MV mode */
 static void h263p_encode_umotion(MpegEncContext * s, int val)
 {
-    short sval = 0; 
+    short sval = 0;
     short i = 0;
     short n_bits = 0;
     short temp_val;
     int code = 0;
     int tcode;
-    
+
     if ( val == 0)
         put_bits(&s->pb, 1, 1);
     else if (val == 1)
@@ -1714,15 +1716,15 @@ static void h263p_encode_umotion(MpegEncContext * s, int val)
     else if (val == -1)
         put_bits(&s->pb, 3, 2);
     else {
-        
+
         sval = ((val < 0) ? (short)(-val):(short)val);
         temp_val = sval;
-        
+
         while (temp_val != 0) {
             temp_val = temp_val >> 1;
             n_bits++;
         }
-        
+
         i = n_bits - 1;
         while (i > 0) {
             tcode = (sval & (1 << (i-1))) >> (i-1);
@@ -1740,10 +1742,10 @@ static void init_mv_penalty_and_fcode(MpegEncContext *s)
 {
     int f_code;
     int mv;
-    
+
     if(mv_penalty==NULL)
         mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
-    
+
     for(f_code=1; f_code<=MAX_FCODE; f_code++){
         for(mv=-MAX_MV; mv<=MAX_MV; mv++){
             int len;
@@ -1756,14 +1758,14 @@ static void init_mv_penalty_and_fcode(MpegEncContext *s)
                 range = 1 << bit_size;
 
                 val=mv;
-                if (val < 0) 
+                if (val < 0)
                     val = -val;
                 val--;
                 code = (val >> bit_size) + 1;
                 if(code<33){
                     len= mvtab[code][1] + 1 + bit_size;
                 }else{
-                    len= mvtab[32][1] + 2 + bit_size;
+                    len= mvtab[32][1] + av_log2(code>>5) + 2 + bit_size;
                 }
             }
 
@@ -1796,7 +1798,7 @@ static void init_uni_dc_tab(void)
         v = abs(level);
         while (v) {
             v >>= 1;
-	    size++;
+            size++;
         }
 
         if (level < 0)
@@ -1822,7 +1824,7 @@ static void init_uni_dc_tab(void)
         /* chrominance */
         uni_code= DCtab_chrom[size][0];
         uni_len = DCtab_chrom[size][1];
-        
+
         if (size > 0) {
             uni_code<<=size; uni_code|=l;
             uni_len+=size;
@@ -1842,7 +1844,7 @@ static void init_uni_dc_tab(void)
 #ifdef CONFIG_ENCODERS
 static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_tab){
     int slevel, run, last;
-    
+
     assert(MAX_LEVEL >= 64);
     assert(MAX_RUN   >= 63);
 
@@ -1855,15 +1857,15 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_
                 int sign= slevel < 0 ? 1 : 0;
                 int bits, len, code;
                 int level1, run1;
-                
+
                 len_tab[index]= 100;
-                     
+
                 /* ESC0 */
                 code= get_rl_index(rl, last, run, level);
                 bits= rl->table_vlc[code][0];
                 len=  rl->table_vlc[code][1];
                 bits=bits*2+sign; len++;
-                
+
                 if(code!=rl->n && len < len_tab[index]){
                     bits_tab[index]= bits;
                     len_tab [index]= len;
@@ -1880,13 +1882,13 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_
                     len  += rl->table_vlc[code][1];
                     bits += rl->table_vlc[code][0];
                     bits=bits*2+sign; len++;
-                
+
                     if(code!=rl->n && len < len_tab[index]){
                         bits_tab[index]= bits;
                         len_tab [index]= len;
                     }
                 }
-#endif 
+#endif
 #if 1
                 /* ESC2 */
                 bits= rl->table_vlc[rl->n][0];
@@ -1899,14 +1901,14 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_
                     len  += rl->table_vlc[code][1];
                     bits += rl->table_vlc[code][0];
                     bits=bits*2+sign; len++;
-                
+
                     if(code!=rl->n && len < len_tab[index]){
                         bits_tab[index]= bits;
                         len_tab [index]= len;
                     }
                 }
-#endif           
-                /* ESC3 */        
+#endif
+                /* ESC3 */
                 bits= rl->table_vlc[rl->n][0];
                 len = rl->table_vlc[rl->n][1];
                 bits=bits*4+3;    len+=2; //esc3
@@ -1915,7 +1917,7 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_
                 bits=bits*2+1;    len++;  //marker
                 bits=bits*4096+(slevel&0xfff); len+=12;
                 bits=bits*2+1;    len++;  //marker
-                
+
                 if(len < len_tab[index]){
                     bits_tab[index]= bits;
                     len_tab [index]= len;
@@ -1927,7 +1929,7 @@ static void init_uni_mpeg4_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_
 
 static void init_uni_h263_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_tab){
     int slevel, run, last;
-    
+
     assert(MAX_LEVEL >= 64);
     assert(MAX_RUN   >= 63);
 
@@ -1939,15 +1941,15 @@ static void init_uni_h263_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_t
                 int level= slevel < 0 ? -slevel : slevel;
                 int sign= slevel < 0 ? 1 : 0;
                 int bits, len, code;
-                
+
                 len_tab[index]= 100;
-                     
+
                 /* ESC0 */
                 code= get_rl_index(rl, last, run, level);
                 bits= rl->table_vlc[code][0];
                 len=  rl->table_vlc[code][1];
                 bits=bits*2+sign; len++;
-                
+
                 if(code!=rl->n && len < len_tab[index]){
                     if(bits_tab) bits_tab[index]= bits;
                     len_tab [index]= len;
@@ -1958,7 +1960,7 @@ static void init_uni_h263_rl_tab(RLTable *rl, uint32_t *bits_tab, uint8_t *len_t
                 bits=bits*2+last; len++;
                 bits=bits*64+run; len+=6;
                 bits=bits*256+(level&0xff); len+=8;
-                
+
                 if(len < len_tab[index]){
                     if(bits_tab) bits_tab[index]= bits;
                     len_tab [index]= len;
@@ -1980,7 +1982,7 @@ void h263_encode_init(MpegEncContext *s)
         init_rl(&rl_inter, 1);
         init_rl(&rl_intra, 1);
         init_rl(&rl_intra_aic, 1);
-        
+
         init_uni_mpeg4_rl_tab(&rl_intra, uni_mpeg4_intra_rl_bits, uni_mpeg4_intra_rl_len);
         init_uni_mpeg4_rl_tab(&rl_inter, uni_mpeg4_inter_rl_bits, uni_mpeg4_inter_rl_len);
 
@@ -1990,7 +1992,7 @@ void h263_encode_init(MpegEncContext *s)
         init_mv_penalty_and_fcode(s);
     }
     s->me.mv_penalty= mv_penalty; //FIXME exact table for msmpeg4 & h263p
-    
+
     s->intra_ac_vlc_length     =s->inter_ac_vlc_length     = uni_h263_inter_rl_len;
     s->intra_ac_vlc_last_length=s->inter_ac_vlc_last_length= uni_h263_inter_rl_len + 128*64;
     if(s->h263_aic){
@@ -2019,7 +2021,7 @@ void h263_encode_init(MpegEncContext *s)
 
             s->avctx->extradata= av_malloc(1024);
             init_put_bits(&s->pb, s->avctx->extradata, 1024);
-            
+
             if(!(s->workaround_bugs & FF_BUG_MS))
                 mpeg4_encode_visual_object_header(s);
             mpeg4_encode_vol_header(s, 0, 0);
@@ -2028,7 +2030,7 @@ void h263_encode_init(MpegEncContext *s)
             flush_put_bits(&s->pb);
             s->avctx->extradata_size= (put_bits_count(&s->pb)+7)>>3;
         }
-        
+
         break;
     case CODEC_ID_H263P:
         if(s->umvplus)
@@ -2041,7 +2043,7 @@ void h263_encode_init(MpegEncContext *s)
             s->max_qcoeff=  127;
         }
         break;
-        //Note for mpeg4 & h263 the dc-scale table will be set per frame as needed later 
+        //Note for mpeg4 & h263 the dc-scale table will be set per frame as needed later
     case CODEC_ID_FLV1:
         if (s->h263_flv > 1) {
             s->min_qcoeff= -1023;
@@ -2094,13 +2096,13 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
         i = 0;
         if (s->h263_aic && s->mb_intra)
             rl = &rl_intra_aic;
-            
+
         if(s->alt_inter_vlc && !s->mb_intra){
             int aic_vlc_bits=0;
             int inter_vlc_bits=0;
             int wrong_pos=-1;
             int aic_code;
-            
+
             last_index = s->block_last_index[n];
             last_non_zero = i - 1;
             for (; i <= last_index; i++) {
@@ -2109,9 +2111,9 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
                 if (level) {
                     run = i - last_non_zero - 1;
                     last = (i == last_index);
-                    
+
                     if(level<0) level= -level;
-                
+
                     code = get_rl_index(rl, last, run, level);
                     aic_code = get_rl_index(&rl_intra_aic, last, run, level);
                     inter_vlc_bits += rl->table_vlc[code][1]+1;
@@ -2119,21 +2121,21 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
 
                     if (code == rl->n) {
                         inter_vlc_bits += 1+6+8-1;
-                    }                
+                    }
                     if (aic_code == rl_intra_aic.n) {
                         aic_vlc_bits += 1+6+8-1;
                         wrong_pos += run + 1;
                     }else
                         wrong_pos += wrong_run[aic_code];
                     last_non_zero = i;
-                }    
+                }
             }
             i = 0;
             if(aic_vlc_bits < inter_vlc_bits && wrong_pos > 63)
                 rl = &rl_intra_aic;
         }
     }
-   
+
     /* AC coefs */
     last_index = s->block_last_index[n];
     last_non_zero = i - 1;
@@ -2155,10 +2157,10 @@ static void h263_encode_block(MpegEncContext * s, DCTELEM * block, int n)
               if(s->h263_flv <= 1){
                 put_bits(&s->pb, 1, last);
                 put_bits(&s->pb, 6, run);
-                
+
                 assert(slevel != 0);
 
-                if(level < 128) 
+                if(level < 128)
                     put_bits(&s->pb, 8, slevel & 0xff);
                 else{
                     put_bits(&s->pb, 8, 128);
@@ -2229,10 +2231,10 @@ void ff_set_mpeg4_time(MpegEncContext * s, int picture_number){
 static void mpeg4_encode_gop_header(MpegEncContext * s){
     int hours, minutes, seconds;
     int64_t time;
-    
+
     put_bits(&s->pb, 16, 0);
     put_bits(&s->pb, 16, GOP_STARTCODE);
-    
+
     time= s->current_picture_ptr->pts;
     if(s->reordered_input_picture[1])
         time= FFMIN(time, s->reordered_input_picture[1]->pts);
@@ -2247,11 +2249,11 @@ static void mpeg4_encode_gop_header(MpegEncContext * s){
     put_bits(&s->pb, 6, minutes);
     put_bits(&s->pb, 1, 1);
     put_bits(&s->pb, 6, seconds);
-    
-    put_bits(&s->pb, 1, !!(s->flags&CODEC_FLAG_CLOSED_GOP)); 
+
+    put_bits(&s->pb, 1, !!(s->flags&CODEC_FLAG_CLOSED_GOP));
     put_bits(&s->pb, 1, 0); //broken link == NO
-    
-    s->last_time_base= time / s->avctx->time_base.den; 
+
+    s->last_time_base= time / s->avctx->time_base.den;
 
     ff_mpeg4_stuffing(&s->pb);
 }
@@ -2259,7 +2261,7 @@ static void mpeg4_encode_gop_header(MpegEncContext * s){
 static void mpeg4_encode_visual_object_header(MpegEncContext * s){
     int profile_and_level_indication;
     int vo_ver_id;
-    
+
     if(s->avctx->profile != FF_PROFILE_UNKNOWN){
         profile_and_level_indication = s->avctx->profile << 4;
     }else if(s->max_b_frames || s->quarter_sample){
@@ -2289,13 +2291,13 @@ static void mpeg4_encode_visual_object_header(MpegEncContext * s){
 
     put_bits(&s->pb, 16, 0);
     put_bits(&s->pb, 16, VISUAL_OBJ_STARTCODE);
-    
+
     put_bits(&s->pb, 1, 1);
         put_bits(&s->pb, 4, vo_ver_id);
         put_bits(&s->pb, 3, 1); //priority
- 
+
     put_bits(&s->pb, 4, 1); //visual obj type== video obj
-    
+
     put_bits(&s->pb, 1, 0); //video signal type == no clue //FIXME
 
     ff_mpeg4_stuffing(&s->pb);
@@ -2318,16 +2320,16 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
     put_bits(&s->pb, 16, 0);
     put_bits(&s->pb, 16, 0x120 + vol_number);       /* video obj layer */
 
-    put_bits(&s->pb, 1, 0);		/* random access vol */
-    put_bits(&s->pb, 8, s->vo_type);	/* video obj type indication */
+    put_bits(&s->pb, 1, 0);             /* random access vol */
+    put_bits(&s->pb, 8, s->vo_type);    /* video obj type indication */
     if(s->workaround_bugs & FF_BUG_MS) {
-        put_bits(&s->pb, 1, 0);        /* is obj layer id= no */
+        put_bits(&s->pb, 1, 0);         /* is obj layer id= no */
     } else {
-        put_bits(&s->pb, 1, 1);        /* is obj layer id= yes */
-        put_bits(&s->pb, 4, vo_ver_id);    /* is obj layer ver id */
-        put_bits(&s->pb, 3, 1);        /* is obj layer priority */
+        put_bits(&s->pb, 1, 1);         /* is obj layer id= yes */
+        put_bits(&s->pb, 4, vo_ver_id); /* is obj layer ver id */
+        put_bits(&s->pb, 3, 1);         /* is obj layer priority */
     }
-    
+
     aspect_to_info(s, s->avctx->sample_aspect_ratio);
 
     put_bits(&s->pb, 4, s->aspect_ratio_info);/* aspect ratio info */
@@ -2337,37 +2339,37 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
     }
 
     if(s->workaround_bugs & FF_BUG_MS) { //
-        put_bits(&s->pb, 1, 0);        /* vol control parameters= no @@@ */
+        put_bits(&s->pb, 1, 0);         /* vol control parameters= no @@@ */
     } else {
-        put_bits(&s->pb, 1, 1);        /* vol control parameters= yes */
-        put_bits(&s->pb, 2, 1);        /* chroma format YUV 420/YV12 */
+        put_bits(&s->pb, 1, 1);         /* vol control parameters= yes */
+        put_bits(&s->pb, 2, 1);         /* chroma format YUV 420/YV12 */
         put_bits(&s->pb, 1, s->low_delay);
-        put_bits(&s->pb, 1, 0);        /* vbv parameters= no */
+        put_bits(&s->pb, 1, 0);         /* vbv parameters= no */
     }
 
-    put_bits(&s->pb, 2, RECT_SHAPE);	/* vol shape= rectangle */
-    put_bits(&s->pb, 1, 1);		/* marker bit */
-    
+    put_bits(&s->pb, 2, RECT_SHAPE);    /* vol shape= rectangle */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+
     put_bits(&s->pb, 16, s->avctx->time_base.den);
     if (s->time_increment_bits < 1)
         s->time_increment_bits = 1;
-    put_bits(&s->pb, 1, 1);		/* marker bit */
-    put_bits(&s->pb, 1, 0);		/* fixed vop rate=no */
-    put_bits(&s->pb, 1, 1);		/* marker bit */
-    put_bits(&s->pb, 13, s->width);	/* vol width */
-    put_bits(&s->pb, 1, 1);		/* marker bit */
-    put_bits(&s->pb, 13, s->height);	/* vol height */
-    put_bits(&s->pb, 1, 1);		/* marker bit */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+    put_bits(&s->pb, 1, 0);             /* fixed vop rate=no */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+    put_bits(&s->pb, 13, s->width);     /* vol width */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
+    put_bits(&s->pb, 13, s->height);    /* vol height */
+    put_bits(&s->pb, 1, 1);             /* marker bit */
     put_bits(&s->pb, 1, s->progressive_sequence ? 0 : 1);
-    put_bits(&s->pb, 1, 1);		/* obmc disable */
+    put_bits(&s->pb, 1, 1);             /* obmc disable */
     if (vo_ver_id == 1) {
-        put_bits(&s->pb, 1, s->vol_sprite_usage);		/* sprite enable */
+        put_bits(&s->pb, 1, s->vol_sprite_usage);       /* sprite enable */
     }else{
-        put_bits(&s->pb, 2, s->vol_sprite_usage);		/* sprite enable */
+        put_bits(&s->pb, 2, s->vol_sprite_usage);       /* sprite enable */
     }
-    
-    put_bits(&s->pb, 1, 0);		/* not 8 bit == false */
-    put_bits(&s->pb, 1, s->mpeg_quant);	/* quant type= (0=h263 style)*/
+
+    put_bits(&s->pb, 1, 0);             /* not 8 bit == false */
+    put_bits(&s->pb, 1, s->mpeg_quant); /* quant type= (0=h263 style)*/
 
     if(s->mpeg_quant){
         ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix);
@@ -2376,27 +2378,27 @@ static void mpeg4_encode_vol_header(MpegEncContext * s, int vo_number, int vol_n
 
     if (vo_ver_id != 1)
         put_bits(&s->pb, 1, s->quarter_sample);
-    put_bits(&s->pb, 1, 1);		/* complexity estimation disable */
+    put_bits(&s->pb, 1, 1);             /* complexity estimation disable */
     s->resync_marker= s->rtp_mode;
     put_bits(&s->pb, 1, s->resync_marker ? 0 : 1);/* resync marker disable */
     put_bits(&s->pb, 1, s->data_partitioning ? 1 : 0);
     if(s->data_partitioning){
-        put_bits(&s->pb, 1, 0);		/* no rvlc */
+        put_bits(&s->pb, 1, 0);         /* no rvlc */
     }
 
     if (vo_ver_id != 1){
-        put_bits(&s->pb, 1, 0);		/* newpred */
-        put_bits(&s->pb, 1, 0);		/* reduced res vop */
+        put_bits(&s->pb, 1, 0);         /* newpred */
+        put_bits(&s->pb, 1, 0);         /* reduced res vop */
     }
-    put_bits(&s->pb, 1, 0);		/* scalability */
-    
+    put_bits(&s->pb, 1, 0);             /* scalability */
+
     ff_mpeg4_stuffing(&s->pb);
 
     /* user data */
     if(!(s->flags & CODEC_FLAG_BITEXACT)){
         put_bits(&s->pb, 16, 0);
-        put_bits(&s->pb, 16, 0x1B2);	/* user_data */
-	ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0);
+        put_bits(&s->pb, 16, 0x1B2);    /* user_data */
+        ff_put_string(&s->pb, LIBAVCODEC_IDENT, 0);
     }
 }
 
@@ -2405,7 +2407,7 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
 {
     int time_incr;
     int time_div, time_mod;
-    
+
     if(s->pict_type==I_TYPE){
         if(!(s->flags&CODEC_FLAG_GLOBAL_HEADER)){
             if(s->strict_std_compliance < FF_COMPLIANCE_VERY_STRICT) //HACK, the reference sw is buggy
@@ -2416,14 +2418,14 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
         if(!(s->workaround_bugs & FF_BUG_MS))
             mpeg4_encode_gop_header(s);
     }
-    
+
     s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE;
 
 //printf("num:%d rate:%d base:%d\n", s->picture_number, s->time_base.den, FRAME_RATE_BASE);
-    
-    put_bits(&s->pb, 16, 0);	        /* vop header */
-    put_bits(&s->pb, 16, VOP_STARTCODE);	/* vop header */
-    put_bits(&s->pb, 2, s->pict_type - 1);	/* pict type: I = 0 , P = 1 */
+
+    put_bits(&s->pb, 16, 0);                /* vop header */
+    put_bits(&s->pb, 16, VOP_STARTCODE);    /* vop header */
+    put_bits(&s->pb, 2, s->pict_type - 1);  /* pict type: I = 0 , P = 1 */
 
     assert(s->time>=0);
     time_div= s->time/s->avctx->time_base.den;
@@ -2432,18 +2434,18 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
     assert(time_incr >= 0);
     while(time_incr--)
         put_bits(&s->pb, 1, 1);
-        
+
     put_bits(&s->pb, 1, 0);
 
-    put_bits(&s->pb, 1, 1);	/* marker */
-    put_bits(&s->pb, s->time_increment_bits, time_mod);	/* time increment */
-    put_bits(&s->pb, 1, 1);	/* marker */
-    put_bits(&s->pb, 1, 1);	/* vop coded */
-    if (    s->pict_type == P_TYPE 
+    put_bits(&s->pb, 1, 1);                             /* marker */
+    put_bits(&s->pb, s->time_increment_bits, time_mod); /* time increment */
+    put_bits(&s->pb, 1, 1);                             /* marker */
+    put_bits(&s->pb, 1, 1);                             /* vop coded */
+    if (    s->pict_type == P_TYPE
         || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE)) {
-	put_bits(&s->pb, 1, s->no_rounding);	/* rounding type */
+        put_bits(&s->pb, 1, s->no_rounding);    /* rounding type */
     }
-    put_bits(&s->pb, 3, 0);	/* intra dc VLC threshold */
+    put_bits(&s->pb, 3, 0);     /* intra dc VLC threshold */
     if(!s->progressive_sequence){
          put_bits(&s->pb, 1, s->current_picture_ptr->top_field_first);
          put_bits(&s->pb, 1, s->alternate_scan);
@@ -2453,9 +2455,9 @@ void mpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
     put_bits(&s->pb, 5, s->qscale);
 
     if (s->pict_type != I_TYPE)
-	put_bits(&s->pb, 3, s->f_code);	/* fcode_for */
+        put_bits(&s->pb, 3, s->f_code); /* fcode_for */
     if (s->pict_type == B_TYPE)
-	put_bits(&s->pb, 3, s->b_code);	/* fcode_back */
+        put_bits(&s->pb, 3, s->b_code); /* fcode_back */
     //    printf("****frame %d\n", picture_number);
 }
 
@@ -2470,7 +2472,7 @@ void ff_set_qscale(MpegEncContext * s, int qscale)
         qscale = 1;
     else if (qscale > 31)
         qscale = 31;
-        
+
     s->qscale = qscale;
     s->chroma_qscale= s->chroma_qscale_table[qscale];
 
@@ -2481,7 +2483,7 @@ void ff_set_qscale(MpegEncContext * s, int qscale)
 /**
  * predicts the dc.
  * encoding quantized level -> quantized diff
- * decoding quantized diff -> quantized level  
+ * decoding quantized diff -> quantized level
  * @param n block index (0-3 are luma, 4-5 are chroma)
  * @param dir_ptr pointer to an integer where the prediction direction will be stored
  */
@@ -2492,9 +2494,9 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di
 
     /* find prediction */
     if (n < 4) {
-	scale = s->y_dc_scale;
+        scale = s->y_dc_scale;
     } else {
-	scale = s->c_dc_scale;
+        scale = s->c_dc_scale;
     }
     if(IS_3IV1)
         scale= 8;
@@ -2503,7 +2505,7 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di
     dc_val = s->dc_val[0] + s->block_index[n];
 
     /* B C
-     * A X 
+     * A X
      */
     a = dc_val[ - 1];
     b = dc_val[ - 1 - wrap];
@@ -2520,10 +2522,10 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di
     }
 
     if (abs(a - b) < abs(b - c)) {
-	pred = c;
+        pred = c;
         *dir_ptr = 1; /* top */
     } else {
-	pred = a;
+        pred = a;
         *dir_ptr = 0; /* left */
     }
     /* we assume pred is positive */
@@ -2547,7 +2549,7 @@ static inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, int level, int *di
     }
     level *=scale;
     if(level&(~2047)){
-        if(level<0) 
+        if(level<0)
             level=0;
         else if(!(s->workaround_bugs&FF_BUG_DC_CLIP))
             level=2047;
@@ -2577,7 +2579,7 @@ void mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
             const int xy= s->mb_x-1 + s->mb_y*s->mb_stride;
             /* left prediction */
             ac_val -= 16;
-            
+
             if(s->mb_x==0 || s->qscale == qscale_table[xy] || n==1 || n==3){
                 /* same qscale */
                 for(i=1;i<8;i++) {
@@ -2629,11 +2631,11 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
 //    if(level<-255 || level>255) printf("dc overflow\n");
     level+=256;
     if (n < 4) {
-	/* luminance */
-	put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]);
+        /* luminance */
+        put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]);
     } else {
-	/* chrominance */
-	put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]);
+        /* chrominance */
+        put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]);
     }
 #else
     int size, v;
@@ -2641,25 +2643,25 @@ static inline void mpeg4_encode_dc(PutBitContext * s, int level, int n)
     size = 0;
     v = abs(level);
     while (v) {
-	v >>= 1;
-	size++;
+        v >>= 1;
+        size++;
     }
 
     if (n < 4) {
-	/* luminance */
-	put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]);
+        /* luminance */
+        put_bits(&s->pb, DCtab_lum[size][1], DCtab_lum[size][0]);
     } else {
-	/* chrominance */
-	put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]);
+        /* chrominance */
+        put_bits(&s->pb, DCtab_chrom[size][1], DCtab_chrom[size][0]);
     }
 
     /* encode remaining bits */
     if (size > 0) {
-	if (level < 0)
-	    level = (-level) ^ ((1 << size) - 1);
-	put_bits(&s->pb, size, level);
-	if (size > 8)
-	    put_bits(&s->pb, 1, 1);
+        if (level < 0)
+            level = (-level) ^ ((1 << size) - 1);
+        put_bits(&s->pb, size, level);
+        if (size > 8)
+            put_bits(&s->pb, 1, 1);
     }
 #endif
 }
@@ -2676,7 +2678,7 @@ static inline int mpeg4_get_dc_length(int level, int n){
  * encodes a 8x8 block
  * @param n block index (0-3 are luma, 4-5 are chroma)
  */
-static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, 
+static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
                                uint8_t *scan_table, PutBitContext *dc_pb, PutBitContext *ac_pb)
 {
     int i, last_non_zero;
@@ -2689,16 +2691,16 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
     const int last_index = s->block_last_index[n];
 
     if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
-	/* mpeg4 based DC predictor */
-	mpeg4_encode_dc(dc_pb, intra_dc, n);
+        /* mpeg4 based DC predictor */
+        mpeg4_encode_dc(dc_pb, intra_dc, n);
         if(last_index<1) return;
-	i = 1;
+        i = 1;
         rl = &rl_intra;
         bits_tab= uni_mpeg4_intra_rl_bits;
         len_tab = uni_mpeg4_intra_rl_len;
     } else {
         if(last_index<0) return;
-	i = 0;
+        i = 0;
         rl = &rl_inter;
         bits_tab= uni_mpeg4_inter_rl_bits;
         len_tab = uni_mpeg4_inter_rl_len;
@@ -2708,9 +2710,9 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
     last_non_zero = i - 1;
 #if 1
     for (; i < last_index; i++) {
-	int level = block[ scan_table[i] ];
-	if (level) {
-	    int run = i - last_non_zero - 1;
+        int level = block[ scan_table[i] ];
+        if (level) {
+            int run = i - last_non_zero - 1;
             level+=64;
             if((level&(~127)) == 0){
                 const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
@@ -2718,11 +2720,11 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
             }else{ //ESC3
                 put_bits(ac_pb, 7+2+1+6+1+12+1, (3<<23)+(3<<21)+(0<<20)+(run<<14)+(1<<13)+(((level-64)&0xfff)<<1)+1);
             }
-	    last_non_zero = i;
-	}
+            last_non_zero = i;
+        }
     }
     /*if(i<=last_index)*/{
-	int level = block[ scan_table[i] ];
+        int level = block[ scan_table[i] ];
         int run = i - last_non_zero - 1;
         level+=64;
         if((level&(~127)) == 0){
@@ -2734,23 +2736,23 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
     }
 #else
     for (; i <= last_index; i++) {
-	const int slevel = block[ scan_table[i] ];
-	if (slevel) {
+        const int slevel = block[ scan_table[i] ];
+        if (slevel) {
             int level;
-	    int run = i - last_non_zero - 1;
-	    last = (i == last_index);
-	    sign = 0;
-	    level = slevel;
-	    if (level < 0) {
-		sign = 1;
-		level = -level;
-	    }
+            int run = i - last_non_zero - 1;
+            last = (i == last_index);
+            sign = 0;
+            level = slevel;
+            if (level < 0) {
+                sign = 1;
+                level = -level;
+            }
             code = get_rl_index(rl, last, run, level);
             put_bits(ac_pb, rl->table_vlc[code][1], rl->table_vlc[code][0]);
             if (code == rl->n) {
                 int level1, run1;
                 level1 = level - rl->max_level[last][run];
-                if (level1 < 1) 
+                if (level1 < 1)
                     goto esc2;
                 code = get_rl_index(rl, last, run, level1);
                 if (code == rl->n) {
@@ -2786,13 +2788,13 @@ static inline void mpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int n
             } else {
                 put_bits(ac_pb, 1, sign);
             }
-	    last_non_zero = i;
-	}
+            last_non_zero = i;
+        }
     }
 #endif
 }
 
-static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc, 
+static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, int intra_dc,
                                uint8_t *scan_table)
 {
     int i, last_non_zero;
@@ -2802,15 +2804,15 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
     int len=0;
 
     if (s->mb_intra) { //Note gcc (3.2.1 at least) will optimize this away
-	/* mpeg4 based DC predictor */
-	len += mpeg4_get_dc_length(intra_dc, n);
+        /* mpeg4 based DC predictor */
+        len += mpeg4_get_dc_length(intra_dc, n);
         if(last_index<1) return len;
-	i = 1;
+        i = 1;
         rl = &rl_intra;
         len_tab = uni_mpeg4_intra_rl_len;
     } else {
         if(last_index<0) return 0;
-	i = 0;
+        i = 0;
         rl = &rl_inter;
         len_tab = uni_mpeg4_inter_rl_len;
     }
@@ -2818,9 +2820,9 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
     /* AC coefs */
     last_non_zero = i - 1;
     for (; i < last_index; i++) {
-	int level = block[ scan_table[i] ];
-	if (level) {
-	    int run = i - last_non_zero - 1;
+        int level = block[ scan_table[i] ];
+        if (level) {
+            int run = i - last_non_zero - 1;
             level+=64;
             if((level&(~127)) == 0){
                 const int index= UNI_MPEG4_ENC_INDEX(0, run, level);
@@ -2828,11 +2830,11 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
             }else{ //ESC3
                 len += 7+2+1+6+1+12+1;
             }
-	    last_non_zero = i;
-	}
+            last_non_zero = i;
+        }
     }
     /*if(i<=last_index)*/{
-	int level = block[ scan_table[i] ];
+        int level = block[ scan_table[i] ];
         int run = i - last_non_zero - 1;
         level+=64;
         if((level&(~127)) == 0){
@@ -2842,7 +2844,7 @@ static int mpeg4_get_block_length(MpegEncContext * s, DCTELEM * block, int n, in
             len += 7+2+1+6+1+12+1;
         }
     }
-    
+
     return len;
 }
 
@@ -2865,25 +2867,25 @@ static VLC cbpc_b_vlc;
 void init_vlc_rl(RLTable *rl, int use_static)
 {
     int i, q;
- 
+
     /* Return if static table is already initialized */
     if(use_static && rl->rl_vlc[0])
-        return;    
+        return;
 
-    init_vlc(&rl->vlc, 9, rl->n + 1, 
+    init_vlc(&rl->vlc, 9, rl->n + 1,
              &rl->table_vlc[0][1], 4, 2,
              &rl->table_vlc[0][0], 4, 2, use_static);
 
-    
+
     for(q=0; q<32; q++){
         int qmul= q*2;
         int qadd= (q-1)|1;
-        
+
         if(q==0){
             qmul=1;
             qadd=0;
         }
-        if(use_static)        
+        if(use_static)
             rl->rl_vlc[q]= av_mallocz_static(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
         else
             rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
@@ -2891,7 +2893,7 @@ void init_vlc_rl(RLTable *rl, int use_static)
             int code= rl->vlc.table[i][0];
             int len = rl->vlc.table[i][1];
             int level, run;
-        
+
             if(len==0){ // illegal code
                 run= 66;
                 level= MAX_LEVEL;
@@ -2925,10 +2927,10 @@ void h263_decode_init_vlc(MpegEncContext *s)
     if (!done) {
         done = 1;
 
-        init_vlc(&intra_MCBPC_vlc, INTRA_MCBPC_VLC_BITS, 9, 
+        init_vlc(&intra_MCBPC_vlc, INTRA_MCBPC_VLC_BITS, 9,
                  intra_MCBPC_bits, 1, 1,
                  intra_MCBPC_code, 1, 1, 1);
-        init_vlc(&inter_MCBPC_vlc, INTER_MCBPC_VLC_BITS, 28, 
+        init_vlc(&inter_MCBPC_vlc, INTER_MCBPC_VLC_BITS, 28,
                  inter_MCBPC_bits, 1, 1,
                  inter_MCBPC_code, 1, 1, 1);
         init_vlc(&cbpy_vlc, CBPY_VLC_BITS, 16,
@@ -3013,7 +3015,7 @@ static int h263_decode_gob_header(MpegEncContext *s)
 {
     unsigned int val, gfid, gob_number;
     int left;
-    
+
     /* Check for GOB Start Code */
     val = show_bits(&s->gb, 16);
     if(val)
@@ -3026,7 +3028,7 @@ static int h263_decode_gob_header(MpegEncContext *s)
     for(;left>13; left--){
         if(get_bits1(&s->gb)) break; /* Seek the '1' bit */
     }
-    if(left<=13) 
+    if(left<=13)
         return -1;
 
     if(s->h263_slice_structured){
@@ -3038,7 +3040,7 @@ static int h263_decode_gob_header(MpegEncContext *s)
         if(s->mb_num > 1583)
             if(get_bits1(&s->gb)==0)
                 return -1;
-        
+
         s->qscale = get_bits(&s->gb, 5); /* SQUANT */
         if(get_bits1(&s->gb)==0)
             return -1;
@@ -3050,11 +3052,11 @@ static int h263_decode_gob_header(MpegEncContext *s)
         gfid = get_bits(&s->gb, 2); /* GFID */
         s->qscale = get_bits(&s->gb, 5); /* GQUANT */
     }
-        
-    if(s->mb_y >= s->mb_height) 
+
+    if(s->mb_y >= s->mb_height)
         return -1;
 
-    if(s->qscale==0) 
+    if(s->qscale==0)
         return -1;
 
     return 0;
@@ -3076,7 +3078,7 @@ void ff_mpeg4_init_partitions(MpegEncContext *s)
     int size= end - start;
     int pb_size = (((long)start + size/3)&(~3)) - (long)start;
     int tex_size= (size - 2*pb_size)&(~3);
-    
+
     set_put_bits_buffer_size(&s->pb, pb_size);
     init_put_bits(&s->tex_pb, start + pb_size           , tex_size);
     init_put_bits(&s->pb2   , start + pb_size + tex_size, pb_size);
@@ -3132,7 +3134,7 @@ void ff_mpeg4_encode_video_packet_header(MpegEncContext *s)
 
     put_bits(&s->pb, ff_mpeg4_get_video_packet_prefix_length(s), 0);
     put_bits(&s->pb, 1, 1);
-    
+
     put_bits(&s->pb, mb_num_bits, s->mb_x + s->mb_y*s->mb_width);
     put_bits(&s->pb, s->quant_precision, s->qscale);
     put_bits(&s->pb, 1, 0); /* no HEC */
@@ -3146,7 +3148,7 @@ void ff_mpeg4_encode_video_packet_header(MpegEncContext *s)
  */
 static inline int mpeg4_is_resync(MpegEncContext *s){
     const int bits_count= get_bits_count(&s->gb);
-    
+
     if(s->workaround_bugs&FF_BUG_NO_PADDING){
         return 0;
     }
@@ -3154,17 +3156,17 @@ static inline int mpeg4_is_resync(MpegEncContext *s){
     if(bits_count + 8 >= s->gb.size_in_bits){
         int v= show_bits(&s->gb, 8);
         v|= 0x7F >> (7-(bits_count&7));
-                
+
         if(v==0x7F)
             return 1;
     }else{
         if(show_bits(&s->gb, 16) == ff_mpeg4_resync_prefix[bits_count&7]){
             int len;
             GetBitContext gb= s->gb;
-        
+
             skip_bits(&s->gb, 1);
             align_get_bits(&s->gb);
-        
+
             for(len=0; len<32; len++){
                 if(get_bits1(&s->gb)) break;
             }
@@ -3186,7 +3188,7 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
 {
     int mb_num_bits= av_log2(s->mb_num - 1) + 1;
     int header_extension=0, mb_num, len;
-    
+
     /* is there enough space left for a video packet + header */
     if( get_bits_count(&s->gb) > s->gb.size_in_bits-20) return -1;
 
@@ -3198,7 +3200,7 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
         av_log(s->avctx, AV_LOG_ERROR, "marker does not match f_code\n");
         return -1;
     }
-    
+
     if(s->shape != RECT_SHAPE){
         header_extension= get_bits1(&s->gb);
         //FIXME more stuff here
@@ -3213,12 +3215,12 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
         while(s->next_picture.mbskip_table[ s->mb_index2xy[ mb_num ] ]) mb_num++;
         if(mb_num >= s->mb_num) return -1; // slice contains just skipped MBs which where allready decoded
     }
-    
+
     s->mb_x= mb_num % s->mb_width;
     s->mb_y= mb_num / s->mb_width;
 
     if(s->shape != BIN_ONLY_SHAPE){
-        int qscale= get_bits(&s->gb, s->quant_precision); 
+        int qscale= get_bits(&s->gb, s->quant_precision);
         if(qscale)
             s->chroma_qscale=s->qscale= qscale;
     }
@@ -3230,13 +3232,13 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
         int time_increment;
         int time_incr=0;
 
-        while (get_bits1(&s->gb) != 0) 
+        while (get_bits1(&s->gb) != 0)
             time_incr++;
 
         check_marker(&s->gb, "before time_increment in video packed header");
         time_increment= get_bits(&s->gb, s->time_increment_bits);
         check_marker(&s->gb, "before vop_coding_type in video packed header");
-        
+
         skip_bits(&s->gb, 2); /* vop coding type */
         //FIXME not rect stuff here
 
@@ -3249,9 +3251,9 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
             }
 
             //FIXME reduced res stuff here
-            
+
             if (s->pict_type != I_TYPE) {
-                int f_code = get_bits(&s->gb, 3);	/* fcode_for */
+                int f_code = get_bits(&s->gb, 3);       /* fcode_for */
                 if(f_code==0){
                     av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (f_code=0)\n");
                 }
@@ -3261,11 +3263,11 @@ static int mpeg4_decode_video_packet_header(MpegEncContext *s)
                 if(b_code==0){
                     av_log(s->avctx, AV_LOG_ERROR, "Error, video packet header damaged (b_code=0)\n");
                 }
-            }       
+            }
         }
     }
     //FIXME new-pred stuff
-    
+
 //printf("parse ok %d %d %d %d\n", mb_num, s->mb_x + s->mb_y*s->mb_width, get_bits_count(gb), get_bits_count(&s->gb));
 
     return 0;
@@ -3308,7 +3310,7 @@ void ff_mpeg4_clean_buffers(MpegEncContext *s)
  */
 int ff_h263_resync(MpegEncContext *s){
     int left, ret;
-    
+
     if(s->codec_id==CODEC_ID_MPEG4){
         skip_bits1(&s->gb);
         align_get_bits(&s->gb);
@@ -3326,8 +3328,8 @@ int ff_h263_resync(MpegEncContext *s){
     s->gb= s->last_resync_gb;
     align_get_bits(&s->gb);
     left= s->gb.size_in_bits - get_bits_count(&s->gb);
-    
-    for(;left>16+1+5+5; left-=8){ 
+
+    for(;left>16+1+5+5; left-=8){
         if(show_bits(&s->gb, 16)==0){
             GetBitContext bak= s->gb;
 
@@ -3342,7 +3344,7 @@ int ff_h263_resync(MpegEncContext *s){
         }
         skip_bits(&s->gb, 8);
     }
-    
+
     return -1;
 }
 
@@ -3355,7 +3357,7 @@ static inline int get_amv(MpegEncContext *s, int n){
     int x, y, mb_v, sum, dx, dy, shift;
     int len = 1 << (s->f_code + 4);
     const int a= s->sprite_warping_accuracy;
-    
+
     if(s->workaround_bugs & FF_BUG_AMV)
         len >>= s->quarter_sample;
 
@@ -3375,7 +3377,7 @@ static inline int get_amv(MpegEncContext *s, int n){
         sum=0;
         for(y=0; y<16; y++){
             int v;
-        
+
             v= mb_v + dy*y;
             //XXX FIXME optimize
             for(x=0; x<16; x++){
@@ -3399,7 +3401,7 @@ static inline int get_amv(MpegEncContext *s, int n){
 static int mpeg4_decode_partition_a(MpegEncContext *s){
     int mb_num;
     static const int8_t quant_tab[4] = { -1, -2, 1, 2 };
-    
+
     /* decode first partition */
     mb_num=0;
     s->first_slice_line=1;
@@ -3409,12 +3411,12 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){
             const int xy= s->mb_x + s->mb_y*s->mb_stride;
             int cbpc;
             int dir=0;
-            
+
             mb_num++;
             ff_update_block_index(s);
             if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1)
                 s->first_slice_line=0;
-            
+
             if(s->pict_type==I_TYPE){
                 int i;
 
@@ -3429,7 +3431,7 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){
                         return -1;
                     }
                 }while(cbpc == 8);
-                
+
                 s->cbp_table[xy]= cbpc & 3;
                 s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
                 s->mb_intra = 1;
@@ -3442,7 +3444,7 @@ static int mpeg4_decode_partition_a(MpegEncContext *s){
                 s->mbintra_table[xy]= 1;
                 for(i=0; i<6; i++){
                     int dc_pred_dir;
-                    int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); 
+                    int dc= mpeg4_decode_dc(s, i, &dc_pred_dir);
                     if(dc < 0){
                         av_log(s->avctx, AV_LOG_ERROR, "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
                         return -1;
@@ -3491,13 +3493,13 @@ try_again:
                     goto try_again;
 
                 s->cbp_table[xy]= cbpc&(8+3); //8 is dquant
-    
+
                 s->mb_intra = ((cbpc & 4) != 0);
-        
+
                 if(s->mb_intra){
                     s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
                     s->mbintra_table[xy]= 1;
-                    mot_val[0       ]= mot_val[2       ]= 
+                    mot_val[0       ]= mot_val[2       ]=
                     mot_val[0+stride]= mot_val[2+stride]= 0;
                     mot_val[1       ]= mot_val[3       ]=
                     mot_val[1+stride]= mot_val[3+stride]= 0;
@@ -3508,7 +3510,7 @@ try_again:
                     if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0)
                         s->mcsel= get_bits1(&s->gb);
                     else s->mcsel= 0;
-        
+
                     if ((cbpc & 16) == 0) {
                         /* 16x16 motion prediction */
 
@@ -3540,7 +3542,7 @@ try_again:
                             mx = h263_decode_motion(s, pred_x, s->f_code);
                             if (mx >= 0xffff)
                                 return -1;
-                
+
                             my = h263_decode_motion(s, pred_y, s->f_code);
                             if (my >= 0xffff)
                                 return -1;
@@ -3576,7 +3578,7 @@ static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
             ff_update_block_index(s);
             if(s->mb_x == s->resync_mb_x && s->mb_y == s->resync_mb_y+1)
                 s->first_slice_line=0;
-            
+
             if(s->pict_type==I_TYPE){
                 int ac_pred= get_bits1(&s->gb);
                 int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
@@ -3584,11 +3586,11 @@ static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
                     av_log(s->avctx, AV_LOG_ERROR, "cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
                     return -1;
                 }
-                
+
                 s->cbp_table[xy]|= cbpy<<2;
-                s->current_picture.mb_type[xy] |= ac_pred*MB_TYPE_ACPRED; 
+                s->current_picture.mb_type[xy] |= ac_pred*MB_TYPE_ACPRED;
             }else{ /* P || S_TYPE */
-                if(IS_INTRA(s->current_picture.mb_type[xy])){          
+                if(IS_INTRA(s->current_picture.mb_type[xy])){
                     int dir=0,i;
                     int ac_pred = get_bits1(&s->gb);
                     int cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
@@ -3597,7 +3599,7 @@ static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
                         av_log(s->avctx, AV_LOG_ERROR, "I cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
                         return -1;
                     }
-                    
+
                     if(s->cbp_table[xy] & 8) {
                         ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
                     }
@@ -3605,7 +3607,7 @@ static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
 
                     for(i=0; i<6; i++){
                         int dc_pred_dir;
-                        int dc= mpeg4_decode_dc(s, i, &dc_pred_dir); 
+                        int dc= mpeg4_decode_dc(s, i, &dc_pred_dir);
                         if(dc < 0){
                             av_log(s->avctx, AV_LOG_ERROR, "DC corrupted at %d %d\n", s->mb_x, s->mb_y);
                             return -1;
@@ -3615,7 +3617,7 @@ static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
                     }
                     s->cbp_table[xy]&= 3; //remove dquant
                     s->cbp_table[xy]|= cbpy<<2;
-                    s->current_picture.mb_type[xy] |= ac_pred*MB_TYPE_ACPRED; 
+                    s->current_picture.mb_type[xy] |= ac_pred*MB_TYPE_ACPRED;
                     s->pred_dir_table[xy]= dir;
                 }else if(IS_SKIP(s->current_picture.mb_type[xy])){
                     s->current_picture.qscale_table[xy]= s->qscale;
@@ -3627,7 +3629,7 @@ static int mpeg4_decode_partition_b(MpegEncContext *s, int mb_count){
                         av_log(s->avctx, AV_LOG_ERROR, "P cbpy corrupted at %d %d\n", s->mb_x, s->mb_y);
                         return -1;
                     }
-                    
+
                     if(s->cbp_table[xy] & 8) {
                         ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
                     }
@@ -3653,13 +3655,13 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
     int mb_num;
     const int part_a_error= s->pict_type==I_TYPE ? (DC_ERROR|MV_ERROR) : MV_ERROR;
     const int part_a_end  = s->pict_type==I_TYPE ? (DC_END  |MV_END)   : MV_END;
-    
-    mb_num= mpeg4_decode_partition_a(s);    
+
+    mb_num= mpeg4_decode_partition_a(s);
     if(mb_num<0){
         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, part_a_error);
         return -1;
     }
-    
+
     if(s->resync_mb_x + s->resync_mb_y*s->mb_width + mb_num > s->mb_num){
         av_log(s->avctx, AV_LOG_ERROR, "slice below monitor ...\n");
         ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, part_a_error);
@@ -3667,7 +3669,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
     }
 
     s->mb_num_left= mb_num;
-        
+
     if(s->pict_type==I_TYPE){
         while(show_bits(&s->gb, 9) == 1)
             skip_bits(&s->gb, 9);
@@ -3684,7 +3686,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
         }
     }
     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, part_a_end);
-    
+
     if( mpeg4_decode_partition_b(s, mb_num) < 0){
         if(s->pict_type==P_TYPE)
             ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, DC_ERROR);
@@ -3694,7 +3696,7 @@ int ff_mpeg4_decode_partitions(MpegEncContext *s)
             ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, DC_END);
     }
 
-    return 0;        
+    return 0;
 }
 
 /**
@@ -3712,7 +3714,7 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
     if(s->current_picture.qscale_table[xy] != s->qscale){
         ff_set_qscale(s, s->current_picture.qscale_table[xy] );
     }
-    
+
     if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
         int i;
         for(i=0; i<4; i++){
@@ -3738,7 +3740,7 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
             s->ac_pred = IS_ACPRED(s->current_picture.mb_type[xy]);
         }else if(!s->mb_intra){
 //            s->mcsel= 0; //FIXME do we need to init that
-            
+
             s->mv_dir = MV_DIR_FORWARD;
             if (IS_8X8(mb_type)) {
                 s->mv_type = MV_TYPE_8X8;
@@ -3771,7 +3773,7 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
         if(mpeg4_is_resync(s))
             return SLICE_END;
         else
-            return SLICE_NOEND;     
+            return SLICE_NOEND;
     }else{
         if(mpeg4_is_resync(s)){
             const int delta= s->mb_x + 1 == s->mb_width ? 2 : 1;
@@ -3787,35 +3789,35 @@ static int mpeg4_decode_partitioned_mb(MpegEncContext *s, DCTELEM block[6][64])
  */
 static void preview_obmc(MpegEncContext *s){
     GetBitContext gb= s->gb;
-    
+
     int cbpc, i, pred_x, pred_y, mx, my;
     int16_t *mot_val;
     const int xy= s->mb_x + 1 + s->mb_y * s->mb_stride;
     const int stride= s->b8_stride*2;
-    
+
     for(i=0; i<4; i++)
         s->block_index[i]+= 2;
     for(i=4; i<6; i++)
         s->block_index[i]+= 1;
     s->mb_x++;
-    
+
     assert(s->pict_type == P_TYPE);
 
     do{
         if (get_bits1(&s->gb)) {
             /* skip mb */
             mot_val = s->current_picture.motion_val[0][ s->block_index[0] ];
-            mot_val[0       ]= mot_val[2       ]= 
+            mot_val[0       ]= mot_val[2       ]=
             mot_val[0+stride]= mot_val[2+stride]= 0;
             mot_val[1       ]= mot_val[3       ]=
             mot_val[1+stride]= mot_val[3+stride]= 0;
-            
+
             s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
             goto end;
         }
         cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
     }while(cbpc == 20);
-    
+
     if(cbpc & 4){
         s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
     }else{
@@ -3827,37 +3829,37 @@ static void preview_obmc(MpegEncContext *s){
             }else
                 skip_bits(&s->gb, 2);
         }
-        
+
         if ((cbpc & 16) == 0) {
-                s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; 
+                s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
                 /* 16x16 motion prediction */
                 mot_val= h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
                 if (s->umvplus)
                    mx = h263p_decode_umotion(s, pred_x);
                 else
                    mx = h263_decode_motion(s, pred_x, 1);
-            
+
                 if (s->umvplus)
                    my = h263p_decode_umotion(s, pred_y);
                 else
                    my = h263_decode_motion(s, pred_y, 1);
-            
-                mot_val[0       ]= mot_val[2       ]= 
+
+                mot_val[0       ]= mot_val[2       ]=
                 mot_val[0+stride]= mot_val[2+stride]= mx;
                 mot_val[1       ]= mot_val[3       ]=
                 mot_val[1+stride]= mot_val[3+stride]= my;
         } else {
-            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0; 
+            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0;
             for(i=0;i<4;i++) {
                 mot_val = h263_pred_motion(s, i, 0, &pred_x, &pred_y);
                 if (s->umvplus)
                   mx = h263p_decode_umotion(s, pred_x);
                 else
                   mx = h263_decode_motion(s, pred_x, 1);
-                
+
                 if (s->umvplus)
                   my = h263p_decode_umotion(s, pred_y);
-                else    
+                else
                   my = h263_decode_motion(s, pred_y, 1);
                 if (s->umvplus && (mx - pred_x) == 1 && (my - pred_y) == 1)
                   skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
@@ -3867,7 +3869,7 @@ static void preview_obmc(MpegEncContext *s){
         }
     }
 end:
-        
+
     for(i=0; i<4; i++)
         s->block_index[i]-= 2;
     for(i=4; i<6; i++)
@@ -3896,9 +3898,9 @@ int ff_h263_decode_mb(MpegEncContext *s,
     int cbpc, cbpy, i, cbp, pred_x, pred_y, mx, my, dquant;
     int16_t *mot_val;
     const int xy= s->mb_x + s->mb_y * s->mb_stride;
-    
+
     assert(!s->h263_pred);
-    
+
     if (s->pict_type == P_TYPE) {
         do{
             if (get_bits1(&s->gb)) {
@@ -3921,26 +3923,26 @@ int ff_h263_decode_mb(MpegEncContext *s,
                 return -1;
             }
         }while(cbpc == 20);
-        
+
         s->dsp.clear_blocks(s->block[0]);
-        
+
         dquant = cbpc & 8;
         s->mb_intra = ((cbpc & 4) != 0);
         if (s->mb_intra) goto intra;
-        
+
         cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
-        
+
         if(s->alt_inter_vlc==0 || (cbpc & 3)!=3)
             cbpy ^= 0xF;
-        
+
         cbp = (cbpc & 3) | (cbpy << 2);
         if (dquant) {
             h263_decode_dquant(s);
         }
-        
+
         s->mv_dir = MV_DIR_FORWARD;
         if ((cbpc & 16) == 0) {
-            s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; 
+            s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
             /* 16x16 motion prediction */
             s->mv_type = MV_TYPE_16X16;
             h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
@@ -3948,24 +3950,24 @@ int ff_h263_decode_mb(MpegEncContext *s,
                mx = h263p_decode_umotion(s, pred_x);
             else
                mx = h263_decode_motion(s, pred_x, 1);
-            
+
             if (mx >= 0xffff)
                 return -1;
-            
+
             if (s->umvplus)
                my = h263p_decode_umotion(s, pred_y);
             else
                my = h263_decode_motion(s, pred_y, 1);
-            
+
             if (my >= 0xffff)
                 return -1;
             s->mv[0][0][0] = mx;
             s->mv[0][0][1] = my;
 
             if (s->umvplus && (mx - pred_x) == 1 && (my - pred_y) == 1)
-               skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */                   
+               skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
         } else {
-            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0; 
+            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0;
             s->mv_type = MV_TYPE_8X8;
             for(i=0;i<4;i++) {
                 mot_val = h263_pred_motion(s, i, 0, &pred_x, &pred_y);
@@ -3975,10 +3977,10 @@ int ff_h263_decode_mb(MpegEncContext *s,
                   mx = h263_decode_motion(s, pred_x, 1);
                 if (mx >= 0xffff)
                     return -1;
-                
+
                 if (s->umvplus)
                   my = h263p_decode_umotion(s, pred_y);
-                else    
+                else
                   my = h263_decode_motion(s, pred_y, 1);
                 if (my >= 0xffff)
                     return -1;
@@ -4009,10 +4011,10 @@ int ff_h263_decode_mb(MpegEncContext *s,
         int16_t *mot_val1 = s->current_picture.motion_val[1][ 2*(s->mb_x + s->mb_y*stride) ];
 //        const int mv_xy= s->mb_x + 1 + s->mb_y * s->mb_stride;
 
-        //FIXME ugly 
-        mot_val0[0       ]= mot_val0[2       ]= mot_val0[0+2*stride]= mot_val0[2+2*stride]= 
-        mot_val0[1       ]= mot_val0[3       ]= mot_val0[1+2*stride]= mot_val0[3+2*stride]= 
-        mot_val1[0       ]= mot_val1[2       ]= mot_val1[0+2*stride]= mot_val1[2+2*stride]= 
+        //FIXME ugly
+        mot_val0[0       ]= mot_val0[2       ]= mot_val0[0+2*stride]= mot_val0[2+2*stride]=
+        mot_val0[1       ]= mot_val0[3       ]= mot_val0[1+2*stride]= mot_val0[3+2*stride]=
+        mot_val1[0       ]= mot_val1[2       ]= mot_val1[0+2*stride]= mot_val1[2+2*stride]=
         mot_val1[1       ]= mot_val1[3       ]= mot_val1[1+2*stride]= mot_val1[3+2*stride]= 0;
 
         do{
@@ -4040,14 +4042,14 @@ int ff_h263_decode_mb(MpegEncContext *s,
                 av_log(s->avctx, AV_LOG_ERROR, "b cbpy damaged at %d %d\n", s->mb_x, s->mb_y);
                 return -1;
             }
-        
+
             if(s->alt_inter_vlc==0 || (cbpc & 3)!=3)
                 cbpy ^= 0xF;
-        
+
             cbp = (cbpc & 3) | (cbpy << 2);
         }else
             cbp=0;
-            
+
         assert(!s->mb_intra);
 
         if(IS_QUANT(mb_type)){
@@ -4068,17 +4070,17 @@ int ff_h263_decode_mb(MpegEncContext *s,
 
                 mx = h263_decode_motion(s, mx, 1);
                 my = h263_decode_motion(s, my, 1);
-                
+
                 s->mv[0][0][0] = mx;
                 s->mv[0][0][1] = my;
                 mot_val[0       ]= mot_val[2       ]= mot_val[0+2*stride]= mot_val[2+2*stride]= mx;
                 mot_val[1       ]= mot_val[3       ]= mot_val[1+2*stride]= mot_val[3+2*stride]= my;
             }
-    
+
             if(USES_LIST(mb_type, 1)){
                 int16_t *mot_val= h263_pred_motion(s, 0, 1, &mx, &my);
                 s->mv_dir |= MV_DIR_BACKWARD;
-                
+
                 mx = h263_decode_motion(s, mx, 1);
                 my = h263_decode_motion(s, my, 1);
 
@@ -4088,7 +4090,7 @@ int ff_h263_decode_mb(MpegEncContext *s,
                 mot_val[1       ]= mot_val[3       ]= mot_val[1+2*stride]= mot_val[3+2*stride]= my;
             }
         }
-          
+
         s->current_picture.mb_type[xy]= mb_type;
 
         /* decode each block */
@@ -4116,12 +4118,12 @@ intra:
             s->ac_pred = get_bits1(&s->gb);
             if(s->ac_pred){
                 s->current_picture.mb_type[xy]= MB_TYPE_INTRA | MB_TYPE_ACPRED;
-            
+
                 s->h263_aic_dir = get_bits1(&s->gb);
             }
         }else
             s->ac_pred = 0;
-        
+
         cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
         if(cbpy<0){
             av_log(s->avctx, AV_LOG_ERROR, "I cbpy damaged at %d %d\n", s->mb_x, s->mb_y);
@@ -4144,7 +4146,7 @@ end:
         /* per-MB end of slice check */
     {
         int v= show_bits(&s->gb, 16);
-    
+
         if(get_bits_count(&s->gb) + 16 > s->gb.size_in_bits){
             v>>= get_bits_count(&s->gb) + 16 - s->gb.size_in_bits;
         }
@@ -4153,7 +4155,7 @@ end:
             return SLICE_END;
     }
 
-    return SLICE_OK;     
+    return SLICE_OK;
 }
 
 int ff_mpeg4_decode_mb(MpegEncContext *s,
@@ -4163,9 +4165,9 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
     int16_t *mot_val;
     static int8_t quant_tab[4] = { -1, -2, 1, 2 };
     const int xy= s->mb_x + s->mb_y * s->mb_stride;
-    
+
     assert(s->h263_pred);
-    
+
     if (s->pict_type == P_TYPE || s->pict_type==S_TYPE) {
         do{
             if (get_bits1(&s->gb)) {
@@ -4198,24 +4200,24 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                 return -1;
             }
         }while(cbpc == 20);
-        
+
         s->dsp.clear_blocks(s->block[0]);
         dquant = cbpc & 8;
         s->mb_intra = ((cbpc & 4) != 0);
         if (s->mb_intra) goto intra;
-        
+
         if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE && (cbpc & 16) == 0)
             s->mcsel= get_bits1(&s->gb);
         else s->mcsel= 0;
         cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1) ^ 0x0F;
-        
+
         cbp = (cbpc & 3) | (cbpy << 2);
         if (dquant) {
             ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
         }
         if((!s->progressive_sequence) && (cbp || (s->workaround_bugs&FF_BUG_XVID_ILACE)))
             s->interlaced_dct= get_bits1(&s->gb);
-        
+
         s->mv_dir = MV_DIR_FORWARD;
         if ((cbpc & 16) == 0) {
             if(s->mcsel){
@@ -4227,7 +4229,7 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                 s->mv[0][0][0] = mx;
                 s->mv[0][0][1] = my;
             }else if((!s->progressive_sequence) && get_bits1(&s->gb)){
-                s->current_picture.mb_type[xy]= MB_TYPE_16x8 | MB_TYPE_L0 | MB_TYPE_INTERLACED; 
+                s->current_picture.mb_type[xy]= MB_TYPE_16x8 | MB_TYPE_L0 | MB_TYPE_INTERLACED;
                 /* 16x8 field motion prediction */
                 s->mv_type= MV_TYPE_FIELD;
 
@@ -4235,12 +4237,12 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                 s->field_select[0][1]= get_bits1(&s->gb);
 
                 h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
-                
+
                 for(i=0; i<2; i++){
                     mx = h263_decode_motion(s, pred_x, s->f_code);
                     if (mx >= 0xffff)
                         return -1;
-            
+
                     my = h263_decode_motion(s, pred_y/2, s->f_code);
                     if (my >= 0xffff)
                         return -1;
@@ -4249,31 +4251,31 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                     s->mv[0][i][1] = my;
                 }
             }else{
-                s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0; 
+                s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
                 /* 16x16 motion prediction */
                 s->mv_type = MV_TYPE_16X16;
                 h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
                 mx = h263_decode_motion(s, pred_x, s->f_code);
-            
+
                 if (mx >= 0xffff)
                     return -1;
-            
+
                 my = h263_decode_motion(s, pred_y, s->f_code);
-            
+
                 if (my >= 0xffff)
                     return -1;
                 s->mv[0][0][0] = mx;
                 s->mv[0][0][1] = my;
             }
         } else {
-            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0; 
+            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0;
             s->mv_type = MV_TYPE_8X8;
             for(i=0;i<4;i++) {
                 mot_val = h263_pred_motion(s, i, 0, &pred_x, &pred_y);
                 mx = h263_decode_motion(s, pred_x, s->f_code);
                 if (mx >= 0xffff)
                     return -1;
-                
+
                 my = h263_decode_motion(s, pred_y, s->f_code);
                 if (my >= 0xffff)
                     return -1;
@@ -4293,9 +4295,9 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
 
         if(s->mb_x==0){
             for(i=0; i<2; i++){
-                s->last_mv[i][0][0]= 
-                s->last_mv[i][0][1]= 
-                s->last_mv[i][1][0]= 
+                s->last_mv[i][0][0]=
+                s->last_mv[i][0][1]=
+                s->last_mv[i][1][0]=
                 s->last_mv[i][1][1]= 0;
             }
         }
@@ -4314,11 +4316,11 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
             s->mv[0][0][1] = 0;
             s->mv[1][0][0] = 0;
             s->mv[1][0][1] = 0;
-            s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0; 
+            s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
             goto end;
         }
 
-        modb1= get_bits1(&s->gb); 
+        modb1= get_bits1(&s->gb);
         if(modb1){
             mb_type= MB_TYPE_DIRECT2 | MB_TYPE_SKIP | MB_TYPE_L0L1; //like MB_TYPE_B_DIRECT but no vectors coded
             cbp=0;
@@ -4373,7 +4375,7 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                     s->last_mv[0][1][0]= s->last_mv[0][0][0]= s->mv[0][0][0] = mx;
                     s->last_mv[0][1][1]= s->last_mv[0][0][1]= s->mv[0][0][1] = my;
                 }
-    
+
                 if(USES_LIST(mb_type, 1)){
                     s->mv_dir |= MV_DIR_BACKWARD;
 
@@ -4387,7 +4389,7 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
 
                 if(USES_LIST(mb_type, 0)){
                     s->mv_dir = MV_DIR_FORWARD;
-                
+
                     for(i=0; i<2; i++){
                         mx = h263_decode_motion(s, s->last_mv[0][i][0]  , s->f_code);
                         my = h263_decode_motion(s, s->last_mv[0][i][1]/2, s->f_code);
@@ -4395,7 +4397,7 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                         s->last_mv[0][i][1]= (s->mv[0][i][1] = my)*2;
                     }
                 }
-    
+
                 if(USES_LIST(mb_type, 1)){
                     s->mv_dir |= MV_DIR_BACKWARD;
 
@@ -4408,7 +4410,7 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                 }
             }
         }
-          
+
         if(IS_DIRECT(mb_type)){
             if(IS_SKIP(mb_type))
                 mx=my=0;
@@ -4416,7 +4418,7 @@ int ff_mpeg4_decode_mb(MpegEncContext *s,
                 mx = h263_decode_motion(s, 0, 1);
                 my = h263_decode_motion(s, 0, 1);
             }
- 
+
             s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
             mb_type |= ff_mpeg4_set_direct_mv(s, mx, my);
         }
@@ -4438,7 +4440,7 @@ intra:
             s->current_picture.mb_type[xy]= MB_TYPE_INTRA | MB_TYPE_ACPRED;
         else
             s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
-        
+
         cbpy = get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
         if(cbpy<0){
             av_log(s->avctx, AV_LOG_ERROR, "I cbpy damaged at %d %d\n", s->mb_x, s->mb_y);
@@ -4448,7 +4450,7 @@ intra:
         if (dquant) {
             ff_set_qscale(s, s->qscale + quant_tab[get_bits(&s->gb, 2)]);
         }
-        
+
         if(!s->progressive_sequence)
             s->interlaced_dct= get_bits1(&s->gb);
 
@@ -4480,7 +4482,7 @@ end:
         }
     }
 
-    return SLICE_OK;     
+    return SLICE_OK;
 }
 
 static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
@@ -4515,7 +4517,7 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
             val += 64;
         if (pred > 32 && val > 63)
             val -= 64;
-        
+
     }
     return val;
 }
@@ -4524,12 +4526,12 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code)
 static int h263p_decode_umotion(MpegEncContext * s, int pred)
 {
    int code = 0, sign;
-   
+
    if (get_bits1(&s->gb)) /* Motion difference = 0 */
       return pred;
-   
+
    code = 2 + get_bits1(&s->gb);
-   
+
    while (get_bits1(&s->gb))
    {
       code <<= 1;
@@ -4537,12 +4539,12 @@ static int h263p_decode_umotion(MpegEncContext * s, int pred)
    }
    sign = code & 1;
    code >>= 1;
-   
+
    code = (sign) ? (pred - code) : (pred + code);
 #ifdef DEBUG
    av_log( s->avctx, AV_LOG_DEBUG,"H.263+ UMV Motion = %d\n", code);
 #endif
-   return code;   
+   return code;
 
 }
 
@@ -4559,7 +4561,7 @@ static int h263_decode_block(MpegEncContext * s, DCTELEM * block,
         rl = &rl_intra_aic;
         i = 0;
         if (s->ac_pred) {
-            if (s->h263_aic_dir) 
+            if (s->h263_aic_dir)
                 scan_table = s->intra_v_scantable.permutated; /* left */
             else
                 scan_table = s->intra_h_scantable.permutated; /* top */
@@ -4667,7 +4669,7 @@ retry:
             break;
         i++;
     }
-not_coded:    
+not_coded:
     if (s->mb_intra && s->h263_aic) {
         h263_pred_acdc(s, block, n);
         i = 63;
@@ -4686,9 +4688,9 @@ static inline int mpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
 {
     int level, code;
 
-    if (n < 4) 
+    if (n < 4)
         code = get_vlc2(&s->gb, dc_lum.table, DC_VLC_BITS, 1);
-    else 
+    else
         code = get_vlc2(&s->gb, dc_chrom.table, DC_VLC_BITS, 1);
     if (code < 0 || code > 9 /* && s->nbit<9 */){
         av_log(s->avctx, AV_LOG_ERROR, "illegal dc vlc\n");
@@ -4738,10 +4740,10 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
     int qmul, qadd;
 
     //Note intra & rvlc should be optimized away if this is inlined
-    
+
     if(intra) {
       if(s->qscale < s->intra_dc_threshold){
-	/* DC coef */
+        /* DC coef */
         if(s->partitioned_frame){
             level = s->dc_val[0][ s->block_index[n] ];
             if(n<4) level= FASTDIV((level + (s->y_dc_scale>>1)), s->y_dc_scale);
@@ -4756,11 +4758,11 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
         i = 0;
       }else{
             i = -1;
-      }  
-        if (!coded) 
+      }
+        if (!coded)
             goto not_coded;
-        
-        if(rvlc){        
+
+        if(rvlc){
             rl = &rvlc_rl_intra;
             rl_vlc = rvlc_rl_intra.rl_vlc[0];
         }else{
@@ -4768,7 +4770,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
             rl_vlc = rl_intra.rl_vlc[0];
         }
         if (s->ac_pred) {
-            if (dc_pred_dir == 0) 
+            if (dc_pred_dir == 0)
                 scan_table = s->intra_v_scantable.permutated; /* left */
             else
                 scan_table = s->intra_h_scantable.permutated; /* top */
@@ -4785,24 +4787,24 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
         }
         if(rvlc) rl = &rvlc_rl_inter;
         else     rl = &rl_inter;
-   
+
         scan_table = s->intra_scantable.permutated;
 
         if(s->mpeg_quant){
             qmul=1;
             qadd=0;
-            if(rvlc){        
-                rl_vlc = rvlc_rl_inter.rl_vlc[0];        
+            if(rvlc){
+                rl_vlc = rvlc_rl_inter.rl_vlc[0];
             }else{
-                rl_vlc = rl_inter.rl_vlc[0];        
+                rl_vlc = rl_inter.rl_vlc[0];
             }
         }else{
             qmul = s->qscale << 1;
             qadd = (s->qscale - 1) | 1;
-            if(rvlc){        
-                rl_vlc = rvlc_rl_inter.rl_vlc[s->qscale];        
+            if(rvlc){
+                rl_vlc = rvlc_rl_inter.rl_vlc[s->qscale];
             }else{
-                rl_vlc = rl_inter.rl_vlc[s->qscale];        
+                rl_vlc = rl_inter.rl_vlc[s->qscale];
             }
         }
     }
@@ -4812,25 +4814,25 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
         UPDATE_CACHE(re, &s->gb);
         GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2, 0);
         if (level==0) {
-          /* escape */                
+          /* escape */
           if(rvlc){
                 if(SHOW_UBITS(re, &s->gb, 1)==0){
                     av_log(s->avctx, AV_LOG_ERROR, "1. marker bit missing in rvlc esc\n");
                     return -1;
                 }; SKIP_CACHE(re, &s->gb, 1);
- 
+
                 last=  SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1);
                 run=   SHOW_UBITS(re, &s->gb, 6); LAST_SKIP_CACHE(re, &s->gb, 6);
                 SKIP_COUNTER(re, &s->gb, 1+1+6);
                 UPDATE_CACHE(re, &s->gb);
-              
+
                 if(SHOW_UBITS(re, &s->gb, 1)==0){
                     av_log(s->avctx, AV_LOG_ERROR, "2. marker bit missing in rvlc esc\n");
                     return -1;
                 }; SKIP_CACHE(re, &s->gb, 1);
- 
+
                 level= SHOW_UBITS(re, &s->gb, 11); SKIP_CACHE(re, &s->gb, 11);
- 
+
                 if(SHOW_UBITS(re, &s->gb, 5)!=0x10){
                     av_log(s->avctx, AV_LOG_ERROR, "reverse esc missing\n");
                     return -1;
@@ -4846,7 +4848,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
             int cache;
             cache= GET_CACHE(re, &s->gb);
 
-            if(IS_3IV1) 
+            if(IS_3IV1)
                 cache ^= 0xC0000000;
 
             if (cache&0x80000000) {
@@ -4875,7 +4877,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
 
                         SKIP_COUNTER(re, &s->gb, 1+12+1);
                     }
- 
+
 #if 0
                     if(s->error_resilience >= FF_ER_COMPLIANT){
                         const int abs_level= ABS(level);
@@ -4898,7 +4900,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                         }
                     }
 #endif
-		    if (level>0) level= level * qmul + qadd;
+                    if (level>0) level= level * qmul + qadd;
                     else         level= level * qmul - qadd;
 
                     if((unsigned)(level + 2048) > 4095){
@@ -4965,7 +4967,7 @@ static inline int mpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
     if (intra) {
         if(s->qscale >= s->intra_dc_threshold){
             block[0] = ff_mpeg4_pred_dc(s, n, block[0], &dc_pred_dir, 0);
-            
+
             if(i == -1) i=0;
         }
 
@@ -4983,18 +4985,18 @@ int h263_decode_picture_header(MpegEncContext *s)
 {
     int format, width, height, i;
     uint32_t startcode;
-    
+
     align_get_bits(&s->gb);
 
     startcode= get_bits(&s->gb, 22-8);
 
     for(i= s->gb.size_in_bits - get_bits_count(&s->gb); i>24; i-=8) {
         startcode = ((startcode << 8) | get_bits(&s->gb, 8)) & 0x003FFFFF;
-        
+
         if(startcode == 0x20)
             break;
     }
-        
+
     if (startcode != 0x20) {
         av_log(s->avctx, AV_LOG_ERROR, "Bad picture start code\n");
         return -1;
@@ -5006,7 +5008,7 @@ int h263_decode_picture_header(MpegEncContext *s)
     s->current_picture_ptr->pts=
     s->picture_number= (s->picture_number&~0xFF) + i;
 
-    /* PTYPE starts here */    
+    /* PTYPE starts here */
     if (get_bits1(&s->gb) != 1) {
         /* marker */
         av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n");
@@ -5014,18 +5016,18 @@ int h263_decode_picture_header(MpegEncContext *s)
     }
     if (get_bits1(&s->gb) != 0) {
         av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
-        return -1;	/* h263 id */
+        return -1;      /* h263 id */
     }
-    skip_bits1(&s->gb);	/* split screen off */
-    skip_bits1(&s->gb);	/* camera  off */
-    skip_bits1(&s->gb);	/* freeze picture release off */
+    skip_bits1(&s->gb);         /* split screen off */
+    skip_bits1(&s->gb);         /* camera  off */
+    skip_bits1(&s->gb);         /* freeze picture release off */
 
     format = get_bits(&s->gb, 3);
     /*
         0    forbidden
         1    sub-QCIF
         10   QCIF
-        7	extended PTYPE (PLUSPTYPE)
+        7       extended PTYPE (PLUSPTYPE)
     */
 
     if (format != 7 && format != 6) {
@@ -5035,24 +5037,24 @@ int h263_decode_picture_header(MpegEncContext *s)
         height = h263_format[format][1];
         if (!width)
             return -1;
-        
+
         s->pict_type = I_TYPE + get_bits1(&s->gb);
 
-        s->h263_long_vectors = get_bits1(&s->gb); 
+        s->h263_long_vectors = get_bits1(&s->gb);
 
         if (get_bits1(&s->gb) != 0) {
             av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n");
-            return -1;	/* SAC: off */
+            return -1; /* SAC: off */
         }
         s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */
         s->unrestricted_mv = s->h263_long_vectors || s->obmc;
-        
+
         if (get_bits1(&s->gb) != 0) {
             av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n");
-            return -1;	/* not PB frame */
+            return -1; /* not PB frame */
         }
         s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
-        skip_bits1(&s->gb);	/* Continuous Presence Multipoint mode: off */
+        skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
 
         s->width = width;
         s->height = height;
@@ -5060,14 +5062,14 @@ int h263_decode_picture_header(MpegEncContext *s)
         s->avctx->time_base= (AVRational){1001, 30000};
     } else {
         int ufep;
-        
+
         /* H.263v2 */
         s->h263_plus = 1;
         ufep = get_bits(&s->gb, 3); /* Update Full Extended PTYPE */
 
-        /* ufep other than 0 and 1 are reserved */        
+        /* ufep other than 0 and 1 are reserved */
         if (ufep == 1) {
-            /* OPPTYPE */       
+            /* OPPTYPE */
             format = get_bits(&s->gb, 3);
             dprintf("ufep=1, format: %d\n", format);
             s->custom_pcf= get_bits1(&s->gb);
@@ -5079,7 +5081,7 @@ int h263_decode_picture_header(MpegEncContext *s)
             s->h263_aic = get_bits1(&s->gb); /* Advanced Intra Coding (AIC) */
             s->loop_filter= get_bits1(&s->gb);
             s->unrestricted_mv = s->umvplus || s->obmc || s->loop_filter;
-            
+
             s->h263_slice_structured= get_bits1(&s->gb);
             if (get_bits1(&s->gb) != 0) {
                 av_log(s->avctx, AV_LOG_ERROR, "Reference Picture Selection not supported\n");
@@ -5091,7 +5093,7 @@ int h263_decode_picture_header(MpegEncContext *s)
             s->modified_quant= get_bits1(&s->gb);
             if(s->modified_quant)
                 s->chroma_qscale_table= ff_h263_chroma_qscale_table;
-            
+
             skip_bits(&s->gb, 1); /* Prevent start code emulation */
 
             skip_bits(&s->gb, 3); /* Reserved */
@@ -5099,7 +5101,7 @@ int h263_decode_picture_header(MpegEncContext *s)
             av_log(s->avctx, AV_LOG_ERROR, "Bad UFEP type (%d)\n", ufep);
             return -1;
         }
-            
+
         /* MPPTYPE */
         s->pict_type = get_bits(&s->gb, 3);
         switch(s->pict_type){
@@ -5113,7 +5115,7 @@ int h263_decode_picture_header(MpegEncContext *s)
         skip_bits(&s->gb, 2);
         s->no_rounding = get_bits1(&s->gb);
         skip_bits(&s->gb, 4);
-        
+
         /* Get the picture dimensions */
         if (ufep) {
             if (format == 6) {
@@ -5167,7 +5169,7 @@ int h263_decode_picture_header(MpegEncContext *s)
                 s->avctx->time_base= (AVRational){1001, 30000};
             }
         }
-            
+
         if(s->custom_pcf){
             skip_bits(&s->gb, 2); //extended Temporal reference
         }
@@ -5175,7 +5177,7 @@ int h263_decode_picture_header(MpegEncContext *s)
         if (ufep) {
             if (s->umvplus) {
                 if(get_bits1(&s->gb)==0) /* Unlimited Unrestricted Motion Vectors Indicator (UUI) */
-                    skip_bits1(&s->gb); 
+                    skip_bits1(&s->gb);
             }
             if(s->h263_slice_structured){
                 if (get_bits1(&s->gb) != 0) {
@@ -5186,7 +5188,7 @@ int h263_decode_picture_header(MpegEncContext *s)
                 }
             }
         }
-            
+
         s->qscale = get_bits(&s->gb, 5);
     }
 
@@ -5213,9 +5215,9 @@ int h263_decode_picture_header(MpegEncContext *s)
         }
     }
     s->f_code = 1;
-    
+
     if(s->h263_aic){
-         s->y_dc_scale_table= 
+         s->y_dc_scale_table=
          s->c_dc_scale_table= ff_aic_dc_scale_table;
     }else{
         s->y_dc_scale_table=
@@ -5223,7 +5225,7 @@ int h263_decode_picture_header(MpegEncContext *s)
     }
 
      if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-         av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d%s%s%s%s%s%s%s%s%s %d/%d\n", 
+         av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d%s%s%s%s%s%s%s%s%s %d/%d\n",
          s->qscale, av_get_pict_type_char(s->pict_type),
          s->gb.size_in_bits, 1-s->no_rounding,
          s->obmc ? " AP" : "",
@@ -5236,7 +5238,7 @@ int h263_decode_picture_header(MpegEncContext *s)
          s->loop_filter ? " LOOP" : "",
          s->h263_slice_structured ? " SS" : "",
          s->avctx->time_base.den, s->avctx->time_base.num
-         ); 
+         );
      }
 #if 1
     if (s->pict_type == I_TYPE && s->avctx->codec_tag == ff_get_fourcc("ZYGO")){
@@ -5278,13 +5280,13 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s, GetBitContext *gb
         int length;
         int x=0, y=0;
 
-        length= get_vlc(gb, &sprite_trajectory);
+        length= get_vlc2(gb, sprite_trajectory.table, SPRITE_TRAJ_VLC_BITS, 3);
         if(length){
             x= get_xbits(gb, length);
         }
         if(!(s->divx_version==500 && s->divx_build==413)) skip_bits1(gb); /* marker bit */
-        
-        length= get_vlc(gb, &sprite_trajectory);
+
+        length= get_vlc2(gb, sprite_trajectory.table, SPRITE_TRAJ_VLC_BITS, 3);
         if(length){
             y=get_xbits(gb, length);
         }
@@ -5317,20 +5319,20 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s, GetBitContext *gb
     }
 /*    sprite_ref[3][0]= (a>>1)*(2*vop_ref[3][0] + d[0][0] + d[1][0] + d[2][0] + d[3][0]);
     sprite_ref[3][1]= (a>>1)*(2*vop_ref[3][1] + d[0][1] + d[1][1] + d[2][1] + d[3][1]); */
-    
+
 // this is mostly identical to the mpeg4 std (and is totally unreadable because of that ...)
 // perhaps it should be reordered to be more readable ...
 // the idea behind this virtual_ref mess is to be able to use shifts later per pixel instead of divides
 // so the distance between points is converted from w&h based to w2&h2 based which are of the 2^x form
-    virtual_ref[0][0]= 16*(vop_ref[0][0] + w2) 
+    virtual_ref[0][0]= 16*(vop_ref[0][0] + w2)
         + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + w2*(r*sprite_ref[1][0] - 16*vop_ref[1][0])),w);
-    virtual_ref[0][1]= 16*vop_ref[0][1] 
+    virtual_ref[0][1]= 16*vop_ref[0][1]
         + ROUNDED_DIV(((w - w2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + w2*(r*sprite_ref[1][1] - 16*vop_ref[1][1])),w);
-    virtual_ref[1][0]= 16*vop_ref[0][0] 
+    virtual_ref[1][0]= 16*vop_ref[0][0]
         + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + h2*(r*sprite_ref[2][0] - 16*vop_ref[2][0])),h);
-    virtual_ref[1][1]= 16*(vop_ref[0][1] + h2) 
+    virtual_ref[1][1]= 16*(vop_ref[0][1] + h2)
         + ROUNDED_DIV(((h - h2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + h2*(r*sprite_ref[2][1] - 16*vop_ref[2][1])),h);
-        
+
     switch(s->num_sprite_warping_points)
     {
         case 0:
@@ -5368,19 +5370,19 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s, GetBitContext *gb
                                                   + (1<<(alpha+rho-1));
             s->sprite_offset[1][0]= ( (-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][0] + 1)
                                      +( r*sprite_ref[0][1] - virtual_ref[0][1])*(-2*vop_ref[0][1] + 1)
-                                     +2*w2*r*sprite_ref[0][0] 
-                                     - 16*w2 
+                                     +2*w2*r*sprite_ref[0][0]
+                                     - 16*w2
                                      + (1<<(alpha+rho+1)));
-            s->sprite_offset[1][1]= ( (-r*sprite_ref[0][1] + virtual_ref[0][1])*(-2*vop_ref[0][0] + 1) 
+            s->sprite_offset[1][1]= ( (-r*sprite_ref[0][1] + virtual_ref[0][1])*(-2*vop_ref[0][0] + 1)
                                      +(-r*sprite_ref[0][0] + virtual_ref[0][0])*(-2*vop_ref[0][1] + 1)
-                                     +2*w2*r*sprite_ref[0][1] 
+                                     +2*w2*r*sprite_ref[0][1]
                                      - 16*w2
                                      + (1<<(alpha+rho+1)));
             s->sprite_delta[0][0]=   (-r*sprite_ref[0][0] + virtual_ref[0][0]);
             s->sprite_delta[0][1]=   (+r*sprite_ref[0][1] - virtual_ref[0][1]);
             s->sprite_delta[1][0]=   (-r*sprite_ref[0][1] + virtual_ref[0][1]);
             s->sprite_delta[1][1]=   (-r*sprite_ref[0][0] + virtual_ref[0][0]);
-            
+
             s->sprite_shift[0]= alpha+rho;
             s->sprite_shift[1]= alpha+rho+2;
             break;
@@ -5410,12 +5412,12 @@ static void mpeg4_decode_sprite_trajectory(MpegEncContext * s, GetBitContext *gb
             s->sprite_delta[0][1]=   (-r*sprite_ref[0][0] + virtual_ref[1][0])*w3;
             s->sprite_delta[1][0]=   (-r*sprite_ref[0][1] + virtual_ref[0][1])*h3;
             s->sprite_delta[1][1]=   (-r*sprite_ref[0][1] + virtual_ref[1][1])*w3;
-                                   
+
             s->sprite_shift[0]= alpha + beta + rho - min_ab;
             s->sprite_shift[1]= alpha + beta + rho - min_ab + 2;
             break;
     }
-    /* try to simplify the situation */ 
+    /* try to simplify the situation */
     if(   s->sprite_delta[0][0] == a<<s->sprite_shift[0]
        && s->sprite_delta[0][1] == 0
        && s->sprite_delta[1][0] == 0
@@ -5451,13 +5453,13 @@ printf("vop:%d:%d %d:%d %d:%d, sprite:%d:%d %d:%d %d:%d, virtual: %d:%d %d:%d\n"
     vop_ref[0][0], vop_ref[0][1],
     vop_ref[1][0], vop_ref[1][1],
     vop_ref[2][0], vop_ref[2][1],
-    sprite_ref[0][0], sprite_ref[0][1], 
-    sprite_ref[1][0], sprite_ref[1][1], 
-    sprite_ref[2][0], sprite_ref[2][1], 
-    virtual_ref[0][0], virtual_ref[0][1], 
+    sprite_ref[0][0], sprite_ref[0][1],
+    sprite_ref[1][0], sprite_ref[1][1],
+    sprite_ref[2][0], sprite_ref[2][1],
+    virtual_ref[0][0], virtual_ref[0][1],
     virtual_ref[1][0], virtual_ref[1][1]
     );
-    
+
 printf("offset: %d:%d , delta: %d %d %d %d, shift %d\n",
     s->sprite_offset[0][0], s->sprite_offset[0][1],
     s->sprite_delta[0][0], s->sprite_delta[0][1],
@@ -5479,7 +5481,7 @@ static int mpeg4_decode_gop_header(MpegEncContext * s, GetBitContext *gb){
 
     skip_bits1(gb);
     skip_bits1(gb);
-    
+
     return 0;
 }
 
@@ -5497,7 +5499,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
     }
 //printf("vo type:%d\n",s->vo_type);
     s->aspect_ratio_info= get_bits(gb, 4);
-    if(s->aspect_ratio_info == FF_ASPECT_EXTENDED){	    
+    if(s->aspect_ratio_info == FF_ASPECT_EXTENDED){
         s->avctx->sample_aspect_ratio.num= get_bits(gb, 8); // par_width
         s->avctx->sample_aspect_ratio.den= get_bits(gb, 8); // par_height
     }else{
@@ -5511,17 +5513,17 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
         }
         s->low_delay= get_bits1(gb);
         if(get_bits1(gb)){ /* vbv parameters */
-            get_bits(gb, 15);	/* first_half_bitrate */
-            skip_bits1(gb);	/* marker */
-            get_bits(gb, 15);	/* latter_half_bitrate */
-            skip_bits1(gb);	/* marker */
-            get_bits(gb, 15);	/* first_half_vbv_buffer_size */
-            skip_bits1(gb);	/* marker */
-            get_bits(gb, 3);	/* latter_half_vbv_buffer_size */
-            get_bits(gb, 11);	/* first_half_vbv_occupancy */
-            skip_bits1(gb);	/* marker */
-            get_bits(gb, 15);	/* latter_half_vbv_occupancy */
-            skip_bits1(gb);	/* marker */               
+            get_bits(gb, 15);   /* first_half_bitrate */
+            skip_bits1(gb);     /* marker */
+            get_bits(gb, 15);   /* latter_half_bitrate */
+            skip_bits1(gb);     /* marker */
+            get_bits(gb, 15);   /* first_half_vbv_buffer_size */
+            skip_bits1(gb);     /* marker */
+            get_bits(gb, 3);    /* latter_half_vbv_buffer_size */
+            get_bits(gb, 11);   /* first_half_vbv_occupancy */
+            skip_bits1(gb);     /* marker */
+            get_bits(gb, 15);   /* latter_half_vbv_occupancy */
+            skip_bits1(gb);     /* marker */
         }
     }else{
         // set low delay flag only once the smartest? low delay detection won't be overriden
@@ -5537,17 +5539,17 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
     }
 
     check_marker(gb, "before time_increment_resolution");
-    
+
     s->avctx->time_base.den = get_bits(gb, 16);
     if(!s->avctx->time_base.den){
         av_log(s->avctx, AV_LOG_ERROR, "time_base.den==0\n");
         return -1;
     }
-    
+
     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
     if (s->time_increment_bits < 1)
         s->time_increment_bits = 1;
-        
+
     check_marker(gb, "before fixed_vop_rate");
 
     if (get_bits1(gb) != 0) {   /* fixed_vop_rate  */
@@ -5570,10 +5572,10 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
 //                printf("width/height: %d %d\n", width, height);
             }
         }
-        
-        s->progressive_sequence= 
+
+        s->progressive_sequence=
         s->progressive_frame= get_bits1(gb)^1;
-        if(!get_bits1(gb) && (s->avctx->debug & FF_DEBUG_PICT_INFO)) 
+        if(!get_bits1(gb) && (s->avctx->debug & FF_DEBUG_PICT_INFO))
             av_log(s->avctx, AV_LOG_INFO, "MPEG4 OBMC not supported (very likely buggy encoder)\n");   /* OBMC Disable */
         if (vo_ver_id == 1) {
             s->vol_sprite_usage = get_bits1(gb); /* vol_sprite_usage */
@@ -5596,10 +5598,10 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
             s->sprite_warping_accuracy = get_bits(gb, 2);
             s->sprite_brightness_change= get_bits1(gb);
             if(s->vol_sprite_usage==STATIC_SPRITE)
-                s->low_latency_sprite= get_bits1(gb);            
+                s->low_latency_sprite= get_bits1(gb);
         }
         // FIXME sadct disable bit if verid!=1 && shape not rect
-        
+
         if (get_bits1(gb) == 1) {   /* not_8_bit */
             s->quant_precision = get_bits(gb, 4); /* quant_precision */
             if(get_bits(gb, 4)!=8) av_log(s->avctx, AV_LOG_ERROR, "N-bit not supported\n"); /* bits_per_pixel */
@@ -5607,19 +5609,19 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
         } else {
             s->quant_precision = 5;
         }
-        
+
         // FIXME a bunch of grayscale shape things
 
         if((s->mpeg_quant=get_bits1(gb))){ /* vol_quant_type */
             int i, v;
-            
+
             /* load default matrixes */
             for(i=0; i<64; i++){
                 int j= s->dsp.idct_permutation[i];
                 v= ff_mpeg4_default_intra_matrix[i];
                 s->intra_matrix[j]= v;
                 s->chroma_intra_matrix[j]= v;
-                
+
                 v= ff_mpeg4_default_non_intra_matrix[i];
                 s->inter_matrix[j]= v;
                 s->chroma_inter_matrix[j]= v;
@@ -5628,11 +5630,11 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
             /* load custom intra matrix */
             if(get_bits1(gb)){
                 int last=0;
-		for(i=0; i<64; i++){
+                for(i=0; i<64; i++){
                     int j;
                     v= get_bits(gb, 8);
                     if(v==0) break;
-                    
+
                     last= v;
                     j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
                     s->intra_matrix[j]= v;
@@ -5641,7 +5643,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
 
                 /* replicate last value */
                 for(; i<64; i++){
-		    int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+                    int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
                     s->intra_matrix[j]= last;
                     s->chroma_intra_matrix[j]= last;
                 }
@@ -5650,7 +5652,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
             /* load custom non intra matrix */
             if(get_bits1(gb)){
                 int last=0;
-		for(i=0; i<64; i++){
+                for(i=0; i<64; i++){
                     int j;
                     v= get_bits(gb, 8);
                     if(v==0) break;
@@ -5663,7 +5665,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
 
                 /* replicate last value */
                 for(; i<64; i++){
-		    int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
+                    int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
                     s->inter_matrix[j]= last;
                     s->chroma_inter_matrix[j]= last;
                 }
@@ -5684,7 +5686,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
         if(s->data_partitioning){
             s->rvlc= get_bits1(gb);
         }
-        
+
         if(vo_ver_id != 1) {
             s->new_pred= get_bits1(gb);
             if(s->new_pred){
@@ -5710,7 +5712,7 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
             int h_sampling_factor_m;
             int v_sampling_factor_n;
             int v_sampling_factor_m;
-            
+
             s->hierachy_type= get_bits1(gb);
             ref_layer_id= get_bits(gb, 4);
             ref_layer_sampling_dir= get_bits1(gb);
@@ -5719,17 +5721,17 @@ static int decode_vol_header(MpegEncContext *s, GetBitContext *gb){
             v_sampling_factor_n= get_bits(gb, 5);
             v_sampling_factor_m= get_bits(gb, 5);
             s->enhancement_type= get_bits1(gb);
-            
-            if(   h_sampling_factor_n==0 || h_sampling_factor_m==0 
+
+            if(   h_sampling_factor_n==0 || h_sampling_factor_m==0
                || v_sampling_factor_n==0 || v_sampling_factor_m==0){
-               
+
 //                fprintf(stderr, "illegal scalability header (VERY broken encoder), trying to workaround\n");
                 s->scalability=0;
-               
+
                 *gb= bak;
             }else
                 av_log(s->avctx, AV_LOG_ERROR, "scalability not supported\n");
-            
+
             // bin shape stuff FIXME
         }
     }
@@ -5744,10 +5746,10 @@ static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
     char buf[256];
     int i;
     int e;
-    int ver, build, ver2, ver3;
+    int ver = 0, build = 0, ver2 = 0, ver3 = 0;
     char last;
 
-    for(i=0; i<255; i++){
+    for(i=0; i<255 && gb->index < gb->size_in_bits; i++){
         if(show_bits(gb, 23) == 0) break;
         buf[i]= get_bits(gb, 8);
     }
@@ -5762,14 +5764,15 @@ static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
         s->divx_build= build;
         s->divx_packed= e==3 && last=='p';
     }
-    
+
     /* ffmpeg detection */
     e=sscanf(buf, "FFmpe%*[^b]b%d", &build)+3;
     if(e!=4)
-        e=sscanf(buf, "FFmpeg v%d.%d.%d / libavcodec build: %d", &ver, &ver2, &ver3, &build); 
+        e=sscanf(buf, "FFmpeg v%d.%d.%d / libavcodec build: %d", &ver, &ver2, &ver3, &build);
     if(e!=4){
         e=sscanf(buf, "Lavc%d.%d.%d", &ver, &ver2, &ver3)+1;
-        build= (ver<<16) + (ver2<<8) + ver3;
+        if (e>1)
+            build= (ver<<16) + (ver2<<8) + ver3;
     }
     if(e!=4){
         if(strcmp(buf, "ffmpeg")==0){
@@ -5779,7 +5782,7 @@ static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
     if(e==4){
         s->lavc_build= build;
     }
-    
+
     /* xvid detection */
     e=sscanf(buf, "XviD%d", &build);
     if(e==1){
@@ -5793,12 +5796,12 @@ static int decode_user_data(MpegEncContext *s, GetBitContext *gb){
 static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
     int time_incr, time_increment;
 
-    s->pict_type = get_bits(gb, 2) + I_TYPE;	/* pict type: I = 0 , P = 1 */
+    s->pict_type = get_bits(gb, 2) + I_TYPE;        /* pict type: I = 0 , P = 1 */
     if(s->pict_type==B_TYPE && s->low_delay && s->vol_control_parameters==0 && !(s->flags & CODEC_FLAG_LOW_DELAY)){
         av_log(s->avctx, AV_LOG_ERROR, "low_delay flag incorrectly, clearing it\n");
         s->low_delay=0;
     }
- 
+
     s->partitioned_frame= s->data_partitioning && s->pict_type!=B_TYPE;
     if(s->partitioned_frame)
         s->decode_mb= mpeg4_decode_partitioned_mb;
@@ -5806,7 +5809,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
         s->decode_mb= ff_mpeg4_decode_mb;
 
     time_incr=0;
-    while (get_bits1(gb) != 0) 
+    while (get_bits1(gb) != 0)
         time_incr++;
 
     check_marker(gb, "before time_increment");
@@ -5820,10 +5823,10 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
 
         av_log(s->avctx, AV_LOG_ERROR, "my guess is %d bits ;)\n",s->time_increment_bits);
     }
-    
+
     if(IS_3IV1) time_increment= get_bits1(gb); //FIXME investigate further
     else time_increment= get_bits(gb, s->time_increment_bits);
-    
+
 //    printf("%d %X\n", s->time_increment_bits, time_increment);
 //av_log(s->avctx, AV_LOG_DEBUG, " type:%d modulo_time_base:%d increment:%d t_frame %d\n", s->pict_type, time_incr, time_increment, s->t_frame);
     if(s->pict_type!=B_TYPE){
@@ -5846,25 +5849,25 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
 //            printf("messed up order, maybe after seeking? skipping current b frame\n");
             return FRAME_SKIPPED;
         }
-        
+
         if(s->t_frame==0) s->t_frame= s->pb_time;
         if(s->t_frame==0) s->t_frame=1; // 1/0 protection
-        s->pp_field_time= (  ROUNDED_DIV(s->last_non_b_time, s->t_frame) 
+        s->pp_field_time= (  ROUNDED_DIV(s->last_non_b_time, s->t_frame)
                            - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2;
-        s->pb_field_time= (  ROUNDED_DIV(s->time, s->t_frame) 
+        s->pb_field_time= (  ROUNDED_DIV(s->time, s->t_frame)
                            - ROUNDED_DIV(s->last_non_b_time - s->pp_time, s->t_frame))*2;
     }
 //av_log(s->avctx, AV_LOG_DEBUG, "last nonb %Ld last_base %d time %Ld pp %d pb %d t %d ppf %d pbf %d\n", s->last_non_b_time, s->last_time_base, s->time, s->pp_time, s->pb_time, s->t_frame, s->pp_field_time, s->pb_field_time);
-    
+
     if(s->avctx->time_base.num)
         s->current_picture_ptr->pts= (s->time + s->avctx->time_base.num/2) / s->avctx->time_base.num;
     else
         s->current_picture_ptr->pts= AV_NOPTS_VALUE;
     if(s->avctx->debug&FF_DEBUG_PTS)
-        av_log(s->avctx, AV_LOG_DEBUG, "MPEG4 PTS: %Ld\n", s->current_picture_ptr->pts);
+        av_log(s->avctx, AV_LOG_DEBUG, "MPEG4 PTS: %"PRId64"\n", s->current_picture_ptr->pts);
 
     check_marker(gb, "before vop_coded");
-    
+
     /* vop coded */
     if (get_bits1(gb) != 1){
         if(s->avctx->debug&FF_DEBUG_PICT_INFO)
@@ -5872,20 +5875,20 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
         return FRAME_SKIPPED;
     }
 //printf("time %d %d %d || %Ld %Ld %Ld\n", s->time_increment_bits, s->avctx->time_base.den, s->time_base,
-//s->time, s->last_non_b_time, s->last_non_b_time - s->pp_time);  
+//s->time, s->last_non_b_time, s->last_non_b_time - s->pp_time);
     if (s->shape != BIN_ONLY_SHAPE && ( s->pict_type == P_TYPE
                           || (s->pict_type == S_TYPE && s->vol_sprite_usage==GMC_SPRITE))) {
         /* rounding type for motion estimation */
-	s->no_rounding = get_bits1(gb);
+        s->no_rounding = get_bits1(gb);
     } else {
-	s->no_rounding = 0;
+        s->no_rounding = 0;
     }
 //FIXME reduced res stuff
 
      if (s->shape != RECT_SHAPE) {
          if (s->vol_sprite_usage != 1 || s->pict_type != I_TYPE) {
              int width, height, hor_spat_ref, ver_spat_ref;
- 
+
              width = get_bits(gb, 13);
              skip_bits1(gb);   /* marker */
              height = get_bits(gb, 13);
@@ -5895,13 +5898,13 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
              ver_spat_ref = get_bits(gb, 13); /* ver_spat_ref */
          }
          skip_bits1(gb); /* change_CR_disable */
- 
+
          if (get_bits1(gb) != 0) {
              skip_bits(gb, 8); /* constant_alpha_value */
          }
      }
 //FIXME complexity estimation stuff
-     
+
      if (s->shape != BIN_ONLY_SHAPE) {
          s->intra_dc_threshold= mpeg4_dc_threshold[ get_bits(gb, 3) ];
          if(!s->progressive_sequence){
@@ -5922,7 +5925,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
          ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, ff_alternate_horizontal_scan);
          ff_init_scantable(s->dsp.idct_permutation, &s->intra_v_scantable, ff_alternate_vertical_scan);
      }
- 
+
      if(s->pict_type == S_TYPE && (s->vol_sprite_usage==STATIC_SPRITE || s->vol_sprite_usage==GMC_SPRITE)){
          mpeg4_decode_sprite_trajectory(s, gb);
          if(s->sprite_brightness_change) av_log(s->avctx, AV_LOG_ERROR, "sprite_brightness_change not supported\n");
@@ -5935,28 +5938,28 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
              av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (qscale=0)\n");
              return -1; // makes no sense to continue, as there is nothing left from the image then
          }
-  
+
          if (s->pict_type != I_TYPE) {
-             s->f_code = get_bits(gb, 3);	/* fcode_for */
+             s->f_code = get_bits(gb, 3);       /* fcode_for */
              if(s->f_code==0){
                  av_log(s->avctx, AV_LOG_ERROR, "Error, header damaged or not MPEG4 header (f_code=0)\n");
                  return -1; // makes no sense to continue, as the MV decoding will break very quickly
              }
          }else
              s->f_code=1;
-     
+
          if (s->pict_type == B_TYPE) {
              s->b_code = get_bits(gb, 3);
          }else
              s->b_code=1;
 
          if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-             av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%d,%d %s size:%d pro:%d alt:%d top:%d %spel part:%d resync:%d w:%d a:%d rnd:%d vot:%d%s dc:%d\n", 
-                 s->qscale, s->f_code, s->b_code, 
-                 s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), 
-                 gb->size_in_bits,s->progressive_sequence, s->alternate_scan, s->top_field_first, 
+             av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%d,%d %s size:%d pro:%d alt:%d top:%d %spel part:%d resync:%d w:%d a:%d rnd:%d vot:%d%s dc:%d\n",
+                 s->qscale, s->f_code, s->b_code,
+                 s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")),
+                 gb->size_in_bits,s->progressive_sequence, s->alternate_scan, s->top_field_first,
                  s->quarter_sample ? "q" : "h", s->data_partitioning, s->resync_marker, s->num_sprite_warping_points,
-                 s->sprite_warping_accuracy, 1-s->no_rounding, s->vo_type, s->vol_control_parameters ? " VOLC" : " ", s->intra_dc_threshold); 
+                 s->sprite_warping_accuracy, 1-s->no_rounding, s->vo_type, s->vol_control_parameters ? " VOLC" : " ", s->intra_dc_threshold);
          }
 
          if(!s->scalability){
@@ -5982,7 +5985,7 @@ static int decode_vop_header(MpegEncContext *s, GetBitContext *gb){
 
      s->picture_number++; // better than pic number==0 always ;)
 
-     s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support 
+     s->y_dc_scale_table= ff_mpeg4_y_dc_scale_table; //FIXME add short header support
      s->c_dc_scale_table= ff_mpeg4_c_dc_scale_table;
 
      if(s->workaround_bugs&FF_BUG_EDGE){
@@ -6013,9 +6016,6 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
 
     startcode = 0xff;
     for(;;) {
-        v = get_bits(gb, 8);
-        startcode = ((startcode << 8) | v) & 0xffffffff;
-        
         if(get_bits_count(gb) >= gb->size_in_bits){
             if(gb->size_in_bits==8 && (s->divx_version || s->xvid_build)){
                 av_log(s->avctx, AV_LOG_ERROR, "frame skip %d\n", gb->size_in_bits);
@@ -6024,9 +6024,13 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
                 return -1; //end of stream
         }
 
+        /* use the bits after the test */
+        v = get_bits(gb, 8);
+        startcode = ((startcode << 8) | v) & 0xffffffff;
+
         if((startcode&0xFFFFFF00) != 0x100)
             continue; //no startcode
-        
+
         if(s->avctx->debug&FF_DEBUG_STARTCODE){
             av_log(s->avctx, AV_LOG_DEBUG, "startcode: %3X ", startcode);
             if     (startcode<=0x11F) av_log(s->avctx, AV_LOG_DEBUG, "Video Object Start");
@@ -6060,7 +6064,7 @@ int ff_mpeg4_decode_picture_header(MpegEncContext * s, GetBitContext *gb)
         }
 
         if(startcode >= 0x120 && startcode <= 0x12F){
-            if(decode_vol_header(s, gb) < 0) 
+            if(decode_vol_header(s, gb) < 0)
                 return -1;
         }
         else if(startcode == USER_DATA_STARTCODE){
@@ -6092,15 +6096,15 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
 
     if (get_bits1(&s->gb) != 1) {
         av_log(s->avctx, AV_LOG_ERROR, "Bad marker\n");
-        return -1;	/* marker */
+        return -1;      /* marker */
     }
     if (get_bits1(&s->gb) != 0) {
         av_log(s->avctx, AV_LOG_ERROR, "Bad H263 id\n");
-        return -1;	/* h263 id */
+        return -1;      /* h263 id */
     }
-    skip_bits1(&s->gb);	/* split screen off */
-    skip_bits1(&s->gb);	/* camera  off */
-    skip_bits1(&s->gb);	/* freeze picture release off */
+    skip_bits1(&s->gb);         /* split screen off */
+    skip_bits1(&s->gb);         /* camera  off */
+    skip_bits1(&s->gb);         /* freeze picture release off */
 
     format = get_bits(&s->gb, 3);
     if (format != 7) {
@@ -6110,29 +6114,29 @@ int intel_h263_decode_picture_header(MpegEncContext *s)
     s->h263_plus = 0;
 
     s->pict_type = I_TYPE + get_bits1(&s->gb);
-    
-    s->unrestricted_mv = get_bits1(&s->gb); 
+
+    s->unrestricted_mv = get_bits1(&s->gb);
     s->h263_long_vectors = s->unrestricted_mv;
 
     if (get_bits1(&s->gb) != 0) {
         av_log(s->avctx, AV_LOG_ERROR, "SAC not supported\n");
-        return -1;	/* SAC: off */
+        return -1;      /* SAC: off */
     }
     if (get_bits1(&s->gb) != 0) {
         s->obmc= 1;
         av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n");
-//        return -1;	/* advanced prediction mode: off */
+//        return -1;      /* advanced prediction mode: off */
     }
     if (get_bits1(&s->gb) != 0) {
         av_log(s->avctx, AV_LOG_ERROR, "PB frame mode no supported\n");
-        return -1;	/* PB frame mode */
+        return -1;      /* PB frame mode */
     }
 
     /* skip unknown header garbage */
     skip_bits(&s->gb, 41);
 
     s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
-    skip_bits1(&s->gb);	/* Continuous Presence Multipoint mode: off */
+    skip_bits1(&s->gb); /* Continuous Presence Multipoint mode: off */
 
     /* PEI */
     while (get_bits1(&s->gb) != 0) {
@@ -6205,8 +6209,8 @@ int flv_h263_decode_picture_header(MpegEncContext *s)
     s->dropable= s->pict_type > P_TYPE;
     if (s->dropable)
         s->pict_type = P_TYPE;
-    
-    skip_bits1(&s->gb);	/* deblocking flag */
+
+    skip_bits1(&s->gb); /* deblocking flag */
     s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
 
     s->h263_plus = 0;
@@ -6224,7 +6228,7 @@ int flv_h263_decode_picture_header(MpegEncContext *s)
         av_log(s->avctx, AV_LOG_DEBUG, "%c esc_type:%d, qp:%d num:%d\n",
                s->dropable ? 'D' : av_get_pict_type_char(s->pict_type), s->h263_flv-1, s->qscale, s->picture_number);
     }
-    
+
     s->y_dc_scale_table=
     s->c_dc_scale_table= ff_mpeg1_dc_scale_table;
 
diff --git a/src/libffmpeg/libavcodec/h263data.h b/src/libffmpeg/libavcodec/h263data.h
index f38037840..2968531a5 100644
--- a/src/libffmpeg/libavcodec/h263data.h
+++ b/src/libffmpeg/libavcodec/h263data.h
@@ -10,16 +10,16 @@ const uint8_t intra_MCBPC_bits[9] = { 1, 3, 3, 3, 4, 6, 6, 6, 9 };
 
 /* inter MCBPC, mb_type = (inter), (intra), (interq), (intraq), (inter4v) */
 /* Changed the tables for interq and inter4v+q, following the standard ** Juanjo ** */
-const uint8_t inter_MCBPC_code[28] = { 
-    1, 3, 2, 5, 
-    3, 4, 3, 3, 
+const uint8_t inter_MCBPC_code[28] = {
+    1, 3, 2, 5,
+    3, 4, 3, 3,
     3, 7, 6, 5,
     4, 4, 3, 2,
     2, 5, 4, 5,
     1, 0, 0, 0, /* Stuffing */
     2, 12, 14, 15,
 };
-const uint8_t inter_MCBPC_bits[28] = { 
+const uint8_t inter_MCBPC_bits[28] = {
     1, 4, 4, 6, /* inter  */
     5, 8, 8, 7, /* intra  */
     3, 7, 7, 9, /* interQ */
@@ -30,9 +30,9 @@ const uint8_t inter_MCBPC_bits[28] = {
 };
 
 static const uint8_t h263_mbtype_b_tab[15][2] = {
- {1, 1}, 
- {3, 3}, 
- {1, 5}, 
+ {1, 1},
+ {3, 3},
+ {1, 5},
  {4, 4},
  {5, 4},
  {6, 6},
@@ -158,63 +158,63 @@ static RLTable rl_inter = {
 };
 
 const uint16_t intra_vlc_aic[103][2] = {
-{  0x2,  2 }, {  0x6,  3 }, {  0xe,  4 }, {  0xc,  5 }, 
-{  0xd,  5 }, { 0x10,  6 }, { 0x11,  6 }, { 0x12,  6 }, 
-{ 0x16,  7 }, { 0x1b,  8 }, { 0x20,  9 }, { 0x21,  9 }, 
-{ 0x1a,  9 }, { 0x1b,  9 }, { 0x1c,  9 }, { 0x1d,  9 }, 
-{ 0x1e,  9 }, { 0x1f,  9 }, { 0x23, 11 }, { 0x22, 11 }, 
-{ 0x57, 12 }, { 0x56, 12 }, { 0x55, 12 }, { 0x54, 12 }, 
-{ 0x53, 12 }, {  0xf,  4 }, { 0x14,  6 }, { 0x14,  7 }, 
-{ 0x1e,  8 }, {  0xf, 10 }, { 0x21, 11 }, { 0x50, 12 }, 
-{  0xb,  5 }, { 0x15,  7 }, {  0xe, 10 }, {  0x9, 10 }, 
-{ 0x15,  6 }, { 0x1d,  8 }, {  0xd, 10 }, { 0x51, 12 }, 
-{ 0x13,  6 }, { 0x23,  9 }, {  0x7, 11 }, { 0x17,  7 }, 
-{ 0x22,  9 }, { 0x52, 12 }, { 0x1c,  8 }, {  0xc, 10 }, 
-{ 0x1f,  8 }, {  0xb, 10 }, { 0x25,  9 }, {  0xa, 10 }, 
-{ 0x24,  9 }, {  0x6, 11 }, { 0x21, 10 }, { 0x20, 10 }, 
-{  0x8, 10 }, { 0x20, 11 }, {  0x7,  4 }, {  0xc,  6 }, 
-{ 0x10,  7 }, { 0x13,  8 }, { 0x11,  9 }, { 0x12,  9 }, 
-{  0x4, 10 }, { 0x27, 11 }, { 0x26, 11 }, { 0x5f, 12 }, 
-{  0xf,  6 }, { 0x13,  9 }, {  0x5, 10 }, { 0x25, 11 }, 
-{  0xe,  6 }, { 0x14,  9 }, { 0x24, 11 }, {  0xd,  6 }, 
-{  0x6, 10 }, { 0x5e, 12 }, { 0x11,  7 }, {  0x7, 10 }, 
-{ 0x13,  7 }, { 0x5d, 12 }, { 0x12,  7 }, { 0x5c, 12 }, 
-{ 0x14,  8 }, { 0x5b, 12 }, { 0x15,  8 }, { 0x1a,  8 }, 
-{ 0x19,  8 }, { 0x18,  8 }, { 0x17,  8 }, { 0x16,  8 }, 
-{ 0x19,  9 }, { 0x15,  9 }, { 0x16,  9 }, { 0x18,  9 }, 
-{ 0x17,  9 }, {  0x4, 11 }, {  0x5, 11 }, { 0x58, 12 }, 
+{  0x2,  2 }, {  0x6,  3 }, {  0xe,  4 }, {  0xc,  5 },
+{  0xd,  5 }, { 0x10,  6 }, { 0x11,  6 }, { 0x12,  6 },
+{ 0x16,  7 }, { 0x1b,  8 }, { 0x20,  9 }, { 0x21,  9 },
+{ 0x1a,  9 }, { 0x1b,  9 }, { 0x1c,  9 }, { 0x1d,  9 },
+{ 0x1e,  9 }, { 0x1f,  9 }, { 0x23, 11 }, { 0x22, 11 },
+{ 0x57, 12 }, { 0x56, 12 }, { 0x55, 12 }, { 0x54, 12 },
+{ 0x53, 12 }, {  0xf,  4 }, { 0x14,  6 }, { 0x14,  7 },
+{ 0x1e,  8 }, {  0xf, 10 }, { 0x21, 11 }, { 0x50, 12 },
+{  0xb,  5 }, { 0x15,  7 }, {  0xe, 10 }, {  0x9, 10 },
+{ 0x15,  6 }, { 0x1d,  8 }, {  0xd, 10 }, { 0x51, 12 },
+{ 0x13,  6 }, { 0x23,  9 }, {  0x7, 11 }, { 0x17,  7 },
+{ 0x22,  9 }, { 0x52, 12 }, { 0x1c,  8 }, {  0xc, 10 },
+{ 0x1f,  8 }, {  0xb, 10 }, { 0x25,  9 }, {  0xa, 10 },
+{ 0x24,  9 }, {  0x6, 11 }, { 0x21, 10 }, { 0x20, 10 },
+{  0x8, 10 }, { 0x20, 11 }, {  0x7,  4 }, {  0xc,  6 },
+{ 0x10,  7 }, { 0x13,  8 }, { 0x11,  9 }, { 0x12,  9 },
+{  0x4, 10 }, { 0x27, 11 }, { 0x26, 11 }, { 0x5f, 12 },
+{  0xf,  6 }, { 0x13,  9 }, {  0x5, 10 }, { 0x25, 11 },
+{  0xe,  6 }, { 0x14,  9 }, { 0x24, 11 }, {  0xd,  6 },
+{  0x6, 10 }, { 0x5e, 12 }, { 0x11,  7 }, {  0x7, 10 },
+{ 0x13,  7 }, { 0x5d, 12 }, { 0x12,  7 }, { 0x5c, 12 },
+{ 0x14,  8 }, { 0x5b, 12 }, { 0x15,  8 }, { 0x1a,  8 },
+{ 0x19,  8 }, { 0x18,  8 }, { 0x17,  8 }, { 0x16,  8 },
+{ 0x19,  9 }, { 0x15,  9 }, { 0x16,  9 }, { 0x18,  9 },
+{ 0x17,  9 }, {  0x4, 11 }, {  0x5, 11 }, { 0x58, 12 },
 { 0x59, 12 }, { 0x5a, 12 }, {  0x3,  7 },
 };
 
 const int8_t intra_run_aic[102] = {
- 0,  0,  0,  0,  0,  0,  0,  0, 
- 0,  0,  0,  0,  0,  0,  0,  0, 
- 0,  0,  0,  0,  0,  0,  0,  0, 
- 0,  1,  1,  1,  1,  1,  1,  1, 
- 2,  2,  2,  2,  3,  3,  3,  3, 
- 4,  4,  4,  5,  5,  5,  6,  6, 
- 7,  7,  8,  8,  9,  9, 10, 11, 
-12, 13,  0,  0,  0,  0,  0,  0, 
- 0,  0,  0,  0,  1,  1,  1,  1, 
- 2,  2,  2,  3,  3,  3,  4,  4, 
- 5,  5,  6,  6,  7,  7,  8,  9, 
-10, 11, 12, 13, 14, 15, 16, 17, 
-18, 19, 20, 21, 22, 23, 
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  1,  1,  1,  1,  1,  1,  1,
+ 2,  2,  2,  2,  3,  3,  3,  3,
+ 4,  4,  4,  5,  5,  5,  6,  6,
+ 7,  7,  8,  8,  9,  9, 10, 11,
+12, 13,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  1,  1,  1,  1,
+ 2,  2,  2,  3,  3,  3,  4,  4,
+ 5,  5,  6,  6,  7,  7,  8,  9,
+10, 11, 12, 13, 14, 15, 16, 17,
+18, 19, 20, 21, 22, 23,
 };
 
 const int8_t intra_level_aic[102] = {
- 1,  2,  3,  4,  5,  6,  7,  8, 
- 9, 10, 11, 12, 13, 14, 15, 16, 
-17, 18, 19, 20, 21, 22, 23, 24, 
-25,  1,  2,  3,  4,  5,  6,  7, 
- 1,  2,  3,  4,  1,  2,  3,  4, 
- 1,  2,  3,  1,  2,  3,  1,  2, 
- 1,  2,  1,  2,  1,  2,  1,  1, 
- 1,  1,  1,  2,  3,  4,  5,  6, 
- 7,  8,  9, 10,  1,  2,  3,  4, 
- 1,  2,  3,  1,  2,  3,  1,  2, 
- 1,  2,  1,  2,  1,  2,  1,  1, 
- 1,  1,  1,  1,  1,  1,  1,  1, 
+ 1,  2,  3,  4,  5,  6,  7,  8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+17, 18, 19, 20, 21, 22, 23, 24,
+25,  1,  2,  3,  4,  5,  6,  7,
+ 1,  2,  3,  4,  1,  2,  3,  4,
+ 1,  2,  3,  1,  2,  3,  1,  2,
+ 1,  2,  1,  2,  1,  2,  1,  1,
+ 1,  1,  1,  2,  3,  4,  5,  6,
+ 7,  8,  9, 10,  1,  2,  3,  4,
+ 1,  2,  3,  1,  2,  3,  1,  2,
+ 1,  2,  1,  2,  1,  2,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
  1,  1,  1,  1,  1,  1,
 };
 
@@ -227,18 +227,18 @@ static RLTable rl_intra_aic = {
 };
 
 static const uint8_t wrong_run[102] = {
- 1,  2,  3,  5,  4, 10,  9,  8, 
-11, 15, 17, 16, 23, 22, 21, 20, 
-19, 18, 25, 24, 27, 26, 11,  7,  
- 6,  1,  2, 13,  2,  2,  2,  2, 
- 6, 12,  3,  9,  1,  3,  4,  3, 
- 7,  4,  1,  1,  5,  5, 14,  6, 
- 1,  7,  1,  8,  1,  1,  1,  1, 
-10,  1,  1,  5,  9, 17, 25, 24, 
-29, 33, 32, 41,  2, 23, 28, 31,  
- 3, 22, 30,  4, 27, 40,  8, 26,  
- 6, 39,  7, 38, 16, 37, 15, 10, 
-11, 12, 13, 14,  1, 21, 20, 18, 
+ 1,  2,  3,  5,  4, 10,  9,  8,
+11, 15, 17, 16, 23, 22, 21, 20,
+19, 18, 25, 24, 27, 26, 11,  7,
+ 6,  1,  2, 13,  2,  2,  2,  2,
+ 6, 12,  3,  9,  1,  3,  4,  3,
+ 7,  4,  1,  1,  5,  5, 14,  6,
+ 1,  7,  1,  8,  1,  1,  1,  1,
+10,  1,  1,  5,  9, 17, 25, 24,
+29, 33, 32, 41,  2, 23, 28, 31,
+ 3, 22, 30,  4, 27, 40,  8, 26,
+ 6, 39,  7, 38, 16, 37, 15, 10,
+11, 12, 13, 14,  1, 21, 20, 18,
 19,  2,  1, 34, 35, 36
 };
 
@@ -262,7 +262,7 @@ static const uint8_t modified_quant_tab[2][32]={
     0, 3, 1, 2, 3, 4, 5, 6, 7, 8, 9, 9,10,11,12,13,14,15,16,17,18,18,19,20,21,22,23,24,25,26,27,28
 },{
     0, 2, 3, 4, 5, 6, 7, 8, 9,10,11,13,14,15,16,17,18,19,20,21,22,24,25,26,27,28,29,30,31,31,31,26
-}   
+}
 };
 
 const uint8_t ff_h263_chroma_qscale_table[32]={
diff --git a/src/libffmpeg/libavcodec/h263dec.c b/src/libffmpeg/libavcodec/h263dec.c
index 87e11794e..87c9e4991 100644
--- a/src/libffmpeg/libavcodec/h263dec.c
+++ b/src/libffmpeg/libavcodec/h263dec.c
@@ -15,14 +15,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file h263dec.c
  * H.263 decoder.
  */
- 
+
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
@@ -109,7 +109,7 @@ int ff_h263_decode_init(AVCodecContext *avctx)
         ff_msmpeg4_decode_init(s);
     else
         h263_decode_init_vlc(s);
-    
+
     return 0;
 }
 
@@ -126,10 +126,10 @@ int ff_h263_decode_end(AVCodecContext *avctx)
  */
 static int get_consumed_bytes(MpegEncContext *s, int buf_size){
     int pos= (get_bits_count(&s->gb)+7)>>3;
-    
+
     if(s->divx_packed){
         //we would have to scan through the whole buf to handle the weird reordering ...
-        return buf_size; 
+        return buf_size;
     }else if(s->flags&CODEC_FLAG_TRUNCATED){
         pos -= s->parse_context.last_index;
         if(pos<0) pos=0; // padding is not really read so this might be -1
@@ -147,20 +147,20 @@ static int decode_slice(MpegEncContext *s){
     const int mb_size= 16>>s->avctx->lowres;
     s->last_resync_gb= s->gb;
     s->first_slice_line= 1;
-        
+
     s->resync_mb_x= s->mb_x;
     s->resync_mb_y= s->mb_y;
 
     ff_set_qscale(s, s->qscale);
-    
+
     if(s->partitioned_frame){
         const int qscale= s->qscale;
 
         if(s->codec_id==CODEC_ID_MPEG4){
             if(ff_mpeg4_decode_partitions(s) < 0)
-                return -1; 
+                return -1;
         }
-        
+
         /* restore variables which were modified */
         s->first_slice_line=1;
         s->mb_x= s->resync_mb_x;
@@ -177,13 +177,13 @@ static int decode_slice(MpegEncContext *s){
                 return 0;
             }
         }
-        
+
         if(s->msmpeg4_version==1){
             s->last_dc[0]=
             s->last_dc[1]=
             s->last_dc[2]= 128;
         }
-    
+
         ff_init_block_index(s);
         for(; s->mb_x < s->mb_width; s->mb_x++) {
             int ret;
@@ -191,11 +191,11 @@ static int decode_slice(MpegEncContext *s){
             ff_update_block_index(s);
 
             if(s->resync_mb_x == s->mb_x && s->resync_mb_y+1 == s->mb_y){
-                s->first_slice_line=0; 
+                s->first_slice_line=0;
             }
 
             /* DCT & quantize */
-           
+
             s->mv_dir = MV_DIR_FORWARD;
             s->mv_type = MV_TYPE_16X16;
 //            s->mb_skipped = 0;
@@ -216,13 +216,13 @@ static int decode_slice(MpegEncContext *s){
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
 
                     s->padding_bug_score--;
-                        
+
                     if(++s->mb_x >= s->mb_width){
                         s->mb_x=0;
                         ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size);
                         s->mb_y++;
                     }
-                    return 0; 
+                    return 0;
                 }else if(ret==SLICE_NOEND){
                     av_log(s->avctx, AV_LOG_ERROR, "Slice mismatch at MB: %d\n", xy);
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x+1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
@@ -230,7 +230,7 @@ static int decode_slice(MpegEncContext *s){
                 }
                 av_log(s->avctx, AV_LOG_ERROR, "Error at MB: %d\n", xy);
                 ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_ERROR|DC_ERROR|MV_ERROR)&part_mask);
-    
+
                 return -1;
             }
 
@@ -238,25 +238,25 @@ static int decode_slice(MpegEncContext *s){
             if(s->loop_filter)
                 ff_h263_loop_filter(s);
         }
-        
+
         ff_draw_horiz_band(s, s->mb_y*mb_size, mb_size);
-        
+
         s->mb_x= 0;
     }
-    
+
     assert(s->mb_x==0 && s->mb_y==s->mb_height);
 
     /* try to detect the padding bug */
     if(      s->codec_id==CODEC_ID_MPEG4
-       &&   (s->workaround_bugs&FF_BUG_AUTODETECT) 
+       &&   (s->workaround_bugs&FF_BUG_AUTODETECT)
        &&    s->gb.size_in_bits - get_bits_count(&s->gb) >=0
        &&    s->gb.size_in_bits - get_bits_count(&s->gb) < 48
 //       &&   !s->resync_marker
        &&   !s->data_partitioning){
-        
+
         const int bits_count= get_bits_count(&s->gb);
         const int bits_left = s->gb.size_in_bits - bits_count;
-        
+
         if(bits_left==0){
             s->padding_bug_score+=16;
         } else if(bits_left != 1){
@@ -268,10 +268,10 @@ static int decode_slice(MpegEncContext *s){
             else if(v==0x7F && ((get_bits_count(&s->gb)+8)&8) && bits_left<=16)
                 s->padding_bug_score+= 4;
             else
-                s->padding_bug_score++;            
-        }                          
+                s->padding_bug_score++;
+        }
     }
-    
+
     if(s->workaround_bugs&FF_BUG_AUTODETECT){
         if(s->padding_bug_score > -2 && !s->data_partitioning /*&& (s->divx_version || !s->resync_marker)*/)
             s->workaround_bugs |=  FF_BUG_NO_PADDING;
@@ -283,17 +283,17 @@ static int decode_slice(MpegEncContext *s){
     if(s->msmpeg4_version || (s->workaround_bugs&FF_BUG_NO_PADDING)){ //FIXME perhaps solve this more cleanly
         int left= s->gb.size_in_bits - get_bits_count(&s->gb);
         int max_extra=7;
-        
+
         /* no markers in M$ crap */
         if(s->msmpeg4_version && s->pict_type==I_TYPE)
             max_extra+= 17;
-        
+
         /* buggy padding but the frame should still end approximately at the bitstream end */
         if((s->workaround_bugs&FF_BUG_NO_PADDING) && s->error_resilience>=3)
             max_extra+= 48;
         else if((s->workaround_bugs&FF_BUG_NO_PADDING))
             max_extra+= 256*256*256*64;
-        
+
         if(left>max_extra){
             av_log(s->avctx, AV_LOG_ERROR, "discarding %d junk bits at end, next would be %X\n", left, show_bits(&s->gb, 24));
         }
@@ -301,14 +301,14 @@ static int decode_slice(MpegEncContext *s){
             av_log(s->avctx, AV_LOG_ERROR, "overreading %d bits\n", -left);
         }else
             ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_END|DC_END|MV_END);
-        
+
         return 0;
     }
 
-    av_log(s->avctx, AV_LOG_ERROR, "slice end not reached but screenspace end (%d left %06X, score= %d)\n", 
+    av_log(s->avctx, AV_LOG_ERROR, "slice end not reached but screenspace end (%d left %06X, score= %d)\n",
             s->gb.size_in_bits - get_bits_count(&s->gb),
             show_bits(&s->gb, 24), s->padding_bug_score);
-            
+
     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
 
     return -1;
@@ -321,10 +321,10 @@ static int decode_slice(MpegEncContext *s){
 int ff_mpeg4_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
     int vop_found, i;
     uint32_t state;
-    
+
     vop_found= pc->frame_start_found;
     state= pc->state;
-    
+
     i=0;
     if(!vop_found){
         for(i=0; i<buf_size; i++){
@@ -345,7 +345,7 @@ int ff_mpeg4_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
             state= (state<<8) | buf[i];
             if((state&0xFFFFFF00) == 0x100){
                 pc->frame_start_found=0;
-                pc->state=-1; 
+                pc->state=-1;
                 return i-3;
             }
         }
@@ -358,10 +358,10 @@ int ff_mpeg4_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
 static int h263_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
     int vop_found, i;
     uint32_t state;
-    
+
     vop_found= pc->frame_start_found;
     state= pc->state;
-    
+
     i=0;
     if(!vop_found){
         for(i=0; i<buf_size; i++){
@@ -374,30 +374,30 @@ static int h263_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_siz
         }
     }
 
-    if(vop_found){    
+    if(vop_found){
       for(; i<buf_size; i++){
         state= (state<<8) | buf[i];
         if(state>>(32-22) == 0x20){
             pc->frame_start_found=0;
-            pc->state=-1; 
+            pc->state=-1;
             return i-3;
         }
       }
     }
     pc->frame_start_found= vop_found;
     pc->state= state;
-    
+
     return END_NOT_FOUND;
 }
 
 static int h263_parse(AVCodecParserContext *s,
                            AVCodecContext *avctx,
-                           uint8_t **poutbuf, int *poutbuf_size, 
+                           uint8_t **poutbuf, int *poutbuf_size,
                            const uint8_t *buf, int buf_size)
 {
     ParseContext *pc = s->priv_data;
     int next;
-    
+
     next= h263_find_frame_end(pc, buf, buf_size);
 
     if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
@@ -411,14 +411,14 @@ static int h263_parse(AVCodecParserContext *s,
     return next;
 }
 
-int ff_h263_decode_frame(AVCodecContext *avctx, 
+int ff_h263_decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
                              uint8_t *buf, int buf_size)
 {
     MpegEncContext *s = avctx->priv_data;
     int ret;
-    AVFrame *pict = data; 
-    
+    AVFrame *pict = data;
+
 #ifdef PRINT_FRAME_TIME
 uint64_t time= rdtsc();
 #endif
@@ -444,7 +444,7 @@ uint64_t time= rdtsc();
 
     if(s->flags&CODEC_FLAG_TRUNCATED){
         int next;
-        
+
         if(s->codec_id==CODEC_ID_MPEG4){
             next= ff_mpeg4_find_frame_end(&s->parse_context, buf, buf_size);
         }else if(s->codec_id==CODEC_ID_H263){
@@ -453,14 +453,14 @@ uint64_t time= rdtsc();
             av_log(s->avctx, AV_LOG_ERROR, "this codec does not support truncated bitstreams\n");
             return -1;
         }
-        
+
         if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
             return buf_size;
     }
 
-    
+
 retry:
-    
+
     if(s->bitstream_buffer_size && (s->divx_packed || buf_size<20)){ //divx 5.01+/xvid frame reorder
         init_get_bits(&s->gb, s->bitstream_buffer, s->bitstream_buffer_size*8);
     }else
@@ -471,13 +471,13 @@ retry:
         if (MPV_common_init(s) < 0) //we need the idct permutaton for reading a custom matrix
             return -1;
     }
-    
+
     //we need to set current_picture_ptr before reading the header, otherwise we cant store anyting im there
     if(s->current_picture_ptr==NULL || s->current_picture_ptr->data[0]){
         int i= ff_find_unused_picture(s, 0);
         s->current_picture_ptr= &s->picture[i];
     }
-      
+
     /* let's go :-) */
     if (s->msmpeg4_version==5) {
         ret= ff_wmv2_decode_picture_header(s);
@@ -486,7 +486,7 @@ retry:
     } else if (s->h263_pred) {
         if(s->avctx->extradata_size && s->picture_number==0){
             GetBitContext gb;
-            
+
             init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size*8);
             ret = ff_mpeg4_decode_picture_header(s, &gb);
         }
@@ -501,7 +501,7 @@ retry:
     } else {
         ret = h263_decode_picture_header(s);
     }
-    
+
     if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_size);
 
     /* skip if the header was thrashed */
@@ -509,16 +509,16 @@ retry:
         av_log(s->avctx, AV_LOG_ERROR, "header damaged\n");
         return -1;
     }
-    
+
     avctx->has_b_frames= !s->low_delay;
-    
+
     if(s->xvid_build==0 && s->divx_version==0 && s->lavc_build==0){
-        if(s->avctx->stream_codec_tag == ff_get_fourcc("XVID") || 
+        if(s->avctx->stream_codec_tag == ff_get_fourcc("XVID") ||
            s->avctx->codec_tag == ff_get_fourcc("XVID") || s->avctx->codec_tag == ff_get_fourcc("XVIX"))
             s->xvid_build= -1;
 #if 0
         if(s->avctx->codec_tag == ff_get_fourcc("DIVX") && s->vo_type==0 && s->vol_control_parameters==1
-           && s->padding_bug_score > 0 && s->low_delay) // XVID with modified fourcc 
+           && s->padding_bug_score > 0 && s->low_delay) // XVID with modified fourcc
             s->xvid_build= -1;
 #endif
     }
@@ -527,14 +527,14 @@ retry:
         if(s->avctx->codec_tag == ff_get_fourcc("DIVX") && s->vo_type==0 && s->vol_control_parameters==0)
             s->divx_version= 400; //divx 4
     }
-    
+
     if(s->xvid_build && s->divx_version){
         s->divx_version=
         s->divx_build= 0;
     }
 
     if(s->workaround_bugs&FF_BUG_AUTODETECT){
-        if(s->avctx->codec_tag == ff_get_fourcc("XVIX")) 
+        if(s->avctx->codec_tag == ff_get_fourcc("XVIX"))
             s->workaround_bugs|= FF_BUG_XVID_ILACE;
 
         if(s->avctx->codec_tag == ff_get_fourcc("UMP4")){
@@ -551,7 +551,7 @@ retry:
 
         if(s->xvid_build && s->xvid_build<=3)
             s->padding_bug_score= 256*256*256*64;
-        
+
         if(s->xvid_build && s->xvid_build<=1)
             s->workaround_bugs|= FF_BUG_QPEL_CHROMA;
 
@@ -568,14 +568,14 @@ retry:
 
         if(s->lavc_build && s->lavc_build<4653)
             s->workaround_bugs|= FF_BUG_STD_QPEL;
-        
+
         if(s->lavc_build && s->lavc_build<4655)
             s->workaround_bugs|= FF_BUG_DIRECT_BLOCKSIZE;
 
         if(s->lavc_build && s->lavc_build<4670){
             s->workaround_bugs|= FF_BUG_EDGE;
         }
-        
+
         if(s->lavc_build && s->lavc_build<=4712)
             s->workaround_bugs|= FF_BUG_DC_CLIP;
 
@@ -588,7 +588,7 @@ retry:
         if(s->divx_version && s->divx_version<500){
             s->workaround_bugs|= FF_BUG_EDGE;
         }
-        
+
         if(s->divx_version)
             s->workaround_bugs|= FF_BUG_HPEL_CHROMA;
 #if 0
@@ -601,12 +601,12 @@ retry:
         if(   s->resync_marker==0 && s->data_partitioning==0 && s->divx_version==0
            && s->codec_id==CODEC_ID_MPEG4 && s->vo_type==0)
             s->workaround_bugs|= FF_BUG_NO_PADDING;
-        
+
         if(s->lavc_build && s->lavc_build<4609) //FIXME not sure about the version num but a 4609 file seems ok
             s->workaround_bugs|= FF_BUG_NO_PADDING;
 #endif
     }
-    
+
     if(s->workaround_bugs& FF_BUG_STD_QPEL){
         SET_QPEL_FUNC(qpel_pixels_tab[0][ 5], qpel16_mc11_old_c)
         SET_QPEL_FUNC(qpel_pixels_tab[0][ 7], qpel16_mc31_old_c)
@@ -624,10 +624,10 @@ retry:
     }
 
     if(avctx->debug & FF_DEBUG_BUGS)
-        av_log(s->avctx, AV_LOG_DEBUG, "bugs: %X lavc_build:%d xvid_build:%d divx_version:%d divx_build:%d %s\n", 
+        av_log(s->avctx, AV_LOG_DEBUG, "bugs: %X lavc_build:%d xvid_build:%d divx_version:%d divx_build:%d %s\n",
                s->workaround_bugs, s->lavc_build, s->xvid_build, s->divx_version, s->divx_build,
                s->divx_packed ? "p" : "");
-    
+
 #if 0 // dump bits per frame / qp / complexity
 {
     static FILE *f=NULL;
@@ -637,9 +637,11 @@ retry:
 #endif
 
 #if defined(HAVE_MMX) && defined(CONFIG_GPL)
-    if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build && avctx->idct_algo == FF_IDCT_AUTO && (mm_flags & MM_MMX) && !(s->flags&CODEC_FLAG_BITEXACT)){
+    if(s->codec_id == CODEC_ID_MPEG4 && s->xvid_build && avctx->idct_algo == FF_IDCT_AUTO && (mm_flags & MM_MMX)){
         avctx->idct_algo= FF_IDCT_XVIDMMX;
         avctx->coded_width= 0; // force reinit
+//        dsputil_init(&s->dsp, avctx);
+        s->picture_number=0;
     }
 #endif
 
@@ -647,8 +649,8 @@ retry:
         /* and other parameters. So then we could init the picture   */
         /* FIXME: By the way H263 decoder is evolving it should have */
         /* an H263EncContext                                         */
-    
-    if (   s->width  != avctx->coded_width 
+
+    if (   s->width  != avctx->coded_width
         || s->height != avctx->coded_height) {
         /* H.263 could change picture size any time */
         ParseContext pc= s->parse_context; //FIXME move these demuxng hack to avformat
@@ -664,7 +666,7 @@ retry:
 
     if((s->codec_id==CODEC_ID_H263 || s->codec_id==CODEC_ID_H263P))
         s->gob_index = ff_h263_get_gob_height(s);
-    
+
     // for hurry_up==5
     s->current_picture.pict_type= s->pict_type;
     s->current_picture.key_frame= s->pict_type == I_TYPE;
@@ -675,11 +677,11 @@ retry:
     if(avctx->hurry_up && s->pict_type==B_TYPE) return get_consumed_bytes(s, buf_size);
     if(   (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE)
        || (avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE)
-       ||  avctx->skip_frame >= AVDISCARD_ALL) 
+       ||  avctx->skip_frame >= AVDISCARD_ALL)
         return get_consumed_bytes(s, buf_size);
     /* skip everything if we are in a hurry>=5 */
     if(avctx->hurry_up>=5) return get_consumed_bytes(s, buf_size);
-    
+
     if(s->next_p_frame_damaged){
         if(s->pict_type==B_TYPE)
             return get_consumed_bytes(s, buf_size);
@@ -695,7 +697,7 @@ retry:
 #endif
 
     ff_er_frame_start(s);
-    
+
     //the second part of the wmv2 header contains the MB skip bits which are stored in current_picture->mb_type
     //which isnt available before MPV_frame_start()
     if (s->msmpeg4_version==5){
@@ -704,9 +706,9 @@ retry:
     }
 
     /* decode each macroblock */
-    s->mb_x=0; 
+    s->mb_x=0;
     s->mb_y=0;
-    
+
     decode_slice(s);
     while(s->mb_y<s->mb_height){
         if(s->msmpeg4_version){
@@ -716,7 +718,7 @@ retry:
             if(ff_h263_resync(s)<0)
                 break;
         }
-        
+
         if(s->msmpeg4_version<4 && s->h263_pred)
             ff_mpeg4_clean_buffers(s);
 
@@ -727,12 +729,12 @@ retry:
         if(msmpeg4_decode_ext_header(s, buf_size) < 0){
             s->error_status_table[s->mb_num-1]= AC_ERROR|DC_ERROR|MV_ERROR;
         }
-    
+
     /* divx 5.01+ bistream reorder stuff */
     if(s->codec_id==CODEC_ID_MPEG4 && s->bitstream_buffer_size==0 && s->divx_packed){
         int current_pos= get_bits_count(&s->gb)>>3;
         int startcode_found=0;
-        
+
         if(buf_size - current_pos > 5){
             int i;
             for(i=current_pos; i<buf_size-3; i++){
@@ -749,8 +751,8 @@ retry:
 
         if(startcode_found){
             s->bitstream_buffer= av_fast_realloc(
-                s->bitstream_buffer, 
-                &s->allocated_bitstream_buffer_size, 
+                s->bitstream_buffer,
+                &s->allocated_bitstream_buffer_size,
                 buf_size - current_pos + FF_INPUT_BUFFER_PADDING_SIZE);
             memcpy(s->bitstream_buffer, buf + current_pos, buf_size - current_pos);
             s->bitstream_buffer_size= buf_size - current_pos;
diff --git a/src/libffmpeg/libavcodec/h264.c b/src/libffmpeg/libavcodec/h264.c
index 5897738ac..e80a3992c 100644
--- a/src/libffmpeg/libavcodec/h264.c
+++ b/src/libffmpeg/libavcodec/h264.c
@@ -14,10 +14,10 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file h264.c
  * H.264 / AVC / MPEG4 part10 codec.
@@ -58,7 +58,7 @@
  * Sequence parameter set
  */
 typedef struct SPS{
-    
+
     int profile_idc;
     int level_idc;
     int transform_bypass;              ///< qpprime_y_zero_transform_bypass_flag
@@ -90,6 +90,9 @@ typedef struct SPS{
     short offset_for_ref_frame[256]; //FIXME dyn aloc?
     int bitstream_restriction_flag;
     int num_reorder_frames;
+    int scaling_matrix_present;
+    uint8_t scaling_matrix4[6][16];
+    uint8_t scaling_matrix8[2][64];
 }SPS;
 
 /**
@@ -111,6 +114,8 @@ typedef struct PPS{
     int constrained_intra_pred; ///< constrained_intra_pred_flag
     int redundant_pic_cnt_present; ///< redundant_pic_cnt_present_flag
     int transform_8x8_mode;     ///< transform_8x8_mode_flag
+    uint8_t scaling_matrix4[6][16];
+    uint8_t scaling_matrix8[2][64];
 }PPS;
 
 /**
@@ -122,7 +127,7 @@ typedef enum MMCOOpcode{
     MMCO_LONG2UNUSED,
     MMCO_SHORT2LONG,
     MMCO_SET_MAX_LONG,
-    MMCO_RESET, 
+    MMCO_RESET,
     MMCO_LONG,
 } MMCOOpcode;
 
@@ -140,20 +145,24 @@ typedef struct MMCO{
  */
 typedef struct H264Context{
     MpegEncContext s;
-    int nal_ref_idc;	
+    int nal_ref_idc;
     int nal_unit_type;
-#define NAL_SLICE		1
-#define NAL_DPA			2
-#define NAL_DPB			3
-#define NAL_DPC			4
-#define NAL_IDR_SLICE		5
-#define NAL_SEI			6
-#define NAL_SPS			7
-#define NAL_PPS			8
-#define NAL_PICTURE_DELIMITER	9
-#define NAL_FILTER_DATA		10
+#define NAL_SLICE                1
+#define NAL_DPA                  2
+#define NAL_DPB                  3
+#define NAL_DPC                  4
+#define NAL_IDR_SLICE            5
+#define NAL_SEI                  6
+#define NAL_SPS                  7
+#define NAL_PPS                  8
+#define NAL_AUD                  9
+#define NAL_END_SEQUENCE        10
+#define NAL_END_STREAM          11
+#define NAL_FILLER_DATA         12
+#define NAL_SPS_EXT             13
+#define NAL_AUXILIARY_SLICE     19
     uint8_t *rbsp_buffer;
-    int rbsp_buffer_size;
+    unsigned int rbsp_buffer_size;
 
     /**
       * Used to parse AVC variant of h264
@@ -172,7 +181,7 @@ typedef struct H264Context{
 
     int top_mb_xy;
     int left_mb_xy[2];
-    
+
     int8_t intra4x4_pred_mode_cache[5*8];
     int8_t (*intra4x4_pred_mode)[8];
     void (*pred4x4  [9+3])(uint8_t *src, uint8_t *topright, int stride);//FIXME move to dsp?
@@ -190,17 +199,17 @@ typedef struct H264Context{
      * non zero coeff count cache.
      * is 64 if not available.
      */
-    uint8_t non_zero_count_cache[6*8] __align8;
+    DECLARE_ALIGNED_8(uint8_t, non_zero_count_cache[6*8]);
     uint8_t (*non_zero_count)[16];
 
     /**
      * Motion vector cache.
      */
-    int16_t mv_cache[2][5*8][2] __align8;
-    int8_t ref_cache[2][5*8] __align8;
+    DECLARE_ALIGNED_8(int16_t, mv_cache[2][5*8][2]);
+    DECLARE_ALIGNED_8(int8_t, ref_cache[2][5*8]);
 #define LIST_NOT_USED -1 //FIXME rename?
 #define PART_NOT_AVAILABLE -2
-    
+
     /**
      * is 1 if the specific list MV&references are set to 0,0,-2.
      */
@@ -216,7 +225,7 @@ typedef struct H264Context{
      * block_offset[24..47] for field macroblocks
      */
     int block_offset[2*(16+8)];
-    
+
     uint32_t *mb2b_xy; //FIXME are these 4 a good idea?
     uint32_t *mb2b8_xy;
     int b_stride; //FIXME use s->b4_stride
@@ -230,28 +239,31 @@ typedef struct H264Context{
 
     SPS sps_buffer[MAX_SPS_COUNT];
     SPS sps; ///< current sps
-    
+
     PPS pps_buffer[MAX_PPS_COUNT];
     /**
      * current pps
      */
     PPS pps; //FIXME move to Picture perhaps? (->no) do we need that?
 
-    uint16_t (*dequant4_coeff)[16]; // FIXME quant matrices should be per SPS or PPS
-    uint16_t (*dequant8_coeff)[64];
+    uint32_t dequant4_buffer[6][52][16];
+    uint32_t dequant8_buffer[2][52][64];
+    uint32_t (*dequant4_coeff[6])[16];
+    uint32_t (*dequant8_coeff[2])[64];
+    int dequant_coeff_pps;     ///< reinit tables when pps changes
 
     int slice_num;
     uint8_t *slice_table_base;
     uint8_t *slice_table;      ///< slice_table_base + mb_stride + 1
     int slice_type;
     int slice_type_fixed;
-    
+
     //interlacing specific flags
     int mb_aff_frame;
     int mb_field_decoding_flag;
-    
+
     int sub_mb_type[4];
-    
+
     //POC stuff
     int poc_lsb;
     int poc_msb;
@@ -268,7 +280,7 @@ typedef struct H264Context{
      * frame_num for frames or 2*frame_num for field pics.
      */
     int curr_pic_num;
-    
+
     /**
      * max_frame_num or 2*max_frame_num for field pics.
      */
@@ -284,14 +296,14 @@ typedef struct H264Context{
     int chroma_weight[2][16][2];
     int chroma_offset[2][16][2];
     int implicit_weight[16][16];
-   
+
     //deblock
-    int deblocking_filter;         ///< disable_deblocking_filter_idc with 1<->0 
+    int deblocking_filter;         ///< disable_deblocking_filter_idc with 1<->0
     int slice_alpha_c0_offset;
     int slice_beta_offset;
-     
+
     int redundant_pic_count;
-    
+
     int direct_spatial_mv_pred;
     int dist_scale_factor[16];
     int map_col_to_list0[2][16];
@@ -307,23 +319,23 @@ typedef struct H264Context{
     Picture field_ref_list[2][32]; //FIXME size?
     Picture *delayed_pic[16]; //FIXME size?
     Picture *delayed_output_pic;
-    
+
     /**
      * memory management control operations buffer.
      */
     MMCO mmco[MAX_MMCO_COUNT];
     int mmco_index;
-    
+
     int long_ref_count;  ///< number of actual long term references
     int short_ref_count; ///< number of actual short term references
-    
+
     //data partitioning
     GetBitContext intra_gb;
     GetBitContext inter_gb;
     GetBitContext *intra_gb_ptr;
     GetBitContext *inter_gb_ptr;
-    
-    DCTELEM mb[16*24] __align8;
+
+    DECLARE_ALIGNED_8(DCTELEM, mb[16*24]);
 
     /**
      * Cabac
@@ -340,7 +352,7 @@ typedef struct H264Context{
     uint8_t     *chroma_pred_mode_table;
     int         last_qscale_diff;
     int16_t     (*mvd_table[2])[2];
-    int16_t     mvd_cache[2][5*8][2] __align8;
+    DECLARE_ALIGNED_8(int16_t, mvd_cache[2][5*8][2]);
     uint8_t     *direct_table;
     uint8_t     direct_cache[5*8];
 
@@ -348,7 +360,7 @@ typedef struct H264Context{
     uint8_t field_scan[16];
     const uint8_t *zigzag_scan_q0;
     const uint8_t *field_scan_q0;
-    
+
     int x264_build;
 }H264Context;
 
@@ -365,7 +377,7 @@ static void svq3_luma_dc_dequant_idct_c(DCTELEM *block, int qp);
 static void svq3_add_idct_c(uint8_t *dst, DCTELEM *block, int stride, int qp, int dc);
 static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize);
 
-static inline uint32_t pack16to32(int a, int b){
+static always_inline uint32_t pack16to32(int a, int b){
 #ifdef WORDS_BIGENDIAN
    return (b&0xFFFF) + (a<<16);
 #else
@@ -379,14 +391,14 @@ static inline uint32_t pack16to32(int a, int b){
  * @param w width of the rectangle, should be a constant
  * @param size the size of val (1 or 4), should be a constant
  */
-static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){ //FIXME ensure this IS inlined
+static always_inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t val, int size){
     uint8_t *p= (uint8_t*)vp;
     assert(size==1 || size==4);
-    
+
     w      *= size;
     stride *= size;
-    
-    assert((((int)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
+
+    assert((((long)vp)&(FFMIN(w, STRIDE_ALIGN)-1)) == 0);
     assert((stride&(w-1))==0);
 //FIXME check what gcc generates for 64 bit on x86 and possibly write a 32 bit ver of it
     if(w==2 && h==2){
@@ -438,7 +450,7 @@ static inline void fill_rectangle(void *vp, int w, int h, int stride, uint32_t v
         assert(0);
 }
 
-static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
+static void fill_caches(H264Context *h, int mb_type, int for_deblock){
     MpegEncContext * const s = &h->s;
     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
     int topleft_xy, top_xy, topright_xy, left_xy[2];
@@ -452,8 +464,8 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
     if(for_deblock && h->slice_num == 1)
         return;
 
-    //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it 
-    
+    //wow what a mess, why didn't they simplify the interlacing&intra stuff, i can't imagine that these complex rules are worth it
+
     top_xy     = mb_xy  - s->mb_stride;
     topleft_xy = top_xy - 1;
     topright_xy= top_xy + 1;
@@ -550,8 +562,8 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
     }
 
     if(IS_INTRA(mb_type)){
-        h->topleft_samples_available= 
-        h->top_samples_available= 
+        h->topleft_samples_available=
+        h->top_samples_available=
         h->left_samples_available= 0xFFFF;
         h->topright_samples_available= 0xEEEA;
 
@@ -566,13 +578,13 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
                 h->left_samples_available&= 0x5F5F;
             }
         }
-        
+
         if(!IS_INTRA(topleft_type) && (topleft_type==0 || h->pps.constrained_intra_pred))
             h->topleft_samples_available&= 0x7FFF;
-        
+
         if(!IS_INTRA(topright_type) && (topright_type==0 || h->pps.constrained_intra_pred))
             h->topright_samples_available&= 0xFBFF;
-    
+
         if(IS_INTRA4x4(mb_type)){
             if(IS_INTRA4x4(top_type)){
                 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
@@ -608,15 +620,15 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
             }
         }
     }
-    
-    
+
+
 /*
-0 . T T. T T T T 
-1 L . .L . . . . 
-2 L . .L . . . . 
-3 . T TL . . . . 
-4 L . .L . . . . 
-5 L . .. . . . . 
+0 . T T. T T T T
+1 L . .L . . . .
+2 L . .L . . . .
+3 . T TL . . . .
+4 L . .L . . . .
+5 L . .. . . . .
 */
 //FIXME constraint_intra_pred & partitioning & nnz (lets hope this is just a typo in the spec)
     if(top_type){
@@ -624,25 +636,25 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
         h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
         h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
         h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
-    
+
         h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
         h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
-    
+
         h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
         h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
-        
+
     }else{
-        h->non_zero_count_cache[4+8*0]=      
+        h->non_zero_count_cache[4+8*0]=
         h->non_zero_count_cache[5+8*0]=
         h->non_zero_count_cache[6+8*0]=
         h->non_zero_count_cache[7+8*0]=
-    
+
         h->non_zero_count_cache[1+8*0]=
         h->non_zero_count_cache[2+8*0]=
-    
+
         h->non_zero_count_cache[1+8*3]=
         h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
-        
+
     }
 
     for (i=0; i<2; i++) {
@@ -652,9 +664,9 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
             h->non_zero_count_cache[0+8*1 +   8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
             h->non_zero_count_cache[0+8*4 +   8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
         }else{
-            h->non_zero_count_cache[3+8*1 + 2*8*i]= 
-            h->non_zero_count_cache[3+8*2 + 2*8*i]= 
-            h->non_zero_count_cache[0+8*1 +   8*i]= 
+            h->non_zero_count_cache[3+8*1 + 2*8*i]=
+            h->non_zero_count_cache[3+8*2 + 2*8*i]=
+            h->non_zero_count_cache[0+8*1 +   8*i]=
             h->non_zero_count_cache[0+8*4 +   8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
         }
     }
@@ -698,7 +710,7 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
                 continue;
             }
             h->mv_cache_clean[list]= 0;
-            
+
             if(IS_INTER(top_type)){
                 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
                 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
@@ -711,9 +723,9 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
                 h->ref_cache[list][scan8[0] + 2 - 1*8]=
                 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
             }else{
-                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]= 
-                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]= 
-                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]= 
+                *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
+                *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
+                *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
                 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
                 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
             }
@@ -724,7 +736,7 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
                 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]];
-                h->ref_cache[list][scan8[0] - 1 + 0*8]= 
+                h->ref_cache[list][scan8[0] - 1 + 0*8]=
                 h->ref_cache[list][scan8[0] - 1 + 1*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0]>>1)];
             }else{
                 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0*8]=
@@ -732,13 +744,13 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
                 h->ref_cache[list][scan8[0] - 1 + 0*8]=
                 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
             }
-            
+
             if(IS_INTER(left_type[1])){
                 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
                 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1;
                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]];
                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]];
-                h->ref_cache[list][scan8[0] - 1 + 2*8]= 
+                h->ref_cache[list][scan8[0] - 1 + 2*8]=
                 h->ref_cache[list][scan8[0] - 1 + 3*8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[2]>>1)];
             }else{
                 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 2*8]=
@@ -760,7 +772,7 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
                 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
                 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
             }
-            
+
             if(IS_INTER(topright_type)){
                 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
                 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
@@ -770,12 +782,12 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
                 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
                 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
             }
-            
 
-            h->ref_cache[list][scan8[5 ]+1] = 
-            h->ref_cache[list][scan8[7 ]+1] = 
+
+            h->ref_cache[list][scan8[5 ]+1] =
+            h->ref_cache[list][scan8[7 ]+1] =
             h->ref_cache[list][scan8[13]+1] =  //FIXME remove past 3 (init somewhere else)
-            h->ref_cache[list][scan8[4 ]] = 
+            h->ref_cache[list][scan8[4 ]] =
             h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
             *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
             *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
@@ -799,9 +811,9 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
                     *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
                 }else{
-                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]= 
-                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]= 
-                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]= 
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
+                    *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
                     *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
                 }
                 if(IS_INTER(left_type[0])){
@@ -838,7 +850,7 @@ static inline void fill_caches(H264Context *h, int mb_type, int for_deblock){
                     }else{
                         *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
                     }
-                    
+
                     //FIXME interlacing
                     if(IS_DIRECT(left_type[0])){
                         h->direct_cache[scan8[0] - 1 + 0*8]=
@@ -881,7 +893,7 @@ static inline int check_intra4x4_pred_mode(H264Context *h){
     static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
     static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
     int i;
-    
+
     if(!(h->top_samples_available&0x8000)){
         for(i=0; i<4; i++){
             int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
@@ -893,7 +905,7 @@ static inline int check_intra4x4_pred_mode(H264Context *h){
             }
         }
     }
-    
+
     if(!(h->left_samples_available&0x8000)){
         for(i=0; i<4; i++){
             int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
@@ -916,12 +928,12 @@ static inline int check_intra_pred_mode(H264Context *h, int mode){
     MpegEncContext * const s = &h->s;
     static const int8_t top [7]= {LEFT_DC_PRED8x8, 1,-1,-1};
     static const int8_t left[7]= { TOP_DC_PRED8x8,-1, 2,-1,DC_128_PRED8x8};
-    
+
     if(mode < 0 || mode > 6) {
         av_log(h->s.avctx, AV_LOG_ERROR, "out of range intra chroma pred mode at %d %d\n", s->mb_x, s->mb_y);
         return -1;
     }
-    
+
     if(!(h->top_samples_available&0x8000)){
         mode= top[ mode ];
         if(mode<0){
@@ -929,13 +941,13 @@ static inline int check_intra_pred_mode(H264Context *h, int mode){
             return -1;
         }
     }
-    
+
     if(!(h->left_samples_available&0x8000)){
         mode= left[ mode ];
         if(mode<0){
             av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra mode at %d %d\n", s->mb_x, s->mb_y);
             return -1;
-        } 
+        }
     }
 
     return mode;
@@ -967,7 +979,7 @@ static inline void write_back_non_zero_count(H264Context *h){
     h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
     h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
     h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
-    
+
     h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
     h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
     h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
@@ -986,7 +998,7 @@ static inline int pred_non_zero_count(H264Context *h, int n){
     const int left= h->non_zero_count_cache[index8 - 1];
     const int top = h->non_zero_count_cache[index8 - 8];
     int i= left + top;
-    
+
     if(i<64) i= (i+1)>>1;
 
     tprintf("pred_nnz L%X T%X n%d s%d P%X\n", left, top, n, scan8[n], i&31);
@@ -1027,7 +1039,7 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int
     assert(part_width==1 || part_width==2 || part_width==4);
 
 /* mv_cache
-  B . . A T T T T 
+  B . . A T T T T
   U . . L . . , .
   U . . L . . . .
   U . . L . . , .
@@ -1043,24 +1055,24 @@ static inline void pred_motion(H264Context * const h, int n, int part_width, int
     }else if(match_count==1){
         if(left_ref==ref){
             *mx= A[0];
-            *my= A[1];        
+            *my= A[1];
         }else if(top_ref==ref){
             *mx= B[0];
-            *my= B[1];        
+            *my= B[1];
         }else{
             *mx= C[0];
-            *my= C[1];        
+            *my= C[1];
         }
     }else{
         if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
             *mx= A[0];
-            *my= A[1];        
+            *my= A[1];
         }else{
             *mx= mid_pred(A[0], B[0], C[0]);
             *my= mid_pred(A[1], B[1], C[1]);
         }
     }
-        
+
     tprintf("pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1],                    diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
 }
 
@@ -1076,7 +1088,7 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int
         const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
 
         tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], h->s.mb_x, h->s.mb_y, n, list);
-        
+
         if(top_ref == ref){
             *mx= B[0];
             *my= B[1];
@@ -1085,7 +1097,7 @@ static inline void pred_16x8_motion(H264Context * const h, int n, int list, int
     }else{
         const int left_ref=     h->ref_cache[list][ scan8[8] - 1 ];
         const int16_t * const A= h->mv_cache[list][ scan8[8] - 1 ];
-        
+
         tprintf("pred_16x8: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
 
         if(left_ref == ref){
@@ -1109,7 +1121,7 @@ static inline void pred_8x16_motion(H264Context * const h, int n, int list, int
     if(n==0){
         const int left_ref=      h->ref_cache[list][ scan8[0] - 1 ];
         const int16_t * const A=  h->mv_cache[list][ scan8[0] - 1 ];
-        
+
         tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", left_ref, A[0], A[1], h->s.mb_x, h->s.mb_y, n, list);
 
         if(left_ref == ref){
@@ -1122,10 +1134,10 @@ static inline void pred_8x16_motion(H264Context * const h, int n, int list, int
         int diagonal_ref;
 
         diagonal_ref= fetch_diagonal_mv(h, &C, scan8[4], list, 2);
-        
+
         tprintf("pred_8x16: (%2d %2d %2d) at %2d %2d %d list %d\n", diagonal_ref, C[0], C[1], h->s.mb_x, h->s.mb_y, n, list);
 
-        if(diagonal_ref == ref){ 
+        if(diagonal_ref == ref){
             *mx= C[0];
             *my= C[1];
             return;
@@ -1145,11 +1157,11 @@ static inline void pred_pskip_motion(H264Context * const h, int * const mx, int
     if(top_ref == PART_NOT_AVAILABLE || left_ref == PART_NOT_AVAILABLE
        || (top_ref == 0  && *(uint32_t*)h->mv_cache[0][ scan8[0] - 8 ] == 0)
        || (left_ref == 0 && *(uint32_t*)h->mv_cache[0][ scan8[0] - 1 ] == 0)){
-       
+
         *mx = *my = 0;
         return;
     }
-        
+
     pred_motion(h, 0, 4, 0, 0, mx, my);
 
     return;
@@ -1230,7 +1242,7 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
         *mb_type |= MB_TYPE_DIRECT2;
 
     tprintf("mb_type = %08x, sub_mb_type = %08x, is_b8x8 = %d, mb_type_col = %08x\n", *mb_type, sub_mb_type, is_b8x8, mb_type_col);
-    
+
     if(h->direct_spatial_mv_pred){
         int ref[2];
         int mv[2][2];
@@ -1274,9 +1286,9 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
         }
 
         if(IS_16X16(*mb_type)){
-            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, ref[0], 1);
-            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, ref[1], 1);
-            if(!IS_INTRA(mb_type_col) 
+            fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, (uint8_t)ref[0], 1);
+            fill_rectangle(&h->ref_cache[1][scan8[0]], 4, 4, 8, (uint8_t)ref[1], 1);
+            if(!IS_INTRA(mb_type_col)
                && (   (l1ref0[0] == 0 && ABS(l1mv0[0][0]) <= 1 && ABS(l1mv0[0][1]) <= 1)
                    || (l1ref0[0]  < 0 && l1ref1[0] == 0 && ABS(l1mv1[0][0]) <= 1 && ABS(l1mv1[0][1]) <= 1
                        && (h->x264_build>33 || !h->x264_build)))){
@@ -1296,21 +1308,30 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
             for(i8=0; i8<4; i8++){
                 const int x8 = i8&1;
                 const int y8 = i8>>1;
-    
+
                 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
                     continue;
                 h->sub_mb_type[i8] = sub_mb_type;
-    
+
                 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4);
                 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4);
-                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref[0], 1);
-                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, ref[1], 1);
-    
+                fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
+                fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
+
                 /* col_zero_flag */
-                if(!IS_INTRA(mb_type_col) && (   l1ref0[x8 + y8*h->b8_stride] == 0 
-                                              || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0 
+                if(!IS_INTRA(mb_type_col) && (   l1ref0[x8 + y8*h->b8_stride] == 0
+                                              || (l1ref0[x8 + y8*h->b8_stride] < 0 && l1ref1[x8 + y8*h->b8_stride] == 0
                                                   && (h->x264_build>33 || !h->x264_build)))){
                     const int16_t (*l1mv)[2]= l1ref0[x8 + y8*h->b8_stride] == 0 ? l1mv0 : l1mv1;
+                    if(IS_SUB_8X8(sub_mb_type)){
+                        const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
+                        if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
+                            if(ref[0] == 0)
+                                fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, 0, 4);
+                            if(ref[1] == 0)
+                                fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
+                        }
+                    }else
                     for(i4=0; i4<4; i4++){
                         const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
                         if(ABS(mv_col[0]) <= 1 && ABS(mv_col[1]) <= 1){
@@ -1359,7 +1380,7 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                     fill_rectangle(&h-> mv_cache[1][scan8[i8*4]], 2, 2, 8, 0, 4);
                     continue;
                 }
-    
+
                 ref0 = l1ref0[x8 + y8*h->b8_stride];
                 if(ref0 >= 0)
                     ref0 = h->map_col_to_list0[0][ref0];
@@ -1368,9 +1389,16 @@ static inline void pred_direct_motion(H264Context * const h, int *mb_type){
                     l1mv= l1mv1;
                 }
                 dist_scale_factor = h->dist_scale_factor[ref0];
-    
+
                 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, ref0, 1);
                 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, 0, 1);
+                if(IS_SUB_8X8(sub_mb_type)){
+                    const int16_t *mv_col = l1mv[x8*3 + y8*3*h->b_stride];
+                    int mx = (dist_scale_factor * mv_col[0] + 128) >> 8;
+                    int my = (dist_scale_factor * mv_col[1] + 128) >> 8;
+                    fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mx,my), 4);
+                    fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mx-mv_col[0],my-mv_col[1]), 4);
+                }else
                 for(i4=0; i4<4; i4++){
                     const int16_t *mv_col = l1mv[x8*2 + (i4&1) + (y8*2 + (i4>>1))*h->b_stride];
                     int16_t *mv_l0 = h->mv_cache[0][scan8[i8*4+i4]];
@@ -1412,7 +1440,7 @@ static inline void write_back_motion(H264Context *h, int mb_type){
             }
             continue;
         }
-        
+
         for(y=0; y<4; y++){
             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
             *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
@@ -1428,7 +1456,7 @@ static inline void write_back_motion(H264Context *h, int mb_type){
             s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y];
         }
     }
-    
+
     if(h->slice_type == B_TYPE && h->pps.cabac){
         if(IS_8X8(mb_type)){
             h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
@@ -1443,18 +1471,18 @@ static inline void write_back_motion(H264Context *h, int mb_type){
  * @param consumed is the number of bytes used as input
  * @param length is the length of the array
  * @param dst_length is the number of decoded bytes FIXME here or a decode rbsp tailing?
- * @returns decoded bytes, might be src+1 if no escapes 
+ * @returns decoded bytes, might be src+1 if no escapes
  */
 static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *consumed, int length){
     int i, si, di;
     uint8_t *dst;
 
-//    src[0]&0x80;		//forbidden bit
+//    src[0]&0x80;                //forbidden bit
     h->nal_ref_idc= src[0]>>5;
     h->nal_unit_type= src[0]&0x1F;
 
     src++; length--;
-#if 0    
+#if 0
     for(i=0; i<length; i++)
         printf("%2X ", src[i]);
 #endif
@@ -1473,7 +1501,7 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
     if(i>=length-1){ //no escaped 0
         *dst_length= length;
         *consumed= length+1; //+1 for the header
-        return src; 
+        return src;
     }
 
     h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length);
@@ -1481,7 +1509,7 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
 
 //printf("decoding esc\n");
     si=di=0;
-    while(si<length){ 
+    while(si<length){
         //remove escapes (very rare 1:2^22)
         if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
             if(src[si+2]==3){ //escape
@@ -1513,10 +1541,10 @@ static uint8_t *decode_nal(H264Context *h, uint8_t *src, int *dst_length, int *c
 static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, int dst_length){
     int i, escape_count, si, di;
     uint8_t *temp;
-    
+
     assert(length>=0);
     assert(dst_length>0);
-    
+
     dst[0]= (h->nal_ref_idc<<5) + h->nal_unit_type;
 
     if(length==0) return 1;
@@ -1524,20 +1552,20 @@ static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, in
     escape_count= 0;
     for(i=0; i<length; i+=2){
         if(src[i]) continue;
-        if(i>0 && src[i-1]==0) 
+        if(i>0 && src[i-1]==0)
             i--;
         if(i+2<length && src[i+1]==0 && src[i+2]<=3){
             escape_count++;
             i+=2;
         }
     }
-    
-    if(escape_count==0){ 
+
+    if(escape_count==0){
         if(dst+1 != src)
             memcpy(dst+1, src, length);
         return length + 1;
     }
-    
+
     if(length + escape_count + 1> dst_length)
         return -1;
 
@@ -1546,23 +1574,23 @@ static int encode_nal(H264Context *h, uint8_t *dst, uint8_t *src, int length, in
     h->rbsp_buffer= av_fast_realloc(h->rbsp_buffer, &h->rbsp_buffer_size, length + escape_count);
     temp= h->rbsp_buffer;
 //printf("encoding esc\n");
-    
+
     si= 0;
     di= 0;
     while(si < length){
         if(si+2<length && src[si]==0 && src[si+1]==0 && src[si+2]<=3){
             temp[di++]= 0; si++;
             temp[di++]= 0; si++;
-            temp[di++]= 3; 
+            temp[di++]= 3;
             temp[di++]= src[si++];
         }
         else
             temp[di++]= src[si++];
     }
     memcpy(dst+1, temp, length+escape_count);
-    
+
     assert(di == length+escape_count);
-    
+
     return di + 1;
 }
 
@@ -1598,8 +1626,7 @@ static int decode_rbsp_trailing(uint8_t *src){
  * idct tranforms the 16 dc values and dequantize them.
  * @param qp quantization parameter
  */
-static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
-    const int qmul= dequant_coeff[qp][0];
+static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
 #define stride 16
     int i;
     int temp[16]; //FIXME check if this is a good idea
@@ -1628,10 +1655,10 @@ static void h264_luma_dc_dequant_idct_c(DCTELEM *block, int qp){
         const int z2= temp[4*1+i] - temp[4*3+i];
         const int z3= temp[4*1+i] + temp[4*3+i];
 
-        block[stride*0 +offset]= ((z0 + z3)*qmul + 2)>>2; //FIXME think about merging this into decode_resdual
-        block[stride*2 +offset]= ((z1 + z2)*qmul + 2)>>2;
-        block[stride*8 +offset]= ((z1 - z2)*qmul + 2)>>2;
-        block[stride*10+offset]= ((z0 - z3)*qmul + 2)>>2;
+        block[stride*0 +offset]= ((((z0 + z3)*qmul + 128 ) >> 8)); //FIXME think about merging this into decode_resdual
+        block[stride*2 +offset]= ((((z1 + z2)*qmul + 128 ) >> 8));
+        block[stride*8 +offset]= ((((z1 - z2)*qmul + 128 ) >> 8));
+        block[stride*10+offset]= ((((z0 - z3)*qmul + 128 ) >> 8));
     }
 }
 
@@ -1678,8 +1705,7 @@ static void h264_luma_dc_dct_c(DCTELEM *block/*, int qp*/){
 #undef xStride
 #undef stride
 
-static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
-    const int qmul= dequant_coeff[qp][0];
+static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp, int qmul){
     const int stride= 16*2;
     const int xStride= 16;
     int a,b,c,d,e;
@@ -1694,10 +1720,10 @@ static void chroma_dc_dequant_idct_c(DCTELEM *block, int qp){
     b= c-d;
     c= c+d;
 
-    block[stride*0 + xStride*0]= ((a+c)*qmul + 0)>>1;
-    block[stride*0 + xStride*1]= ((e+b)*qmul + 0)>>1;
-    block[stride*1 + xStride*0]= ((a-c)*qmul + 0)>>1;
-    block[stride*1 + xStride*1]= ((e-b)*qmul + 0)>>1;
+    block[stride*0 + xStride*0]= ((a+c)*qmul) >> 7;
+    block[stride*0 + xStride*1]= ((e+b)*qmul) >> 7;
+    block[stride*1 + xStride*0]= ((a-c)*qmul) >> 7;
+    block[stride*1 + xStride*1]= ((e-b)*qmul) >> 7;
 }
 
 #if 0
@@ -1727,7 +1753,7 @@ static void chroma_dc_dct_c(DCTELEM *block){
  * gets the chroma qp.
  */
 static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
-    
+
     return chroma_qp[clip(qscale + chroma_qp_index_offset, 0, 51)];
 }
 
@@ -1736,7 +1762,7 @@ static inline int get_chroma_qp(int chroma_qp_index_offset, int qscale){
 static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int stride){
     int i;
     //FIXME try int temp instead of block
-    
+
     for(i=0; i<4; i++){
         const int d0= src1[0 + i*stride] - src2[0 + i*stride];
         const int d1= src1[1 + i*stride] - src2[1 + i*stride];
@@ -1746,19 +1772,19 @@ static void h264_diff_dct_c(DCTELEM *block, uint8_t *src1, uint8_t *src2, int st
         const int z3= d0 - d3;
         const int z1= d1 + d2;
         const int z2= d1 - d2;
-        
+
         block[0 + 4*i]=   z0 +   z1;
         block[1 + 4*i]= 2*z3 +   z2;
         block[2 + 4*i]=   z0 -   z1;
         block[3 + 4*i]=   z3 - 2*z2;
-    }    
+    }
 
     for(i=0; i<4; i++){
         const int z0= block[0*4 + i] + block[3*4 + i];
         const int z3= block[0*4 + i] - block[3*4 + i];
         const int z1= block[1*4 + i] + block[2*4 + i];
         const int z2= block[1*4 + i] - block[2*4 + i];
-        
+
         block[0*4 + i]=   z0 +   z1;
         block[1*4 + i]= 2*z3 +   z2;
         block[2*4 + i]=   z0 -   z1;
@@ -1864,35 +1890,35 @@ static void pred4x4_horizontal_c(uint8_t *src, uint8_t *topright, int stride){
 static void pred4x4_dc_c(uint8_t *src, uint8_t *topright, int stride){
     const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride]
                    + src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 4) >>3;
-    
-    ((uint32_t*)(src+0*stride))[0]= 
-    ((uint32_t*)(src+1*stride))[0]= 
-    ((uint32_t*)(src+2*stride))[0]= 
-    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; 
+
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
 }
 
 static void pred4x4_left_dc_c(uint8_t *src, uint8_t *topright, int stride){
     const int dc= (  src[-1+0*stride] + src[-1+1*stride] + src[-1+2*stride] + src[-1+3*stride] + 2) >>2;
-    
-    ((uint32_t*)(src+0*stride))[0]= 
-    ((uint32_t*)(src+1*stride))[0]= 
-    ((uint32_t*)(src+2*stride))[0]= 
-    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; 
+
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
 }
 
 static void pred4x4_top_dc_c(uint8_t *src, uint8_t *topright, int stride){
     const int dc= (  src[-stride] + src[1-stride] + src[2-stride] + src[3-stride] + 2) >>2;
-    
-    ((uint32_t*)(src+0*stride))[0]= 
-    ((uint32_t*)(src+1*stride))[0]= 
-    ((uint32_t*)(src+2*stride))[0]= 
-    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101; 
+
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
+    ((uint32_t*)(src+3*stride))[0]= dc* 0x01010101;
 }
 
 static void pred4x4_128_dc_c(uint8_t *src, uint8_t *topright, int stride){
-    ((uint32_t*)(src+0*stride))[0]= 
-    ((uint32_t*)(src+1*stride))[0]= 
-    ((uint32_t*)(src+2*stride))[0]= 
+    ((uint32_t*)(src+0*stride))[0]=
+    ((uint32_t*)(src+1*stride))[0]=
+    ((uint32_t*)(src+2*stride))[0]=
     ((uint32_t*)(src+3*stride))[0]= 128U*0x01010101U;
 }
 
@@ -1920,16 +1946,16 @@ static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
     LOAD_TOP_EDGE
     LOAD_LEFT_EDGE
 
-    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2; 
+    src[0+3*stride]=(l3 + 2*l2 + l1 + 2)>>2;
     src[0+2*stride]=
-    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2; 
+    src[1+3*stride]=(l2 + 2*l1 + l0 + 2)>>2;
     src[0+1*stride]=
     src[1+2*stride]=
-    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2; 
+    src[2+3*stride]=(l1 + 2*l0 + lt + 2)>>2;
     src[0+0*stride]=
     src[1+1*stride]=
     src[2+2*stride]=
-    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2; 
+    src[3+3*stride]=(l0 + 2*lt + t0 + 2)>>2;
     src[1+0*stride]=
     src[2+1*stride]=
     src[3+2*stride]=(lt + 2*t0 + t1 + 2)>>2;
@@ -1939,9 +1965,9 @@ static void pred4x4_down_right_c(uint8_t *src, uint8_t *topright, int stride){
 }
 
 static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
-    LOAD_TOP_EDGE    
-    LOAD_TOP_RIGHT_EDGE    
-//    LOAD_LEFT_EDGE    
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
+//    LOAD_LEFT_EDGE
 
     src[0+0*stride]=(t0 + t2 + 2*t1 + 2)>>2;
     src[1+0*stride]=
@@ -1963,8 +1989,8 @@ static void pred4x4_down_left_c(uint8_t *src, uint8_t *topright, int stride){
 
 static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride){
     const int lt= src[-1-1*stride];
-    LOAD_TOP_EDGE    
-    LOAD_LEFT_EDGE    
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
     const __attribute__((unused)) int unu= l3;
 
     src[0+0*stride]=
@@ -1986,8 +2012,8 @@ static void pred4x4_vertical_right_c(uint8_t *src, uint8_t *topright, int stride
 }
 
 static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride){
-    LOAD_TOP_EDGE    
-    LOAD_TOP_RIGHT_EDGE    
+    LOAD_TOP_EDGE
+    LOAD_TOP_RIGHT_EDGE
     const __attribute__((unused)) int unu= t7;
 
     src[0+0*stride]=(t0 + t1 + 1)>>1;
@@ -2009,7 +2035,7 @@ static void pred4x4_vertical_left_c(uint8_t *src, uint8_t *topright, int stride)
 }
 
 static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride){
-    LOAD_LEFT_EDGE    
+    LOAD_LEFT_EDGE
 
     src[0+0*stride]=(l0 + l1 + 1)>>1;
     src[1+0*stride]=(l0 + 2*l1 + l2 + 2)>>2;
@@ -2028,11 +2054,11 @@ static void pred4x4_horizontal_up_c(uint8_t *src, uint8_t *topright, int stride)
     src[2+3*stride]=
     src[3+3*stride]=l3;
 }
-    
+
 static void pred4x4_horizontal_down_c(uint8_t *src, uint8_t *topright, int stride){
     const int lt= src[-1-1*stride];
-    LOAD_TOP_EDGE    
-    LOAD_LEFT_EDGE    
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
     const __attribute__((unused)) int unu= t3;
 
     src[0+0*stride]=
@@ -2059,7 +2085,7 @@ static void pred16x16_vertical_c(uint8_t *src, int stride){
     const uint32_t b= ((uint32_t*)(src-stride))[1];
     const uint32_t c= ((uint32_t*)(src-stride))[2];
     const uint32_t d= ((uint32_t*)(src-stride))[3];
-    
+
     for(i=0; i<16; i++){
         ((uint32_t*)(src+i*stride))[0]= a;
         ((uint32_t*)(src+i*stride))[1]= b;
@@ -2085,7 +2111,7 @@ static void pred16x16_dc_c(uint8_t *src, int stride){
     for(i=0;i<16; i++){
         dc+= src[-1+i*stride];
     }
-    
+
     for(i=0;i<16; i++){
         dc+= src[i-stride];
     }
@@ -2106,7 +2132,7 @@ static void pred16x16_left_dc_c(uint8_t *src, int stride){
     for(i=0;i<16; i++){
         dc+= src[-1+i*stride];
     }
-    
+
     dc= 0x01010101*((dc + 8)>>4);
 
     for(i=0; i<16; i++){
@@ -2192,7 +2218,7 @@ static void pred8x8_vertical_c(uint8_t *src, int stride){
     int i;
     const uint32_t a= ((uint32_t*)(src-stride))[0];
     const uint32_t b= ((uint32_t*)(src-stride))[1];
-    
+
     for(i=0; i<8; i++){
         ((uint32_t*)(src+i*stride))[0]= a;
         ((uint32_t*)(src+i*stride))[1]= b;
@@ -2212,7 +2238,7 @@ static void pred8x8_128_dc_c(uint8_t *src, int stride){
     int i;
 
     for(i=0; i<8; i++){
-        ((uint32_t*)(src+i*stride))[0]= 
+        ((uint32_t*)(src+i*stride))[0]=
         ((uint32_t*)(src+i*stride))[1]= 0x01010101U*128U;
     }
 }
@@ -2445,7 +2471,7 @@ static void pred8x8l_down_right_c(uint8_t *src, int has_topleft, int has_toprigh
     SRC(5,0)=SRC(6,1)=SRC(7,2)= (t3 + 2*t4 + t5 + 2) >> 2;
     SRC(6,0)=SRC(7,1)= (t4 + 2*t5 + t6 + 2) >> 2;
     SRC(7,0)= (t5 + 2*t6 + t7 + 2) >> 2;
-  
+
 }
 static void pred8x8l_vertical_right_c(uint8_t *src, int has_topleft, int has_topright, int stride)
 {
@@ -2580,28 +2606,28 @@ static inline void mc_dir_part(H264Context *h, Picture *pic, int n, int square,
     const int full_my= my>>2;
     const int pic_width  = 16*s->mb_width;
     const int pic_height = 16*s->mb_height;
-    
+
     assert(pic->data[0]);
-    
+
     if(mx&7) extra_width -= 3;
     if(my&7) extra_height -= 3;
-    
-    if(   full_mx < 0-extra_width 
-       || full_my < 0-extra_height 
-       || full_mx + 16/*FIXME*/ > pic_width + extra_width 
+
+    if(   full_mx < 0-extra_width
+       || full_my < 0-extra_height
+       || full_mx + 16/*FIXME*/ > pic_width + extra_width
        || full_my + 16/*FIXME*/ > pic_height + extra_height){
         ff_emulated_edge_mc(s->edge_emu_buffer, src_y - 2 - 2*s->linesize, s->linesize, 16+5, 16+5/*FIXME*/, full_mx-2, full_my-2, pic_width, pic_height);
             src_y= s->edge_emu_buffer + 2 + 2*s->linesize;
         emu=1;
     }
-    
+
     qpix_op[luma_xy](dest_y, src_y, s->linesize); //FIXME try variable height perhaps?
     if(!square){
         qpix_op[luma_xy](dest_y + delta, src_y + delta, s->linesize);
     }
-    
+
     if(s->flags&CODEC_FLAG_GRAY) return;
-    
+
     if(emu){
         ff_emulated_edge_mc(s->edge_emu_buffer, src_cb, s->uvlinesize, 9, 9/*FIXME*/, (mx>>3), (my>>3), pic_width>>1, pic_height>>1);
             src_cb= s->edge_emu_buffer;
@@ -2624,13 +2650,13 @@ static inline void mc_part_std(H264Context *h, int n, int square, int chroma_hei
     MpegEncContext * const s = &h->s;
     qpel_mc_func *qpix_op=  qpix_put;
     h264_chroma_mc_func chroma_op= chroma_put;
-    
+
     dest_y  += 2*x_offset + 2*y_offset*s->  linesize;
     dest_cb +=   x_offset +   y_offset*s->uvlinesize;
     dest_cr +=   x_offset +   y_offset*s->uvlinesize;
     x_offset += 8*s->mb_x;
     y_offset += 8*s->mb_y;
-    
+
     if(list0){
         Picture *ref= &h->ref_list[0][ h->ref_cache[0][ scan8[n] ] ];
         mc_dir_part(h, ref, n, square, chroma_height, delta, 0,
@@ -2663,7 +2689,7 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
     dest_cr +=   x_offset +   y_offset*s->uvlinesize;
     x_offset += 8*s->mb_x;
     y_offset += 8*s->mb_y;
-    
+
     if(list0 && list1){
         /* don't optimize for luma-only case, since B-frames usually
          * use implicit weights => chroma too. */
@@ -2683,19 +2709,19 @@ static inline void mc_part_weighted(H264Context *h, int n, int square, int chrom
         if(h->use_weight == 2){
             int weight0 = h->implicit_weight[refn0][refn1];
             int weight1 = 64 - weight0;
-            luma_weight_avg(  dest_y,  tmp_y,  s->  linesize, 5, weight0, weight1, 0, 0);
-            chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0, 0);
-            chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0, 0);
+            luma_weight_avg(  dest_y,  tmp_y,  s->  linesize, 5, weight0, weight1, 0);
+            chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, 5, weight0, weight1, 0);
+            chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, 5, weight0, weight1, 0);
         }else{
             luma_weight_avg(dest_y, tmp_y, s->linesize, h->luma_log2_weight_denom,
-                            h->luma_weight[0][refn0], h->luma_weight[1][refn1], 
-                            h->luma_offset[0][refn0], h->luma_offset[1][refn1]);
+                            h->luma_weight[0][refn0], h->luma_weight[1][refn1],
+                            h->luma_offset[0][refn0] + h->luma_offset[1][refn1]);
             chroma_weight_avg(dest_cb, tmp_cb, s->uvlinesize, h->chroma_log2_weight_denom,
-                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0], 
-                            h->chroma_offset[0][refn0][0], h->chroma_offset[1][refn1][0]);
+                            h->chroma_weight[0][refn0][0], h->chroma_weight[1][refn1][0],
+                            h->chroma_offset[0][refn0][0] + h->chroma_offset[1][refn1][0]);
             chroma_weight_avg(dest_cr, tmp_cr, s->uvlinesize, h->chroma_log2_weight_denom,
-                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1], 
-                            h->chroma_offset[0][refn0][1], h->chroma_offset[1][refn1][1]);
+                            h->chroma_weight[0][refn0][1], h->chroma_weight[1][refn1][1],
+                            h->chroma_offset[0][refn0][1] + h->chroma_offset[1][refn1][1]);
         }
     }else{
         int list = list1 ? 1 : 0;
@@ -2721,7 +2747,7 @@ static inline void mc_part(H264Context *h, int n, int square, int chroma_height,
                            int x_offset, int y_offset,
                            qpel_mc_func *qpix_put, h264_chroma_mc_func chroma_put,
                            qpel_mc_func *qpix_avg, h264_chroma_mc_func chroma_avg,
-                           h264_weight_func *weight_op, h264_biweight_func *weight_avg, 
+                           h264_weight_func *weight_op, h264_biweight_func *weight_avg,
                            int list0, int list1){
     if((h->use_weight==2 && list0 && list1
         && (h->implicit_weight[ h->ref_cache[0][scan8[n]] ][ h->ref_cache[1][scan8[n]] ] != 32))
@@ -2741,9 +2767,9 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
     MpegEncContext * const s = &h->s;
     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
     const int mb_type= s->current_picture.mb_type[mb_xy];
-    
+
     assert(IS_INTER(mb_type));
-    
+
     if(IS_16X16(mb_type)){
         mc_part(h, 0, 1, 8, 0, dest_y, dest_cb, dest_cr, 0, 0,
                 qpix_put[0], chroma_put[0], qpix_avg[0], chroma_avg[0],
@@ -2769,7 +2795,7 @@ static void hl_motion(H264Context *h, uint8_t *dest_y, uint8_t *dest_cb, uint8_t
                 IS_DIR(mb_type, 1, 0), IS_DIR(mb_type, 1, 1));
     }else{
         int i;
-        
+
         assert(IS_8X8(mb_type));
 
         for(i=0; i<4; i++){
@@ -2824,12 +2850,12 @@ static void decode_init_vlc(H264Context *h){
         int i;
         done = 1;
 
-        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5, 
+        init_vlc(&chroma_dc_coeff_token_vlc, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 4*5,
                  &chroma_dc_coeff_token_len [0], 1, 1,
                  &chroma_dc_coeff_token_bits[0], 1, 1, 1);
 
         for(i=0; i<4; i++){
-            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17, 
+            init_vlc(&coeff_token_vlc[i], COEFF_TOKEN_VLC_BITS, 4*17,
                      &coeff_token_len [i][0], 1, 1,
                      &coeff_token_bits[i][0], 1, 1, 1);
         }
@@ -2840,17 +2866,17 @@ static void decode_init_vlc(H264Context *h){
                      &chroma_dc_total_zeros_bits[i][0], 1, 1, 1);
         }
         for(i=0; i<15; i++){
-            init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16, 
+            init_vlc(&total_zeros_vlc[i], TOTAL_ZEROS_VLC_BITS, 16,
                      &total_zeros_len [i][0], 1, 1,
                      &total_zeros_bits[i][0], 1, 1, 1);
         }
 
         for(i=0; i<6; i++){
-            init_vlc(&run_vlc[i], RUN_VLC_BITS, 7, 
+            init_vlc(&run_vlc[i], RUN_VLC_BITS, 7,
                      &run_len [i][0], 1, 1,
                      &run_bits[i][0], 1, 1, 1);
         }
-        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16, 
+        init_vlc(&run7_vlc, RUN7_VLC_BITS, 16,
                  &run_len [6][0], 1, 1,
                  &run_bits[6][0], 1, 1, 1);
     }
@@ -2921,12 +2947,72 @@ static void free_tables(H264Context *h){
     av_freep(&h->mb2b_xy);
     av_freep(&h->mb2b8_xy);
 
-    av_freep(&h->dequant4_coeff);
-    av_freep(&h->dequant8_coeff);
-
     av_freep(&h->s.obmc_scratchpad);
 }
 
+static void init_dequant8_coeff_table(H264Context *h){
+    int i,q,x;
+    h->dequant8_coeff[0] = h->dequant8_buffer[0];
+    h->dequant8_coeff[1] = h->dequant8_buffer[1];
+
+    for(i=0; i<2; i++ ){
+        if(i && !memcmp(h->pps.scaling_matrix8[0], h->pps.scaling_matrix8[1], 64*sizeof(uint8_t))){
+            h->dequant8_coeff[1] = h->dequant8_buffer[0];
+            break;
+        }
+
+        for(q=0; q<52; q++){
+            int shift = div6[q];
+            int idx = rem6[q];
+            for(x=0; x<64; x++)
+                h->dequant8_coeff[i][q][x] = ((uint32_t)dequant8_coeff_init[idx][
+                    dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] * h->pps.scaling_matrix8[i][x]) << shift;
+        }
+    }
+}
+
+static void init_dequant4_coeff_table(H264Context *h){
+    int i,j,q,x;
+    const int transpose = (h->s.dsp.h264_idct_add != ff_h264_idct_add_c); //FIXME ugly
+    for(i=0; i<6; i++ ){
+        h->dequant4_coeff[i] = h->dequant4_buffer[i];
+        for(j=0; j<i; j++){
+            if(!memcmp(h->pps.scaling_matrix4[j], h->pps.scaling_matrix4[i], 16*sizeof(uint8_t))){
+                h->dequant4_coeff[i] = h->dequant4_buffer[j];
+                break;
+            }
+        }
+        if(j<i)
+            continue;
+
+        for(q=0; q<52; q++){
+            int shift = div6[q] + 2;
+            int idx = rem6[q];
+            for(x=0; x<16; x++)
+                h->dequant4_coeff[i][q][transpose ? (x>>2)|((x<<2)&0xF) : x] =
+                    ((uint32_t)dequant4_coeff_init[idx][(x&1) + ((x>>2)&1)] *
+                    h->pps.scaling_matrix4[i][x]) << shift;
+        }
+    }
+}
+
+static void init_dequant_tables(H264Context *h){
+    int i,x;
+    init_dequant4_coeff_table(h);
+    if(h->pps.transform_8x8_mode)
+        init_dequant8_coeff_table(h);
+    if(h->sps.transform_bypass){
+        for(i=0; i<6; i++)
+            for(x=0; x<16; x++)
+                h->dequant4_coeff[i][0][x] = 1<<6;
+        if(h->pps.transform_8x8_mode)
+            for(i=0; i<2; i++)
+                for(x=0; x<64; x++)
+                    h->dequant8_coeff[i][0][x] = 1<<6;
+    }
+}
+
+
 /**
  * allocates tables.
  * needs width/height
@@ -2934,7 +3020,7 @@ static void free_tables(H264Context *h){
 static int alloc_tables(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int big_mb_num= s->mb_stride * (s->mb_height+1);
-    int x,y,q;
+    int x,y;
 
     CHECKED_ALLOCZ(h->intra4x4_pred_mode, big_mb_num * 8  * sizeof(uint8_t))
 
@@ -2961,33 +3047,17 @@ static int alloc_tables(H264Context *h){
             const int mb_xy= x + y*s->mb_stride;
             const int b_xy = 4*x + 4*y*h->b_stride;
             const int b8_xy= 2*x + 2*y*h->b8_stride;
-        
+
             h->mb2b_xy [mb_xy]= b_xy;
             h->mb2b8_xy[mb_xy]= b8_xy;
         }
     }
 
-    CHECKED_ALLOCZ(h->dequant4_coeff, 52*16 * sizeof(uint16_t));
-    CHECKED_ALLOCZ(h->dequant8_coeff, 52*64 * sizeof(uint16_t));
-    memcpy(h->dequant4_coeff, dequant_coeff, 52*16 * sizeof(uint16_t));
-    for(q=0; q<52; q++){
-        int shift = div6[q];
-        int idx = rem6[q];
-        if(shift >= 2) // qp<12 are shifted during dequant
-            shift -= 2;
-        for(x=0; x<64; x++)
-            h->dequant8_coeff[q][x] = dequant8_coeff_init[idx][
-                dequant8_coeff_init_scan[((x>>1)&12) | (x&3)] ] << shift;
-    }
-    if(h->sps.transform_bypass){
-        for(x=0; x<16; x++)
-            h->dequant4_coeff[0][x] = 1;
-        for(x=0; x<64; x++)
-            h->dequant8_coeff[0][x] = 1<<2;
-    }
-
     s->obmc_scratchpad = NULL;
 
+    if(!h->dequant4_coeff[0])
+        init_dequant_tables(h);
+
     return 0;
 fail:
     free_tables(h);
@@ -3000,11 +3070,15 @@ static void common_init(H264Context *h){
     s->width = s->avctx->width;
     s->height = s->avctx->height;
     s->codec_id= s->avctx->codec->id;
-    
+
     init_pred_ptrs(h);
 
+    h->dequant_coeff_pps= -1;
     s->unrestricted_mv=1;
     s->decode=1; //FIXME
+
+    memset(h->pps.scaling_matrix4, 16, 6*16*sizeof(uint8_t));
+    memset(h->pps.scaling_matrix8, 16, 2*64*sizeof(uint8_t));
 }
 
 static int decode_init(AVCodecContext *avctx){
@@ -3012,7 +3086,7 @@ static int decode_init(AVCodecContext *avctx){
     MpegEncContext * const s = &h->s;
 
     MPV_decode_defaults(s);
-    
+
     s->avctx = avctx;
     common_init(h);
 
@@ -3025,7 +3099,7 @@ static int decode_init(AVCodecContext *avctx){
     avctx->pix_fmt= PIX_FMT_YUV420P;
 
     decode_init_vlc(h);
-    
+
     if(avctx->extradata_size > 0 && avctx->extradata &&
        *(char *)avctx->extradata == 1){
         h->is_avc = 1;
@@ -3037,11 +3111,12 @@ static int decode_init(AVCodecContext *avctx){
     return 0;
 }
 
-static void frame_start(H264Context *h){
+static int frame_start(H264Context *h){
     MpegEncContext * const s = &h->s;
     int i;
 
-    MPV_frame_start(s, s->avctx);
+    if(MPV_frame_start(s, s->avctx) < 0)
+        return -1;
     ff_er_frame_start(s);
 
     assert(s->linesize && s->uvlinesize);
@@ -3063,12 +3138,13 @@ static void frame_start(H264Context *h){
         s->obmc_scratchpad = av_malloc(16*s->linesize + 2*8*s->uvlinesize);
 
 //    s->decode= (s->flags&CODEC_FLAG_PSNR) || !s->encoding || s->current_picture.reference /*|| h->contains_intra*/ || 1;
+    return 0;
 }
 
 static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
     MpegEncContext * const s = &h->s;
     int i;
-    
+
     src_y  -=   linesize;
     src_cb -= uvlinesize;
     src_cr -= uvlinesize;
@@ -3079,7 +3155,7 @@ static inline void backup_mb_border(H264Context *h, uint8_t *src_y, uint8_t *src
     for(i=1; i<17; i++){
         h->left_border[i]= src_y[15+i*  linesize];
     }
-    
+
     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  16*linesize);
     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+16*linesize);
 
@@ -3143,7 +3219,7 @@ b= t;
 static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, int linesize, int uvlinesize){
     MpegEncContext * const s = &h->s;
     int i;
-    
+
     src_y  -= 2 *   linesize;
     src_cb -= 2 * uvlinesize;
     src_cr -= 2 * uvlinesize;
@@ -3155,7 +3231,7 @@ static inline void backup_pair_border(H264Context *h, uint8_t *src_y, uint8_t *s
     for(i=2; i<34; i++){
         h->left_border[i]= src_y[15+i*  linesize];
     }
-    
+
     *(uint64_t*)(h->top_borders[0][s->mb_x]+0)= *(uint64_t*)(src_y +  32*linesize);
     *(uint64_t*)(h->top_borders[0][s->mb_x]+8)= *(uint64_t*)(src_y +8+32*linesize);
     *(uint64_t*)(h->top_borders[1][s->mb_x]+0)= *(uint64_t*)(src_y +  33*linesize);
@@ -3260,7 +3336,7 @@ static void hl_decode_mb(H264Context *h){
         uvlinesize = s->uvlinesize;
 //        dct_offset = s->linesize * 16;
     }
-    
+
     idct_add = transform_bypass
              ? IS_8x8DCT(mb_type) ? s->dsp.add_pixels8 : s->dsp.add_pixels4
              : IS_8x8DCT(mb_type) ? s->dsp.h264_idct8_add : s->dsp.h264_idct_add;
@@ -3331,7 +3407,7 @@ static void hl_decode_mb(H264Context *h){
                             if(!topright_avail){
                                 tr= ptr[3 - linesize]*0x01010101;
                                 topright= (uint8_t*) &tr;
-                            }else 
+                            }else
                                 topright= ptr + 4 - linesize;
                         }else
                             topright= NULL;
@@ -3349,7 +3425,7 @@ static void hl_decode_mb(H264Context *h){
                 h->pred16x16[ h->intra16x16_pred_mode ](dest_y , linesize);
                 if(s->codec_id == CODEC_ID_H264){
                     if(!transform_bypass)
-                        h264_luma_dc_dequant_idct_c(h->mb, s->qscale);
+                        h264_luma_dc_dequant_idct_c(h->mb, s->qscale, h->dequant4_coeff[IS_INTRA(mb_type) ? 0:3][s->qscale][0]);
                 }else
                     svq3_luma_dc_dequant_idct_c(h->mb, s->qscale);
             }
@@ -3369,7 +3445,7 @@ static void hl_decode_mb(H264Context *h){
             }
         }else if(s->codec_id == CODEC_ID_H264){
             hl_motion(h, dest_y, dest_cb, dest_cr,
-                      s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab, 
+                      s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab,
                       s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab,
                       s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab);
         }
@@ -3397,8 +3473,8 @@ static void hl_decode_mb(H264Context *h){
         if(!(s->flags&CODEC_FLAG_GRAY)){
             idct_add = transform_bypass ? s->dsp.add_pixels4 : s->dsp.h264_idct_add;
             if(!transform_bypass){
-                chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
-                chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
+                chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 1:4][h->chroma_qp][0]);
+                chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp, h->dequant4_coeff[IS_INTRA(mb_type) ? 2:5][h->chroma_qp][0]);
             }
             if(s->codec_id == CODEC_ID_H264){
                 for(i=16; i<16+4; i++){
@@ -3444,7 +3520,7 @@ static void hl_decode_mb(H264Context *h){
 
             backup_pair_border(h, pair_dest_y, pair_dest_cb, pair_dest_cr, s->linesize, s->uvlinesize);
             // TODO deblock a pair
-            // top 
+            // top
             s->mb_y--;
             tprintf("call mbaff filter_mb mb_x:%d mb_y:%d pair_dest_y = %p, dest_y = %p\n", mb_x, mb_y, pair_dest_y, dest_y);
             fill_caches(h, mb_type_top, 1); //FIXME don't fill stuff which isn't used by filter_mb
@@ -3477,7 +3553,7 @@ static int fill_default_ref_list(H264Context *h){
     int i;
     int smallest_poc_greater_than_current = -1;
     Picture sorted_short_ref[32];
-    
+
     if(h->slice_type==B_TYPE){
         int out_i;
         int limit= INT_MIN;
@@ -3494,9 +3570,9 @@ static int fill_default_ref_list(H264Context *h){
                     best_i= i;
                 }
             }
-            
+
             assert(best_i != INT_MIN);
-            
+
             limit= best_poc;
             sorted_short_ref[out_i]= *h->short_ref[best_i];
             tprintf("sorted poc: %d->%d poc:%d fn:%d\n", best_i, out_i, sorted_short_ref[out_i].poc, sorted_short_ref[out_i].frame_num);
@@ -3538,7 +3614,7 @@ static int fill_default_ref_list(H264Context *h){
                     h->default_ref_list[ list ][index  ]= *h->long_ref[i];
                     h->default_ref_list[ list ][index++].pic_id= i;;
                 }
-                
+
                 if(list && (smallest_poc_greater_than_current<=0 || smallest_poc_greater_than_current>=h->short_ref_count) && (1 < index)){
                     // swap the two first elements of L1 when
                     // L0 and L1 are identical
@@ -3591,11 +3667,11 @@ static void print_long_term(H264Context *h);
 static int decode_ref_pic_list_reordering(H264Context *h){
     MpegEncContext * const s = &h->s;
     int list, index;
-    
+
     print_short_term(h);
     print_long_term(h);
     if(h->slice_type==I_TYPE || h->slice_type==SI_TYPE) return 0; //FIXME move before func
-    
+
     for(list=0; list<2; list++){
         memcpy(h->ref_list[list], h->default_ref_list[list], sizeof(Picture)*h->ref_count[list]);
 
@@ -3607,15 +3683,15 @@ static int decode_ref_pic_list_reordering(H264Context *h){
                 int pic_id;
                 int i;
                 Picture *ref = NULL;
-                
-                if(reordering_of_pic_nums_idc==3) 
+
+                if(reordering_of_pic_nums_idc==3)
                     break;
-                
+
                 if(index >= h->ref_count[list]){
                     av_log(h->s.avctx, AV_LOG_ERROR, "reference count overflow\n");
                     return -1;
                 }
-                
+
                 if(reordering_of_pic_nums_idc<3){
                     if(reordering_of_pic_nums_idc<2){
                         const int abs_diff_pic_num= get_ue_golomb(&s->gb) + 1;
@@ -3628,7 +3704,7 @@ static int decode_ref_pic_list_reordering(H264Context *h){
                         if(reordering_of_pic_nums_idc == 0) pred-= abs_diff_pic_num;
                         else                                pred+= abs_diff_pic_num;
                         pred &= h->max_pic_num - 1;
-                    
+
                         for(i= h->short_ref_count-1; i>=0; i--){
                             ref = h->short_ref[i];
                             assert(ref->reference == 3);
@@ -3676,18 +3752,18 @@ static int decode_ref_pic_list_reordering(H264Context *h){
         }
         if(h->slice_type!=B_TYPE) break;
     }
-    
+
     if(h->slice_type==B_TYPE && !h->direct_spatial_mv_pred)
         direct_dist_scale_factor(h);
     direct_ref_list_init(h);
-    return 0;    
+    return 0;
 }
 
 static int pred_weight_table(H264Context *h){
     MpegEncContext * const s = &h->s;
     int list, i;
     int luma_def, chroma_def;
-    
+
     h->use_weight= 0;
     h->use_weight_chroma= 0;
     h->luma_log2_weight_denom= get_ue_golomb(&s->gb);
@@ -3698,7 +3774,7 @@ static int pred_weight_table(H264Context *h){
     for(list=0; list<2; list++){
         for(i=0; i<h->ref_count[list]; i++){
             int luma_weight_flag, chroma_weight_flag;
-            
+
             luma_weight_flag= get_bits1(&s->gb);
             if(luma_weight_flag){
                 h->luma_weight[list][i]= get_se_golomb(&s->gb);
@@ -3826,10 +3902,10 @@ static void flush_dpb(AVCodecContext *avctx){
 static Picture * remove_short(H264Context *h, int frame_num){
     MpegEncContext * const s = &h->s;
     int i;
-    
+
     if(s->avctx->debug&FF_DEBUG_MMCO)
         av_log(h->s.avctx, AV_LOG_DEBUG, "remove short %d count %d\n", frame_num, h->short_ref_count);
-    
+
     for(i=0; i<h->short_ref_count; i++){
         Picture *pic= h->short_ref[i];
         if(s->avctx->debug&FF_DEBUG_MMCO)
@@ -3896,10 +3972,10 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
     int i, j;
     int current_is_long=0;
     Picture *pic;
-    
+
     if((s->avctx->debug&FF_DEBUG_MMCO) && mmco_count==0)
         av_log(h->s.avctx, AV_LOG_DEBUG, "no mmco here\n");
-        
+
     for(i=0; i<mmco_count; i++){
         if(s->avctx->debug&FF_DEBUG_MMCO)
             av_log(h->s.avctx, AV_LOG_DEBUG, "mmco:%d %d %d\n", h->mmco[i].opcode, h->mmco[i].short_frame_num, h->mmco[i].long_index);
@@ -3915,7 +3991,7 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
         case MMCO_SHORT2LONG:
             pic= remove_long(h, mmco[i].long_index);
             if(pic) unreference_pic(h, pic);
-            
+
             h->long_ref[ mmco[i].long_index ]= remove_short(h, mmco[i].short_frame_num);
             h->long_ref[ mmco[i].long_index ]->long_ref=1;
             h->long_ref_count++;
@@ -3930,11 +4006,11 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
         case MMCO_LONG:
             pic= remove_long(h, mmco[i].long_index);
             if(pic) unreference_pic(h, pic);
-            
+
             h->long_ref[ mmco[i].long_index ]= s->current_picture_ptr;
             h->long_ref[ mmco[i].long_index ]->long_ref=1;
             h->long_ref_count++;
-            
+
             current_is_long=1;
             break;
         case MMCO_SET_MAX_LONG:
@@ -3958,14 +4034,14 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
         default: assert(0);
         }
     }
-    
+
     if(!current_is_long){
         pic= remove_short(h, s->current_picture_ptr->frame_num);
         if(pic){
             unreference_pic(h, pic);
             av_log(h->s.avctx, AV_LOG_ERROR, "illegal short term buffer state detected\n");
         }
-        
+
         if(h->short_ref_count)
             memmove(&h->short_ref[1], &h->short_ref[0], h->short_ref_count*sizeof(Picture*));
 
@@ -3973,16 +4049,16 @@ static int execute_ref_pic_marking(H264Context *h, MMCO *mmco, int mmco_count){
         h->short_ref[0]->long_ref=0;
         h->short_ref_count++;
     }
-    
+
     print_short_term(h);
     print_long_term(h);
-    return 0; 
+    return 0;
 }
 
 static int decode_ref_pic_marking(H264Context *h){
     MpegEncContext * const s = &h->s;
     int i;
-    
+
     if(h->nal_unit_type == NAL_IDR_SLICE){ //FIXME fields
         s->broken_link= get_bits1(&s->gb) -1;
         h->mmco[0].long_index= get_bits1(&s->gb) - 1; // current_long_term_idx
@@ -3991,10 +4067,10 @@ static int decode_ref_pic_marking(H264Context *h){
         else{
             h->mmco[0].opcode= MMCO_LONG;
             h->mmco_index= 1;
-        } 
+        }
     }else{
         if(get_bits1(&s->gb)){ // adaptive_ref_pic_marking_mode_flag
-            for(i= 0; i<MAX_MMCO_COUNT; i++) { 
+            for(i= 0; i<MAX_MMCO_COUNT; i++) {
                 MMCOOpcode opcode= get_ue_golomb(&s->gb);;
 
                 h->mmco[i].opcode= opcode;
@@ -4012,7 +4088,7 @@ static int decode_ref_pic_marking(H264Context *h){
                         return -1;
                     }
                 }
-                    
+
                 if(opcode > MMCO_LONG){
                     av_log(h->s.avctx, AV_LOG_ERROR, "illegal memory management control operation %d\n", opcode);
                     return -1;
@@ -4032,8 +4108,8 @@ static int decode_ref_pic_marking(H264Context *h){
                 h->mmco_index= 0;
         }
     }
-    
-    return 0; 
+
+    return 0;
 }
 
 static int init_poc(H264Context *h){
@@ -4065,9 +4141,9 @@ static int init_poc(H264Context *h){
         else
             h->poc_msb = h->prev_poc_msb;
 //printf("poc: %d %d\n", h->poc_msb, h->poc_lsb);
-        field_poc[0] = 
+        field_poc[0] =
         field_poc[1] = h->poc_msb + h->poc_lsb;
-        if(s->picture_structure == PICT_FRAME) 
+        if(s->picture_structure == PICT_FRAME)
             field_poc[1] += h->delta_poc_bottom;
     }else if(h->sps.poc_type==1){
         int abs_frame_num, expected_delta_per_poc_cycle, expectedpoc;
@@ -4080,7 +4156,7 @@ static int init_poc(H264Context *h){
 
         if(h->nal_ref_idc==0 && abs_frame_num > 0)
             abs_frame_num--;
-            
+
         expected_delta_per_poc_cycle = 0;
         for(i=0; i < h->sps.poc_cycle_length; i++)
             expected_delta_per_poc_cycle += h->sps.offset_for_ref_frame[ i ]; //FIXME integrate during sps parse
@@ -4095,9 +4171,9 @@ static int init_poc(H264Context *h){
         } else
             expectedpoc = 0;
 
-        if(h->nal_ref_idc == 0) 
+        if(h->nal_ref_idc == 0)
             expectedpoc = expectedpoc + h->sps.offset_for_non_ref_pic;
-        
+
         field_poc[0] = expectedpoc + h->delta_poc[0];
         field_poc[1] = field_poc[0] + h->sps.offset_for_top_to_bottom_field;
 
@@ -4114,7 +4190,7 @@ static int init_poc(H264Context *h){
         field_poc[0]= poc;
         field_poc[1]= poc;
     }
-    
+
     if(s->picture_structure != PICT_BOTTOM_FIELD)
         s->current_picture_ptr->field_poc[0]= field_poc[0];
     if(s->picture_structure != PICT_TOP_FIELD)
@@ -4152,7 +4228,7 @@ static int decode_slice_header(H264Context *h){
         h->slice_type_fixed=1;
     }else
         h->slice_type_fixed=0;
-    
+
     slice_type= slice_type_map[ slice_type ];
     if (slice_type == I_TYPE
         || (h->slice_num != 0 && slice_type == h->slice_type) ) {
@@ -4161,7 +4237,7 @@ static int decode_slice_header(H264Context *h){
     h->slice_type= slice_type;
 
     s->pict_type= h->slice_type; // to make a few old func happy, it's wrong though
-        
+
     pps_id= get_ue_golomb(&s->gb);
     if(pps_id>255){
         av_log(h->s.avctx, AV_LOG_ERROR, "pps_id out of range\n");
@@ -4178,10 +4254,15 @@ static int decode_slice_header(H264Context *h){
         av_log(h->s.avctx, AV_LOG_ERROR, "non existing SPS referenced\n");
         return -1;
     }
-    
+
+    if(h->dequant_coeff_pps != pps_id){
+        h->dequant_coeff_pps = pps_id;
+        init_dequant_tables(h);
+    }
+
     s->mb_width= h->sps.mb_width;
     s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
-    
+
     h->b_stride=  s->mb_width*4 + 1;
     h->b8_stride= s->mb_width*2 + 1;
 
@@ -4190,8 +4271,8 @@ static int decode_slice_header(H264Context *h){
         s->height= 16*s->mb_height - 2*(h->sps.crop_top  + h->sps.crop_bottom);
     else
         s->height= 16*s->mb_height - 4*(h->sps.crop_top  + h->sps.crop_bottom); //FIXME recheck
-    
-    if (s->context_initialized 
+
+    if (s->context_initialized
         && (   s->width != s->avctx->width || s->height != s->avctx->height)) {
         free_tables(h);
         MPV_common_end(s);
@@ -4199,7 +4280,7 @@ static int decode_slice_header(H264Context *h){
     if (!s->context_initialized) {
         if (MPV_common_init(s) < 0)
             return -1;
-            
+
         if(s->dsp.h264_idct_add == ff_h264_idct_add_c){ //FIXME little ugly
             memcpy(h->zigzag_scan, zigzag_scan, 16*sizeof(uint8_t));
             memcpy(h-> field_scan,  field_scan, 16*sizeof(uint8_t));
@@ -4228,12 +4309,17 @@ static int decode_slice_header(H264Context *h){
             s->avctx->sample_aspect_ratio.den = 1;
 
         if(h->sps.timing_info_present_flag){
-            s->avctx->time_base= (AVRational){h->sps.num_units_in_tick, h->sps.time_scale};
+            s->avctx->time_base= (AVRational){h->sps.num_units_in_tick * 2, h->sps.time_scale};
+            if(h->x264_build > 0 && h->x264_build < 44)
+                s->avctx->time_base.den *= 2;
+            av_reduce(&s->avctx->time_base.num, &s->avctx->time_base.den,
+                      s->avctx->time_base.num, s->avctx->time_base.den, 1<<30);
         }
     }
 
     if(h->slice_num == 0){
-        frame_start(h);
+        if(frame_start(h) < 0)
+            return -1;
     }
 
     s->current_picture_ptr->frame_num= //FIXME frame_num cleanup
@@ -4257,7 +4343,7 @@ static int decode_slice_header(H264Context *h){
     if(s->mb_y >= s->mb_height){
         return -1;
     }
-    
+
     if(s->picture_structure==PICT_FRAME){
         h->curr_pic_num=   h->frame_num;
         h->max_pic_num= 1<< h->sps.log2_max_frame_num;
@@ -4265,28 +4351,28 @@ static int decode_slice_header(H264Context *h){
         h->curr_pic_num= 2*h->frame_num;
         h->max_pic_num= 1<<(h->sps.log2_max_frame_num + 1);
     }
-        
+
     if(h->nal_unit_type == NAL_IDR_SLICE){
         get_ue_golomb(&s->gb); /* idr_pic_id */
     }
-   
+
     if(h->sps.poc_type==0){
         h->poc_lsb= get_bits(&s->gb, h->sps.log2_max_poc_lsb);
-        
+
         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME){
             h->delta_poc_bottom= get_se_golomb(&s->gb);
         }
     }
-    
+
     if(h->sps.poc_type==1 && !h->sps.delta_pic_order_always_zero_flag){
         h->delta_poc[0]= get_se_golomb(&s->gb);
-        
+
         if(h->pps.pic_order_present==1 && s->picture_structure==PICT_FRAME)
             h->delta_poc[1]= get_se_golomb(&s->gb);
     }
-    
+
     init_poc(h);
-    
+
     if(h->pps.redundant_pic_cnt_present){
         h->redundant_pic_count= get_ue_golomb(&s->gb);
     }
@@ -4300,7 +4386,7 @@ static int decode_slice_header(H264Context *h){
             h->direct_spatial_mv_pred= get_bits1(&s->gb);
         }
         num_ref_idx_active_override_flag= get_bits1(&s->gb);
-    
+
         if(num_ref_idx_active_override_flag){
             h->ref_count[0]= get_ue_golomb(&s->gb) + 1;
             if(h->slice_type==B_TYPE)
@@ -4320,14 +4406,14 @@ static int decode_slice_header(H264Context *h){
     if(decode_ref_pic_list_reordering(h) < 0)
         return -1;
 
-    if(   (h->pps.weighted_pred          && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE )) 
+    if(   (h->pps.weighted_pred          && (h->slice_type == P_TYPE || h->slice_type == SP_TYPE ))
        || (h->pps.weighted_bipred_idc==1 && h->slice_type==B_TYPE ) )
         pred_weight_table(h);
     else if(h->pps.weighted_bipred_idc==2 && h->slice_type==B_TYPE)
         implicit_weight_table(h);
     else
         h->use_weight = 0;
-    
+
     if(s->current_picture.reference)
         decode_ref_pic_marking(h);
 
@@ -4354,7 +4440,7 @@ static int decode_slice_header(H264Context *h){
     h->slice_beta_offset = 0;
     if( h->pps.deblocking_filter_parameters_present ) {
         h->deblocking_filter= get_ue_golomb(&s->gb);
-        if(h->deblocking_filter < 2) 
+        if(h->deblocking_filter < 2)
             h->deblocking_filter^= 1; // 1<->0
 
         if( h->deblocking_filter ) {
@@ -4376,10 +4462,10 @@ static int decode_slice_header(H264Context *h){
     h->slice_num++;
 
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n", 
+        av_log(h->s.avctx, AV_LOG_DEBUG, "slice:%d %s mb:%d %c pps:%d frame:%d poc:%d/%d ref:%d/%d qp:%d loop:%d:%d:%d weight:%d%s\n",
                h->slice_num,
                (s->picture_structure==PICT_FRAME ? "F" : s->picture_structure==PICT_TOP_FIELD ? "T" : "B"),
-               first_mb_in_slice, 
+               first_mb_in_slice,
                av_get_pict_type_char(h->slice_type),
                pps_id, h->frame_num,
                s->current_picture_ptr->field_poc[0], s->current_picture_ptr->field_poc[1],
@@ -4400,11 +4486,11 @@ static int decode_slice_header(H264Context *h){
 static inline int get_level_prefix(GetBitContext *gb){
     unsigned int buf;
     int log;
-    
+
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
     buf=GET_CACHE(re, gb);
-    
+
     log= 32 - av_log2(buf);
 #ifdef TRACE
     print_bin(buf>>(32-log), log);
@@ -4434,7 +4520,7 @@ static inline int get_dct8x8_allowed(H264Context *h){
  * @param max_coeff number of coefficients in the block
  * @return <0 if an error occured
  */
-static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff){
+static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff){
     MpegEncContext * const s = &h->s;
     static const int coeff_token_table_index[17]= {0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3};
     int level[16];
@@ -4445,7 +4531,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
     if(n == CHROMA_DC_BLOCK_INDEX){
         coeff_token= get_vlc2(gb, chroma_dc_coeff_token_vlc.table, CHROMA_DC_COEFF_TOKEN_VLC_BITS, 1);
         total_coeff= coeff_token>>2;
-    }else{    
+    }else{
         if(n == LUMA_DC_BLOCK_INDEX){
             total_coeff= pred_non_zero_count(h, 0);
             coeff_token= get_vlc2(gb, coeff_token_vlc[ coeff_token_table_index[total_coeff] ].table, COEFF_TOKEN_VLC_BITS, 2);
@@ -4462,11 +4548,11 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
 
     if(total_coeff==0)
         return 0;
-        
+
     trailing_ones= coeff_token&3;
     tprintf("trailing:%d, total:%d\n", trailing_ones, total_coeff);
     assert(total_coeff<=16);
-    
+
     for(i=0; i<trailing_ones; i++){
         level[i]= 1 - 2*get_bits1(gb);
     }
@@ -4551,7 +4637,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
             block[j]= level[i];
         }
     }else{
-        block[j] = level[0] * qmul[j];
+        block[j] = (level[0] * qmul[j] + 32)>>6;
         for(i=1;i<total_coeff;i++) {
             if(zeros_left <= 0)
                 run_before = 0;
@@ -4564,8 +4650,7 @@ static int decode_residual(H264Context *h, GetBitContext *gb, DCTELEM *block, in
             coeff_num -= 1 + run_before;
             j= scantable[ coeff_num ];
 
-            block[j]= level[i] * qmul[j];
-//            printf("%d %d  ", block[j], qmul[j]);
+            block[j]= (level[i] * qmul[j] + 32)>>6;
         }
     }
 
@@ -4584,7 +4669,7 @@ static void decode_mb_skip(H264Context *h){
     MpegEncContext * const s = &h->s;
     const int mb_xy= s->mb_x + s->mb_y*s->mb_stride;
     int mb_type=0;
-    
+
     memset(h->non_zero_count[mb_xy], 0, 16);
     memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
 
@@ -4636,7 +4721,7 @@ static int decode_mb_cavlc(H264Context *h){
     int mb_type, partition_count, cbp;
     int dct8x8_allowed= h->pps.transform_8x8_mode;
 
-    s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?    
+    s->dsp.clear_blocks(h->mb); //FIXME avoid if already clear (move after skip handlong?
 
     tprintf("pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
     cbp = 0; /* avoid warning. FIXME: find a solution without slowing
@@ -4644,7 +4729,7 @@ static int decode_mb_cavlc(H264Context *h){
     if(h->slice_type != I_TYPE && h->slice_type != SI_TYPE){
         if(s->mb_skip_run==-1)
             s->mb_skip_run= get_ue_golomb(&s->gb);
-        
+
         if (s->mb_skip_run--) {
             decode_mb_skip(h);
             return 0;
@@ -4655,9 +4740,9 @@ static int decode_mb_cavlc(H264Context *h){
             h->mb_field_decoding_flag = get_bits1(&s->gb);
     }else
         h->mb_field_decoding_flag= (s->picture_structure!=PICT_FRAME);
-    
+
     h->prev_mb_skipped= 0;
-    
+
     mb_type= get_ue_golomb(&s->gb);
     if(h->slice_type == B_TYPE){
         if(mb_type < 23){
@@ -4692,13 +4777,13 @@ decode_intra_mb:
         mb_type |= MB_TYPE_INTERLACED;
 
     h->slice_table[ mb_xy ]= h->slice_num;
-    
+
     if(IS_INTRA_PCM(mb_type)){
         unsigned int x, y;
-        
+
         // we assume these blocks are very rare so we dont optimize it
         align_get_bits(&s->gb);
-        
+
         // The pixels are stored in the same order as levels in h->mb array.
         for(y=0; y<16; y++){
             const int index= 4*(y&3) + 32*((y>>2)&1) + 128*(y>>3);
@@ -4721,17 +4806,17 @@ decode_intra_mb:
                 h->mb[index + (x&3) + 16*(x>>2)]= get_bits(&s->gb, 8);
             }
         }
-    
+
         // In deblocking, the quantizer is 0
         s->current_picture.qscale_table[mb_xy]= 0;
         h->chroma_qp = get_chroma_qp(h->pps.chroma_qp_index_offset, 0);
         // All coeffs are present
         memset(h->non_zero_count[mb_xy], 16, 16);
-        
+
         s->current_picture.mb_type[mb_xy]= mb_type;
         return 0;
     }
-        
+
     fill_caches(h, mb_type, 0);
 
     //mb_pred
@@ -4760,7 +4845,7 @@ decode_intra_mb:
                     }else{
                         mode= predicted_mode;
                     }
-                    
+
                     if(di==4)
                         fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
                     else
@@ -4781,7 +4866,7 @@ decode_intra_mb:
                 return -1;
     }else if(partition_count==4){
         int i, j, sub_partition_count[4], list, ref[2][4];
-        
+
         if(h->slice_type == B_TYPE){
             for(i=0; i<4; i++){
                 h->sub_mb_type[i]= get_ue_golomb(&s->gb);
@@ -4793,8 +4878,13 @@ decode_intra_mb:
                 h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
             }
             if(   IS_DIRECT(h->sub_mb_type[0]) || IS_DIRECT(h->sub_mb_type[1])
-               || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3]))
+               || IS_DIRECT(h->sub_mb_type[2]) || IS_DIRECT(h->sub_mb_type[3])) {
                 pred_direct_motion(h, &mb_type);
+                h->ref_cache[0][scan8[4]] =
+                h->ref_cache[1][scan8[4]] =
+                h->ref_cache[0][scan8[12]] =
+                h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
+            }
         }else{
             assert(h->slice_type == P_TYPE || h->slice_type == SP_TYPE); //FIXME SP correct ?
             for(i=0; i<4; i++){
@@ -4807,7 +4897,7 @@ decode_intra_mb:
                 h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
             }
         }
-        
+
         for(list=0; list<2; list++){
             int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
             if(ref_count == 0) continue;
@@ -4824,16 +4914,19 @@ decode_intra_mb:
                 }
             }
         }
-        
+
         if(dct8x8_allowed)
             dct8x8_allowed = get_dct8x8_allowed(h);
-        
+
         for(list=0; list<2; list++){
             const int ref_count= IS_REF0(mb_type) ? 1 : h->ref_count[list];
             if(ref_count == 0) continue;
 
             for(i=0; i<4; i++){
-                if(IS_DIRECT(h->sub_mb_type[i])) continue;
+                if(IS_DIRECT(h->sub_mb_type[i])) {
+                    h->ref_cache[list][ scan8[4*i] ] = h->ref_cache[list][ scan8[4*i]+1 ];
+                    continue;
+                }
                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ]=
                 h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
 
@@ -4850,9 +4943,9 @@ decode_intra_mb:
                         tprintf("final mv:%d %d\n", mx, my);
 
                         if(IS_SUB_8X8(sub_mb_type)){
-                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= 
+                            mv_cache[ 0 ][0]= mv_cache[ 1 ][0]=
                             mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
-                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]= 
+                            mv_cache[ 0 ][1]= mv_cache[ 1 ][1]=
                             mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;
                         }else if(IS_SUB_8X4(sub_mb_type)){
                             mv_cache[ 0 ][0]= mv_cache[ 1 ][0]= mx;
@@ -4954,17 +5047,17 @@ decode_intra_mb:
             }
         }
     }
-    
+
     if(IS_INTER(mb_type))
         write_back_motion(h, mb_type);
-    
+
     if(!IS_INTRA16x16(mb_type)){
         cbp= get_ue_golomb(&s->gb);
         if(cbp > 47){
             av_log(h->s.avctx, AV_LOG_ERROR, "cbp too large (%d) at %d %d\n", cbp, s->mb_x, s->mb_y);
             return -1;
         }
-        
+
         if(IS_INTRA4x4(mb_type))
             cbp= golomb_to_intra4x4_cbp[cbp];
         else
@@ -4982,7 +5075,7 @@ decode_intra_mb:
         int chroma_qp, dquant;
         GetBitContext *gb= IS_INTRA(mb_type) ? h->intra_gb_ptr : h->inter_gb_ptr;
         const uint8_t *scan, *dc_scan;
-        
+
 //        fill_non_zero_count_cache(h);
 
         if(IS_INTERLACED(mb_type)){
@@ -4999,16 +5092,16 @@ decode_intra_mb:
             av_log(h->s.avctx, AV_LOG_ERROR, "dquant out of range (%d) at %d %d\n", dquant, s->mb_x, s->mb_y);
             return -1;
         }
-        
+
         s->qscale += dquant;
         if(((unsigned)s->qscale) > 51){
             if(s->qscale<0) s->qscale+= 52;
             else            s->qscale-= 52;
         }
-        
+
         h->chroma_qp= chroma_qp= get_chroma_qp(h->pps.chroma_qp_index_offset, s->qscale);
         if(IS_INTRA16x16(mb_type)){
-            if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[s->qscale], 16) < 0){
+            if( decode_residual(h, h->intra_gb_ptr, h->mb, LUMA_DC_BLOCK_INDEX, dc_scan, h->dequant4_coeff[0][s->qscale], 16) < 0){
                 return -1; //FIXME continue if partitioned and other return -1 too
             }
 
@@ -5018,7 +5111,7 @@ decode_intra_mb:
                 for(i8x8=0; i8x8<4; i8x8++){
                     for(i4x4=0; i4x4<4; i4x4++){
                         const int index= i4x4 + 4*i8x8;
-                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 ){
+                        if( decode_residual(h, h->intra_gb_ptr, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 ){
                             return -1;
                         }
                     }
@@ -5034,21 +5127,16 @@ decode_intra_mb:
                         uint8_t *nnz;
                         for(i4x4=0; i4x4<4; i4x4++){
                             if( decode_residual(h, gb, buf, i4x4+4*i8x8, zigzag_scan8x8_cavlc+16*i4x4,
-                                                h->dequant8_coeff[s->qscale], 16) <0 )
+                                                h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 16) <0 )
                                 return -1;
                         }
-                        if(s->qscale < 12){
-                            int i;
-                            for(i=0; i<64; i++)
-                                buf[i] = (buf[i] + 2) >> 2;
-                        }
                         nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
                         nnz[0] |= nnz[1] | nnz[8] | nnz[9];
                     }else{
                         for(i4x4=0; i4x4<4; i4x4++){
                             const int index= i4x4 + 4*i8x8;
-                        
-                            if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[s->qscale], 16) <0 ){
+
+                            if( decode_residual(h, gb, h->mb + 16*index, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) <0 ){
                                 return -1;
                             }
                         }
@@ -5059,10 +5147,10 @@ decode_intra_mb:
                 }
             }
         }
-        
+
         if(cbp&0x30){
             for(chroma_idx=0; chroma_idx<2; chroma_idx++)
-                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, h->dequant4_coeff[chroma_qp], 4) < 0){
+                if( decode_residual(h, gb, h->mb + 256 + 16*4*chroma_idx, CHROMA_DC_BLOCK_INDEX, chroma_dc_scan, NULL, 4) < 0){
                     return -1;
                 }
         }
@@ -5071,7 +5159,7 @@ decode_intra_mb:
             for(chroma_idx=0; chroma_idx<2; chroma_idx++){
                 for(i4x4=0; i4x4<4; i4x4++){
                     const int index= 16 + 4*chroma_idx + i4x4;
-                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_qp], 15) < 0){
+                    if( decode_residual(h, gb, h->mb + 16*index, index, scan + 1, h->dequant4_coeff[chroma_idx+1+(IS_INTRA( mb_type ) ? 0:3)][chroma_qp], 15) < 0){
                         return -1;
                     }
                 }
@@ -5101,7 +5189,7 @@ static int decode_cabac_field_decoding_flag(H264Context *h) {
     const int mbb_xy = mb_x     + (mb_y-2)*s->mb_stride;
 
     unsigned int ctx = 0;
-    
+
     if( h->slice_table[mba_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mba_xy] ) ) {
         ctx += 1;
     }
@@ -5115,7 +5203,7 @@ static int decode_cabac_field_decoding_flag(H264Context *h) {
 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
     uint8_t *state= &h->cabac_state[ctx_base];
     int mb_type;
-    
+
     if(intra_slice){
         MpegEncContext * const s = &h->s;
         const int mba_xy = h->left_mb_xy[0];
@@ -5240,12 +5328,10 @@ static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
     if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
         return pred_mode;
 
-    if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
-        mode += 1;
-    if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
-        mode += 2;
-    if( get_cabac( &h->cabac, &h->cabac_state[69] ) )
-        mode += 4;
+    mode += 1 * get_cabac( &h->cabac, &h->cabac_state[69] );
+    mode += 2 * get_cabac( &h->cabac, &h->cabac_state[69] );
+    mode += 4 * get_cabac( &h->cabac, &h->cabac_state[69] );
+
     if( mode >= pred_mode )
         return mode + 1;
     else
@@ -5510,7 +5596,7 @@ static int inline get_cabac_cbf_ctx( H264Context *h, int cat, int idx ) {
     return ctx + 4 * cat;
 }
 
-static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint16_t *qmul, int max_coeff) {
+static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
     const int mb_xy  = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
     static const int significant_coeff_flag_field_offset[2] = { 105, 277 };
     static const int last_significant_coeff_flag_field_offset[2] = { 166, 338 };
@@ -5616,10 +5702,10 @@ static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat
                 if( get_cabac_bypass( &h->cabac ) ) block[j] = -1;
                 else                                block[j] =  1;
             }else{
-                if( get_cabac_bypass( &h->cabac ) ) block[j] = -qmul[j];
-                else                                block[j] =  qmul[j];
+                if( get_cabac_bypass( &h->cabac ) ) block[j] = (-qmul[j] + 32) >> 6;
+                else                                block[j] = ( qmul[j] + 32) >> 6;
             }
-    
+
             abslevel1++;
         } else {
             int coeff_abs = 2;
@@ -5634,7 +5720,7 @@ static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat
                     coeff_abs += 1 << j;
                     j++;
                 }
-    
+
                 while( j-- ) {
                     if( get_cabac_bypass( &h->cabac ) )
                         coeff_abs += 1 << j ;
@@ -5645,17 +5731,17 @@ static int inline decode_cabac_residual( H264Context *h, DCTELEM *block, int cat
                 if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs;
                 else                                block[j] =  coeff_abs;
             }else{
-                if( get_cabac_bypass( &h->cabac ) ) block[j] = -coeff_abs * qmul[j];
-                else                                block[j] =  coeff_abs * qmul[j];
+                if( get_cabac_bypass( &h->cabac ) ) block[j] = (-coeff_abs * qmul[j] + 32) >> 6;
+                else                                block[j] = ( coeff_abs * qmul[j] + 32) >> 6;
             }
-    
+
             abslevelgt1++;
         }
     }
     return 0;
 }
 
-void inline compute_mb_neighboors(H264Context *h)
+static void inline compute_mb_neighbors(H264Context *h)
 {
     MpegEncContext * const s = &h->s;
     const int mb_xy  = s->mb_x + s->mb_y*s->mb_stride;
@@ -5715,7 +5801,7 @@ static int decode_mb_cabac(H264Context *h) {
 
     h->prev_mb_skipped = 0;
 
-    compute_mb_neighboors(h);
+    compute_mb_neighbors(h);
     if( ( mb_type = decode_cabac_mb_type( h ) ) < 0 ) {
         av_log( h->s.avctx, AV_LOG_ERROR, "decode_cabac_mb_type failed\n" );
         return -1;
@@ -5753,7 +5839,7 @@ decode_intra_mb:
     if(IS_INTRA_PCM(mb_type)) {
         const uint8_t *ptr;
         unsigned int x, y;
-        
+
         // We assume these blocks are very rare so we dont optimize it.
         // FIXME The two following lines get the bitstream position in the cabac
         // decode, I think it should be done by a function in cabac.h (or cabac.c).
@@ -6078,7 +6164,7 @@ decode_intra_mb:
             if( cbp&15 ) {
                 for( i = 0; i < 16; i++ ) {
                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
-                    if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[s->qscale], 15) < 0 )
+                    if( decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15) < 0 )
                         return -1;
                 }
             } else {
@@ -6090,18 +6176,13 @@ decode_intra_mb:
                 if( cbp & (1<<i8x8) ) {
                     if( IS_8x8DCT(mb_type) ) {
                         if( decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8,
-                                zigzag_scan8x8, h->dequant8_coeff[s->qscale], 64) < 0 )
+                                zigzag_scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64) < 0 )
                             return -1;
-                        if(s->qscale < 12){
-                            int i;
-                            for(i=0; i<64; i++)
-                                h->mb[64*i8x8+i] = (h->mb[64*i8x8+i] + 2) >> 2;
-                        }
                     } else
                     for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
                         const int index = 4*i8x8 + i4x4;
                         //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
-                        if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[s->qscale], 16) < 0 )
+                        if( decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16) < 0 )
                             return -1;
                     }
                 } else {
@@ -6126,7 +6207,7 @@ decode_intra_mb:
                 for( i = 0; i < 4; i++ ) {
                     const int index = 16 + 4 * c + i;
                     //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
-                    if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[h->chroma_qp], 15) < 0)
+                    if( decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp], 15) < 0)
                         return -1;
                 }
             }
@@ -6232,7 +6313,7 @@ static void filter_mb_mbaff_edgev( H264Context *h, uint8_t *pix, int stride, int
         int index_a;
         int alpha;
         int beta;
-    
+
         int qp_index;
         int bS_index = (i >> 1);
         if (h->mb_field_decoding_flag) {
@@ -6532,8 +6613,18 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
     {
         int edge;
         const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
+        const int mb_type = s->current_picture.mb_type[mb_xy];
+        const int mbm_type = s->current_picture.mb_type[mbm_xy];
         int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
 
+        const int edges = ((mb_type & mbm_type) & (MB_TYPE_16x16|MB_TYPE_SKIP))
+                                               == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
+        // how often to recheck mv-based bS when iterating between edges
+        const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
+                              (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
+        // how often to recheck mv-based bS when iterating along each edge
+        const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
+
         if (first_vertical_edge_done) {
             start = 1;
             first_vertical_edge_done = 0;
@@ -6543,18 +6634,19 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
             start = 1;
 
         /* Calculate bS */
-        for( edge = start; edge < 4; edge++ ) {
+        for( edge = start; edge < edges; edge++ ) {
             /* mbn_xy: neighbor macroblock */
-            int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
+            const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
+            const int mbn_type = s->current_picture.mb_type[mbn_xy];
             int bS[4];
             int qp;
 
-            if( (edge&1) && IS_8x8DCT(s->current_picture.mb_type[mb_xy]) )
+            if( (edge&1) && IS_8x8DCT(mb_type) )
                 continue;
 
             if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
-                && !IS_INTERLACED(s->current_picture.mb_type[mb_xy])
-                && IS_INTERLACED(s->current_picture.mb_type[mbn_xy])
+                && !IS_INTERLACED(mb_type)
+                && IS_INTERLACED(mbn_type)
                 ) {
                 // This is a special case in the norm where the filtering must
                 // be done twice (one each of the field) even if we are in a
@@ -6566,8 +6658,8 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
                 int qp, chroma_qp;
 
                 // first filtering
-                if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
-                    IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
+                if( IS_INTRA(mb_type) ||
+                    IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
                     bS[0] = bS[1] = bS[2] = bS[3] = 3;
                 } else {
                     // TODO
@@ -6587,8 +6679,8 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
 
                 // second filtering
                 mbn_xy += s->mb_stride;
-                if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
-                    IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
+                if( IS_INTRA(mb_type) ||
+                    IS_INTRA(mbn_type) ) {
                     bS[0] = bS[1] = bS[2] = bS[3] = 3;
                 } else {
                     // TODO
@@ -6607,11 +6699,11 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
                 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
                 continue;
             }
-            if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) ||
-                IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) {
+            if( IS_INTRA(mb_type) ||
+                IS_INTRA(mbn_type) ) {
                 int value;
                 if (edge == 0) {
-                    if (   (!IS_INTERLACED(s->current_picture.mb_type[mb_xy]) && !IS_INTERLACED(s->current_picture.mb_type[mbm_xy]))
+                    if (   (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
                         || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
                     ) {
                         value = 4;
@@ -6623,7 +6715,28 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
                 }
                 bS[0] = bS[1] = bS[2] = bS[3] = value;
             } else {
-                int i;
+                int i, l;
+                int mv_done;
+
+                if( edge & mask_edge ) {
+                    bS[0] = bS[1] = bS[2] = bS[3] = 0;
+                    mv_done = 1;
+                }
+                else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
+                    int b_idx= 8 + 4 + edge * (dir ? 8:1);
+                    int bn_idx= b_idx - (dir ? 8:1);
+                    int v = 0;
+                    for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
+                        v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
+                             ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
+                             ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4;
+                    }
+                    bS[0] = bS[1] = bS[2] = bS[3] = v;
+                    mv_done = 1;
+                }
+                else
+                    mv_done = 0;
+
                 for( i = 0; i < 4; i++ ) {
                     int x = dir == 0 ? edge : i;
                     int y = dir == 0 ? i    : edge;
@@ -6634,9 +6747,8 @@ static void filter_mb( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8
                         h->non_zero_count_cache[bn_idx] != 0 ) {
                         bS[i] = 2;
                     }
-                    else
+                    else if(!mv_done)
                     {
-                        int l;
                         bS[i] = 0;
                         for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
                             if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
@@ -6813,7 +6925,7 @@ static int decode_slice(H264Context *h){
     for(;s->mb_y < s->mb_height; s->mb_y++){
         for(;s->mb_x < s->mb_width; s->mb_x++){
             int ret= decode_mb(h);
-            
+
             hl_decode_mb(h);
 
             if(ret<0){
@@ -6822,7 +6934,7 @@ static int decode_slice(H264Context *h){
 
                 return -1;
             }
-        
+
             if(++s->mb_x >= s->mb_width){
                 s->mb_x=0;
                 if(++s->mb_y >= s->mb_height){
@@ -6837,7 +6949,7 @@ static int decode_slice(H264Context *h){
                     }
                 }
             }
-        
+
             if(get_bits_count(s->?gb) >= s->gb?.size_in_bits){
                 if(get_bits_count(s->gb) == s->gb.size_in_bits){
                     ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, (AC_END|DC_END|MV_END)&part_mask);
@@ -6861,44 +6973,44 @@ static int decode_unregistered_user_data(H264Context *h, int size){
     MpegEncContext * const s = &h->s;
     uint8_t user_data[16+256];
     int e, build, i;
-    
+
     if(size<16)
         return -1;
-    
+
     for(i=0; i<sizeof(user_data)-1 && i<size; i++){
         user_data[i]= get_bits(&s->gb, 8);
     }
-    
+
     user_data[i]= 0;
     e= sscanf(user_data+16, "x264 - core %d"/*%s - H.264/MPEG-4 AVC codec - Copyleft 2005 - http://www.videolan.org/x264.html*/, &build);
     if(e==1 && build>=0)
         h->x264_build= build;
-        
+
     if(s->avctx->debug & FF_DEBUG_BUGS)
         av_log(s->avctx, AV_LOG_DEBUG, "user data:\"%s\"\n", user_data+16);
 
     for(; i<size; i++)
         skip_bits(&s->gb, 8);
-    
+
     return 0;
 }
 
 static int decode_sei(H264Context *h){
     MpegEncContext * const s = &h->s;
-    
+
     while(get_bits_count(&s->gb) + 16 < s->gb.size_in_bits){
         int size, type;
-        
+
         type=0;
         do{
             type+= show_bits(&s->gb, 8);
         }while(get_bits(&s->gb, 8) == 255);
-        
+
         size=0;
         do{
             size+= show_bits(&s->gb, 8);
         }while(get_bits(&s->gb, 8) == 255);
-        
+
         switch(type){
         case 5:
             if(decode_unregistered_user_data(h, size) < 0);
@@ -6907,7 +7019,7 @@ static int decode_sei(H264Context *h){
         default:
             skip_bits(&s->gb, 8*size);
         }
-        
+
         //FIXME check bits here
         align_get_bits(&s->gb);
     }
@@ -6938,7 +7050,7 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){
     int nal_hrd_parameters_present_flag, vcl_hrd_parameters_present_flag;
 
     aspect_ratio_info_present_flag= get_bits1(&s->gb);
-    
+
     if( aspect_ratio_info_present_flag ) {
         aspect_ratio_idc= get_bits(&s->gb, 8);
         if( aspect_ratio_idc == EXTENDED_SAR ) {
@@ -6951,7 +7063,7 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){
             return -1;
         }
     }else{
-        sps->sar.num= 
+        sps->sar.num=
         sps->sar.den= 0;
     }
 //            s->avctx->aspect_ratio= sar_width*s->width / (float)(s->height*sar_height);
@@ -7006,12 +7118,59 @@ static inline int decode_vui_parameters(H264Context *h, SPS *sps){
     return 0;
 }
 
+static void decode_scaling_list(H264Context *h, uint8_t *factors, int size,
+                                const uint8_t *jvt_list, const uint8_t *fallback_list){
+    MpegEncContext * const s = &h->s;
+    int i, last = 8, next = 8;
+    const uint8_t *scan = size == 16 ? zigzag_scan : zigzag_scan8x8;
+    if(!get_bits1(&s->gb)) /* matrix not written, we use the predicted one */
+        memcpy(factors, fallback_list, size*sizeof(uint8_t));
+    else
+    for(i=0;i<size;i++){
+        if(next)
+            next = (last + get_se_golomb(&s->gb)) & 0xff;
+        if(!i && !next){ /* matrix not written, we use the preset one */
+            memcpy(factors, jvt_list, size*sizeof(uint8_t));
+            break;
+        }
+        last = factors[scan[i]] = next ? next : last;
+    }
+}
+
+static void decode_scaling_matrices(H264Context *h, SPS *sps, PPS *pps, int is_sps,
+                                   uint8_t (*scaling_matrix4)[16], uint8_t (*scaling_matrix8)[64]){
+    MpegEncContext * const s = &h->s;
+    int fallback_sps = !is_sps && sps->scaling_matrix_present;
+    const uint8_t *fallback[4] = {
+        fallback_sps ? sps->scaling_matrix4[0] : default_scaling4[0],
+        fallback_sps ? sps->scaling_matrix4[3] : default_scaling4[1],
+        fallback_sps ? sps->scaling_matrix8[0] : default_scaling8[0],
+        fallback_sps ? sps->scaling_matrix8[1] : default_scaling8[1]
+    };
+    if(get_bits1(&s->gb)){
+        sps->scaling_matrix_present |= is_sps;
+        decode_scaling_list(h,scaling_matrix4[0],16,default_scaling4[0],fallback[0]); // Intra, Y
+        decode_scaling_list(h,scaling_matrix4[1],16,default_scaling4[0],scaling_matrix4[0]); // Intra, Cr
+        decode_scaling_list(h,scaling_matrix4[2],16,default_scaling4[0],scaling_matrix4[1]); // Intra, Cb
+        decode_scaling_list(h,scaling_matrix4[3],16,default_scaling4[1],fallback[1]); // Inter, Y
+        decode_scaling_list(h,scaling_matrix4[4],16,default_scaling4[1],scaling_matrix4[3]); // Inter, Cr
+        decode_scaling_list(h,scaling_matrix4[5],16,default_scaling4[1],scaling_matrix4[4]); // Inter, Cb
+        if(is_sps || pps->transform_8x8_mode){
+            decode_scaling_list(h,scaling_matrix8[0],64,default_scaling8[0],fallback[2]);  // Intra, Y
+            decode_scaling_list(h,scaling_matrix8[1],64,default_scaling8[1],fallback[3]);  // Inter, Y
+        }
+    } else if(fallback_sps) {
+        memcpy(scaling_matrix4, sps->scaling_matrix4, 6*16*sizeof(uint8_t));
+        memcpy(scaling_matrix8, sps->scaling_matrix8, 2*64*sizeof(uint8_t));
+    }
+}
+
 static inline int decode_seq_parameter_set(H264Context *h){
     MpegEncContext * const s = &h->s;
     int profile_idc, level_idc;
     int sps_id, i;
     SPS *sps;
-    
+
     profile_idc= get_bits(&s->gb, 8);
     get_bits1(&s->gb);   //constraint_set0_flag
     get_bits1(&s->gb);   //constraint_set1_flag
@@ -7020,7 +7179,7 @@ static inline int decode_seq_parameter_set(H264Context *h){
     get_bits(&s->gb, 4); // reserved
     level_idc= get_bits(&s->gb, 8);
     sps_id= get_ue_golomb(&s->gb);
-    
+
     sps= &h->sps_buffer[ sps_id ];
     sps->profile_idc= profile_idc;
     sps->level_idc= level_idc;
@@ -7031,15 +7190,13 @@ static inline int decode_seq_parameter_set(H264Context *h){
         get_ue_golomb(&s->gb);  //bit_depth_luma_minus8
         get_ue_golomb(&s->gb);  //bit_depth_chroma_minus8
         sps->transform_bypass = get_bits1(&s->gb);
-        if(get_bits1(&s->gb)){  //seq_scaling_matrix_present_flag
-            av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
-            return -1;
-        }
-    }
+        decode_scaling_matrices(h, sps, NULL, 1, sps->scaling_matrix4, sps->scaling_matrix8);
+    }else
+        sps->scaling_matrix_present = 0;
 
     sps->log2_max_frame_num= get_ue_golomb(&s->gb) + 4;
     sps->poc_type= get_ue_golomb(&s->gb);
-    
+
     if(sps->poc_type == 0){ //FIXME #define
         sps->log2_max_poc_lsb= get_ue_golomb(&s->gb) + 4;
     } else if(sps->poc_type == 1){//FIXME #define
@@ -7047,7 +7204,7 @@ static inline int decode_seq_parameter_set(H264Context *h){
         sps->offset_for_non_ref_pic= get_se_golomb(&s->gb);
         sps->offset_for_top_to_bottom_field= get_se_golomb(&s->gb);
         sps->poc_cycle_length= get_ue_golomb(&s->gb);
-        
+
         for(i=0; i<sps->poc_cycle_length; i++)
             sps->offset_for_ref_frame[i]= get_se_golomb(&s->gb);
     }
@@ -7063,7 +7220,7 @@ static inline int decode_seq_parameter_set(H264Context *h){
     sps->gaps_in_frame_num_allowed_flag= get_bits1(&s->gb);
     sps->mb_width= get_ue_golomb(&s->gb) + 1;
     sps->mb_height= get_ue_golomb(&s->gb) + 1;
-    if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 || 
+    if((unsigned)sps->mb_width >= INT_MAX/16 || (unsigned)sps->mb_height >= INT_MAX/16 ||
        avcodec_check_dimensions(NULL, 16*sps->mb_width, 16*sps->mb_height))
         return -1;
 
@@ -7085,26 +7242,26 @@ static inline int decode_seq_parameter_set(H264Context *h){
             av_log(h->s.avctx, AV_LOG_ERROR, "insane cropping not completely supported, this could look slightly wrong ...\n");
         }
     }else{
-        sps->crop_left  = 
-        sps->crop_right = 
-        sps->crop_top   = 
+        sps->crop_left  =
+        sps->crop_right =
+        sps->crop_top   =
         sps->crop_bottom= 0;
     }
 
     sps->vui_parameters_present_flag= get_bits1(&s->gb);
     if( sps->vui_parameters_present_flag )
         decode_vui_parameters(h, sps);
-    
+
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n", 
+        av_log(h->s.avctx, AV_LOG_DEBUG, "sps:%d profile:%d/%d poc:%d ref:%d %dx%d %s %s crop:%d/%d/%d/%d %s\n",
                sps_id, sps->profile_idc, sps->level_idc,
                sps->poc_type,
                sps->ref_frame_count,
                sps->mb_width, sps->mb_height,
                sps->frame_mbs_only_flag ? "FRM" : (sps->mb_aff ? "MB-AFF" : "PIC-AFF"),
                sps->direct_8x8_inference_flag ? "8B8" : "",
-               sps->crop_left, sps->crop_right, 
-               sps->crop_top, sps->crop_bottom, 
+               sps->crop_left, sps->crop_right,
+               sps->crop_top, sps->crop_bottom,
                sps->vui_parameters_present_flag ? "VUI" : ""
                );
     }
@@ -7115,7 +7272,7 @@ static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
     MpegEncContext * const s = &h->s;
     int pps_id= get_ue_golomb(&s->gb);
     PPS *pps= &h->pps_buffer[pps_id];
-    
+
     pps->sps_id= get_ue_golomb(&s->gb);
     pps->cabac= get_bits1(&s->gb);
     pps->pic_order_present= get_bits1(&s->gb);
@@ -7163,7 +7320,7 @@ static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
         av_log(h->s.avctx, AV_LOG_ERROR, "reference overflow (pps)\n");
         return -1;
     }
-    
+
     pps->weighted_pred= get_bits1(&s->gb);
     pps->weighted_bipred_idc= get_bits(&s->gb, 2);
     pps->init_qp= get_se_golomb(&s->gb) + 26;
@@ -7173,17 +7330,17 @@ static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
     pps->constrained_intra_pred= get_bits1(&s->gb);
     pps->redundant_pic_cnt_present = get_bits1(&s->gb);
 
+    memset(pps->scaling_matrix4, 16, 6*16*sizeof(uint8_t));
+    memset(pps->scaling_matrix8, 16, 2*64*sizeof(uint8_t));
+
     if(get_bits_count(&s->gb) < bit_length){
         pps->transform_8x8_mode= get_bits1(&s->gb);
-        if(get_bits1(&s->gb)){  //pic_scaling_matrix_present_flag
-            av_log(h->s.avctx, AV_LOG_ERROR, "custom scaling matrix not implemented\n");
-            return -1;
-        }
+        decode_scaling_matrices(h, &h->sps_buffer[pps->sps_id], pps, 0, pps->scaling_matrix4, pps->scaling_matrix8);
         get_se_golomb(&s->gb);  //second_chroma_qp_index_offset
     }
-    
+
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n", 
+        av_log(h->s.avctx, AV_LOG_DEBUG, "pps:%d sps:%d %s slice_groups:%d ref:%d/%d %s qp:%d/%d/%d %s %s %s %s\n",
                pps_id, pps->sps_id,
                pps->cabac ? "CABAC" : "CAVLC",
                pps->slice_group_count,
@@ -7196,7 +7353,7 @@ static inline int decode_picture_parameter_set(H264Context *h, int bit_length){
                pps->transform_8x8_mode ? "8x8DCT" : ""
                );
     }
-    
+
     return 0;
 }
 
@@ -7223,7 +7380,7 @@ static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
                     // first_mb_in_slice is 0, probably the first nal of a new
                     // slice
                     tprintf("find_frame_end frame_end_found, state = %08x, pos = %d\n", state, i);
-                    pc->state=-1; 
+                    pc->state=-1;
                     pc->frame_start_found= 0;
                     return i-4;
                 }
@@ -7232,28 +7389,28 @@ static int find_frame_end(H264Context *h, const uint8_t *buf, int buf_size){
         }
         if((state&0xFFFFFF1F) == 0x107 || (state&0xFFFFFF1F) == 0x108 || (state&0xFFFFFF1F) == 0x109){
            if(pc->frame_start_found){
-                pc->state=-1; 
+                pc->state=-1;
                 pc->frame_start_found= 0;
-                return i-4;               
+                return i-4;
            }
         }
         if (i<buf_size)
             state= (state<<8) | buf[i];
     }
-    
+
     pc->state= state;
     return END_NOT_FOUND;
 }
 
 static int h264_parse(AVCodecParserContext *s,
                       AVCodecContext *avctx,
-                      uint8_t **poutbuf, int *poutbuf_size, 
+                      uint8_t **poutbuf, int *poutbuf_size,
                       const uint8_t *buf, int buf_size)
 {
     H264Context *h = s->priv_data;
     ParseContext *pc = &h->s.parse_context;
     int next;
-    
+
     next= find_frame_end(h, buf, buf_size);
 
     if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
@@ -7310,7 +7467,7 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
         int bit_length;
         uint8_t *ptr;
         int i, nalsize = 0;
-        
+
       if(h->is_avc) {
         if(buf_index >= buf_size) break;
         nalsize = 0;
@@ -7323,12 +7480,12 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
             if(buf[buf_index] == 0 && buf[buf_index+1] == 0 && buf[buf_index+2] == 1)
                 break;
         }
-        
+
         if(buf_index+3 >= buf_size) break;
-        
+
         buf_index+=3;
-      }  
-        
+      }
+
         ptr= decode_nal(h, buf + buf_index, &dst_length, &consumed, h->is_avc ? nalsize : buf_size - buf_index);
         if(ptr[dst_length - 1] == 0) dst_length--;
         bit_length= 8*dst_length - decode_rbsp_trailing(ptr + dst_length - 1);
@@ -7336,7 +7493,7 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
         if(s->avctx->debug&FF_DEBUG_STARTCODE){
             av_log(h->s.avctx, AV_LOG_DEBUG, "NAL %d at %d/%d length %d\n", h->nal_unit_type, buf_index, buf_size, dst_length);
         }
-        
+
         if (h->is_avc && (nalsize != consumed))
             av_log(h->s.avctx, AV_LOG_ERROR, "AVC: Consumed only %d bytes instead of %d\n", consumed, nalsize);
 
@@ -7345,7 +7502,7 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
         if(  (s->hurry_up == 1 && h->nal_ref_idc  == 0) //FIXME dont discard SEI id
            ||(avctx->skip_frame >= AVDISCARD_NONREF && h->nal_ref_idc  == 0))
             continue;
-        
+
         switch(h->nal_unit_type){
         case NAL_IDR_SLICE:
             idr(h); //FIXME ensure we don't loose some frames if there is reordering
@@ -7354,12 +7511,12 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
             h->intra_gb_ptr=
             h->inter_gb_ptr= &s->gb;
             s->data_partitioning = 0;
-            
+
             if(decode_slice_header(h) < 0){
                 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
                 break;
             }
-            if(h->redundant_pic_count==0 && s->hurry_up < 5 
+            if(h->redundant_pic_count==0 && s->hurry_up < 5
                && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
                && (avctx->skip_frame < AVDISCARD_BIDIR  || h->slice_type!=B_TYPE)
                && (avctx->skip_frame < AVDISCARD_NONKEY || h->slice_type==I_TYPE)
@@ -7371,7 +7528,7 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
             h->intra_gb_ptr=
             h->inter_gb_ptr= NULL;
             s->data_partitioning = 1;
-            
+
             if(decode_slice_header(h) < 0){
                 av_log(h->s.avctx, AV_LOG_ERROR, "decode_slice_header error\n");
             }
@@ -7384,7 +7541,7 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
             init_get_bits(&h->inter_gb, ptr, bit_length);
             h->inter_gb_ptr= &h->inter_gb;
 
-            if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning 
+            if(h->redundant_pic_count==0 && h->intra_gb_ptr && s->data_partitioning
                && s->hurry_up < 5
                && (avctx->skip_frame < AVDISCARD_NONREF || h->nal_ref_idc)
                && (avctx->skip_frame < AVDISCARD_BIDIR  || h->slice_type!=B_TYPE)
@@ -7399,33 +7556,37 @@ static int decode_nal_units(H264Context *h, uint8_t *buf, int buf_size){
         case NAL_SPS:
             init_get_bits(&s->gb, ptr, bit_length);
             decode_seq_parameter_set(h);
-            
+
             if(s->flags& CODEC_FLAG_LOW_DELAY)
                 s->low_delay=1;
-      
+
             if(avctx->has_b_frames < 2)
                 avctx->has_b_frames= !s->low_delay;
             break;
         case NAL_PPS:
             init_get_bits(&s->gb, ptr, bit_length);
-            
+
             decode_picture_parameter_set(h, bit_length);
 
             break;
-        case NAL_PICTURE_DELIMITER:
-            break;
-        case NAL_FILTER_DATA:
+        case NAL_AUD:
+        case NAL_END_SEQUENCE:
+        case NAL_END_STREAM:
+        case NAL_FILLER_DATA:
+        case NAL_SPS_EXT:
+        case NAL_AUXILIARY_SLICE:
             break;
-	default:
-	    av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
-        }        
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unknown NAL code: %d\n", h->nal_unit_type);
+        }
     }
-    
+
     if(!s->current_picture_ptr) return buf_index; //no frame
 
+    s->current_picture_ptr->qscale_type= FF_QSCALE_TYPE_H264;
     s->current_picture_ptr->pict_type= s->pict_type;
     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE && h->nal_unit_type == NAL_IDR_SLICE;
-    
+
     h->prev_frame_num_offset= h->frame_num_offset;
     h->prev_frame_num= h->frame_num;
     if(s->current_picture_ptr->reference){
@@ -7449,7 +7610,7 @@ static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
     if(s->flags&CODEC_FLAG_TRUNCATED){
         pos -= s->parse_context.last_index;
         if(pos<0) pos=0; // FIXME remove (unneeded?)
-        
+
         return pos;
     }else{
         if(pos==0) pos=1; //avoid infinite loops (i doubt thats needed but ...)
@@ -7459,15 +7620,15 @@ static int get_consumed_bytes(MpegEncContext *s, int pos, int buf_size){
     }
 }
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
                              uint8_t *buf, int buf_size)
 {
     H264Context *h = avctx->priv_data;
     MpegEncContext *s = &h->s;
-    AVFrame *pict = data; 
+    AVFrame *pict = data;
     int buf_index;
-    
+
     s->flags= avctx->flags;
     s->flags2= avctx->flags2;
 
@@ -7475,10 +7636,10 @@ static int decode_frame(AVCodecContext *avctx,
     if (buf_size == 0) {
         return 0;
     }
-    
+
     if(s->flags&CODEC_FLAG_TRUNCATED){
         int next= find_frame_end(h, buf, buf_size);
-        
+
         if( ff_combine_frame(&s->parse_context, next, &buf, &buf_size) < 0 )
             return buf_size;
 //printf("next:%d buf_size:%d last_index:%d\n", next, buf_size, s->parse_context.last_index);
@@ -7508,7 +7669,7 @@ static int decode_frame(AVCodecContext *avctx,
                 return -1;
             }
             p += nalsize;
-        }        
+        }
         // Decode pps from avcC
         cnt = *(p++); // Number of pps
         for (i = 0; i < cnt; i++) {
@@ -7518,7 +7679,7 @@ static int decode_frame(AVCodecContext *avctx,
                 return -1;
             }
             p += nalsize;
-        }        
+        }
         // Now store right nal length size, that will be use to parse all other nals
         h->nal_length_size = ((*(((char*)(avctx->extradata))+4))&0x03)+1;
         // Do not reparse avcC
@@ -7526,16 +7687,16 @@ static int decode_frame(AVCodecContext *avctx,
     }
 
     if(!h->is_avc && s->avctx->extradata_size && s->picture_number==0){
-        if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0) 
+        if(decode_nal_units(h, s->avctx->extradata, s->avctx->extradata_size) < 0)
             return -1;
     }
 
     buf_index=decode_nal_units(h, buf, buf_size);
-    if(buf_index < 0) 
+    if(buf_index < 0)
         return -1;
 
-    //FIXME do something with unavailable reference frames    
- 
+    //FIXME do something with unavailable reference frames
+
 //    if(ret==FRAME_SKIPPED) return get_consumed_bytes(s, buf_index, buf_size);
     if(!s->current_picture_ptr){
         av_log(h->s.avctx, AV_LOG_DEBUG, "error, NO frame\n");
@@ -7582,8 +7743,8 @@ static int decode_frame(AVCodecContext *avctx,
         out_of_order = !cross_idr && prev && out->poc < prev->poc;
         if(prev && pics <= s->avctx->has_b_frames)
             out = prev;
-        else if((out_of_order && pics-1 == s->avctx->has_b_frames)
-           || (s->low_delay && 
+        else if((out_of_order && pics-1 == s->avctx->has_b_frames && pics < 15)
+           || (s->low_delay &&
             ((!cross_idr && prev && out->poc > prev->poc + 2)
              || cur->pict_type == B_TYPE)))
         {
@@ -7609,10 +7770,13 @@ static int decode_frame(AVCodecContext *avctx,
         h->delayed_output_pic = out;
 #endif
 
-        *pict= *(AVFrame*)out;
+        if(out)
+            *pict= *(AVFrame*)out;
+        else
+            av_log(avctx, AV_LOG_DEBUG, "no picture\n");
     }
 
-    assert(pict->data[0]);
+    assert(pict->data[0] || !*data_size);
     ff_print_debug_info(s, pict);
 //printf("out %d\n", (int)pict->data[0]);
 #if 0 //?
@@ -7654,7 +7818,7 @@ int main(){
 //    int int_temp[10000];
     DSPContext dsp;
     AVCodecContext avctx;
-    
+
     dsputil_init(&dsp, &avctx);
 
     init_put_bits(&pb, temp, SIZE);
@@ -7665,13 +7829,13 @@ int main(){
         STOP_TIMER("set_ue_golomb");
     }
     flush_put_bits(&pb);
-    
+
     init_get_bits(&gb, temp, 8*SIZE);
     for(i=0; i<COUNT; i++){
         int j, s;
-        
+
         s= show_bits(&gb, 24);
-        
+
         START_TIMER
         j= get_ue_golomb(&gb);
         if(j != i){
@@ -7680,8 +7844,8 @@ int main(){
         }
         STOP_TIMER("get_ue_golomb");
     }
-    
-    
+
+
     init_put_bits(&pb, temp, SIZE);
     printf("testing signed exp golomb\n");
     for(i=0; i<COUNT; i++){
@@ -7690,13 +7854,13 @@ int main(){
         STOP_TIMER("set_se_golomb");
     }
     flush_put_bits(&pb);
-    
+
     init_get_bits(&gb, temp, 8*SIZE);
     for(i=0; i<COUNT; i++){
         int j, s;
-        
+
         s= show_bits(&gb, 24);
-        
+
         START_TIMER
         j= get_se_golomb(&gb);
         if(j != i - COUNT/2){
@@ -7707,7 +7871,7 @@ int main(){
     }
 
     printf("testing 4x4 (I)DCT\n");
-    
+
     DCTELEM block[16];
     uint8_t src[16], ref[16];
     uint64_t error= 0, max_error=0;
@@ -7721,7 +7885,7 @@ int main(){
         }
 
         h264_diff_dct_c(block, src, ref, 4);
-        
+
         //normalize
         for(j=0; j<16; j++){
 //            printf("%d ", block[j]);
@@ -7730,16 +7894,16 @@ int main(){
             if(j&4) block[j]= (block[j]*4 + 2)/5;
         }
 //        printf("\n");
-        
+
         s->dsp.h264_idct_add(ref, block, 4);
 /*        for(j=0; j<16; j++){
             printf("%d ", ref[j]);
         }
         printf("\n");*/
-            
+
         for(j=0; j<16; j++){
             int diff= ABS(src[j] - ref[j]);
-            
+
             error+= diff*diff;
             max_error= FFMAX(max_error, diff);
         }
@@ -7750,16 +7914,16 @@ int main(){
     for(qp=0; qp<52; qp++){
         for(i=0; i<16; i++)
             src1_block[i]= src2_block[i]= random()%255;
-        
+
     }
 #endif
     printf("Testing NAL layer\n");
-    
+
     uint8_t bitstream[COUNT];
     uint8_t nal[COUNT*2];
     H264Context h;
     memset(&h, 0, sizeof(H264Context));
-    
+
     for(i=0; i<COUNT; i++){
         int zeros= i;
         int nal_length;
@@ -7767,11 +7931,11 @@ int main(){
         int out_length;
         uint8_t *out;
         int j;
-        
+
         for(j=0; j<COUNT; j++){
             bitstream[j]= (random() % 255) + 1;
         }
-        
+
         for(j=0; j<zeros; j++){
             int pos= random() % COUNT;
             while(bitstream[pos] == 0){
@@ -7780,38 +7944,38 @@ int main(){
             }
             bitstream[pos]=0;
         }
-        
+
         START_TIMER
-        
+
         nal_length= encode_nal(&h, nal, bitstream, COUNT, COUNT*2);
         if(nal_length<0){
             printf("encoding failed\n");
             return -1;
         }
-        
+
         out= decode_nal(&h, nal, &out_length, &consumed, nal_length);
 
         STOP_TIMER("NAL")
-        
+
         if(out_length != COUNT){
             printf("incorrect length %d %d\n", out_length, COUNT);
             return -1;
         }
-        
+
         if(consumed != nal_length){
             printf("incorrect consumed length %d %d\n", nal_length, consumed);
             return -1;
         }
-        
+
         if(memcmp(bitstream, out, COUNT)){
             printf("missmatch\n");
             return -1;
         }
     }
-    
+
     printf("Testing RBSP\n");
-    
-    
+
+
     return 0;
 }
 #endif
@@ -7821,13 +7985,13 @@ static int decode_end(AVCodecContext *avctx)
 {
     H264Context *h = avctx->priv_data;
     MpegEncContext *s = &h->s;
-    
+
     av_freep(&h->rbsp_buffer);
     free_tables(h); //FIXME cleanup init stuff perhaps
     MPV_common_end(s);
 
 //    memset(h, 0, sizeof(H264Context));
-        
+
     return 0;
 }
 
diff --git a/src/libffmpeg/libavcodec/h264data.h b/src/libffmpeg/libavcodec/h264data.h
index 3e326bf30..3132102df 100644
--- a/src/libffmpeg/libavcodec/h264data.h
+++ b/src/libffmpeg/libavcodec/h264data.h
@@ -14,13 +14,13 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
 /**
  * @file h264data.h
- * @brief 
+ * @brief
  *     H264 / AVC / MPEG4 part10 codec data table
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
@@ -87,7 +87,7 @@ static const uint8_t golomb_to_intra4x4_cbp[48]={
  16,  3,  5, 10, 12, 19, 21, 26, 28, 35, 37, 42, 44,  1,  2,  4,
   8, 17, 18, 20, 24,  6,  9, 22, 25, 32, 33, 34, 36, 40, 38, 41
 };
- 
+
 static const uint8_t golomb_to_inter_cbp[48]={
   0, 16,  1,  2,  4,  8, 32,  3,  5, 10, 12, 15, 47,  7, 11, 13,
  14,  6,  9, 31, 35, 37, 42, 44, 33, 34, 36, 40, 39, 43, 45, 46,
@@ -99,7 +99,7 @@ static const uint8_t intra4x4_cbp_to_golomb[48]={
  16, 33, 34, 21, 35, 22, 39,  4, 36, 40, 23,  5, 24,  6,  7,  1,
  41, 42, 43, 25, 44, 26, 46, 12, 45, 47, 27, 13, 28, 14, 15,  0
 };
- 
+
 static const uint8_t inter_cbp_to_golomb[48]={
   0,  2,  3,  7,  4,  8, 17, 13,  5, 18,  9, 14, 10, 15, 16, 11,
   1, 32, 33, 36, 34, 37, 44, 40, 35, 45, 38, 41, 39, 42, 43, 19,
@@ -185,21 +185,21 @@ static const uint8_t coeff_token_bits[4][4*17]={
 };
 
 static const uint8_t total_zeros_len[16][16]= {
-    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},  
-    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},  
-    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},  
-    {5,3,4,4,3,3,3,4,3,4,5,5,5},  
-    {4,4,4,3,3,3,3,3,4,5,4,5},  
-    {6,5,3,3,3,3,3,3,4,3,6},  
-    {6,5,3,3,3,2,3,4,3,6},  
-    {6,4,5,3,2,2,3,3,6},  
-    {6,6,4,2,2,3,2,5},  
-    {5,5,3,2,2,2,4},  
-    {4,4,3,3,1,3},  
-    {4,4,2,1,3},  
-    {3,3,1,2},  
-    {2,2,1},  
-    {1,1},  
+    {1,3,3,4,4,5,5,6,6,7,7,8,8,9,9,9},
+    {3,3,3,3,3,4,4,4,4,5,5,6,6,6,6},
+    {4,3,3,3,4,4,3,3,4,5,5,6,5,6},
+    {5,3,4,4,3,3,3,4,3,4,5,5,5},
+    {4,4,4,3,3,3,3,3,4,5,4,5},
+    {6,5,3,3,3,3,3,3,4,3,6},
+    {6,5,3,3,3,2,3,4,3,6},
+    {6,4,5,3,2,2,3,3,6},
+    {6,6,4,2,2,3,2,5},
+    {5,5,3,2,2,2,4},
+    {4,4,3,3,1,3},
+    {4,4,2,1,3},
+    {3,3,1,2},
+    {2,2,1},
+    {1,1},
 };
 
 static const uint8_t total_zeros_bits[16][16]= {
@@ -223,7 +223,7 @@ static const uint8_t total_zeros_bits[16][16]= {
 static const uint8_t chroma_dc_total_zeros_len[3][4]= {
     { 1, 2, 3, 3,},
     { 1, 2, 2, 0,},
-    { 1, 1, 0, 0,}, 
+    { 1, 1, 0, 0,},
 };
 
 static const uint8_t chroma_dc_total_zeros_bits[3][4]= {
@@ -274,16 +274,16 @@ static const uint8_t scan8[16 + 2*4]={
 };
 
 static const uint8_t zigzag_scan[16]={
- 0+0*4, 1+0*4, 0+1*4, 0+2*4, 
- 1+1*4, 2+0*4, 3+0*4, 2+1*4, 
- 1+2*4, 0+3*4, 1+3*4, 2+2*4, 
- 3+1*4, 3+2*4, 2+3*4, 3+3*4, 
+ 0+0*4, 1+0*4, 0+1*4, 0+2*4,
+ 1+1*4, 2+0*4, 3+0*4, 2+1*4,
+ 1+2*4, 0+3*4, 1+3*4, 2+2*4,
+ 3+1*4, 3+2*4, 2+3*4, 3+3*4,
 };
 
 static const uint8_t field_scan[16]={
- 0+0*4, 0+1*4, 1+0*4, 0+2*4, 
+ 0+0*4, 0+1*4, 1+0*4, 0+2*4,
  0+3*4, 1+1*4, 1+2*4, 1+3*4,
- 2+0*4, 2+1*4, 2+2*4, 2+3*4, 
+ 2+0*4, 2+1*4, 2+2*4, 2+3*4,
  3+0*4, 3+1*4, 3+2*4, 3+3*4,
 };
 
@@ -295,14 +295,14 @@ static const uint8_t luma_dc_zigzag_scan[16]={
 };
 
 static const uint8_t luma_dc_field_scan[16]={
- 0*16 + 0*64, 2*16 + 0*64, 1*16 + 0*64, 0*16 + 2*64, 
- 2*16 + 2*64, 3*16 + 0*64, 1*16 + 2*64, 3*16 + 2*64, 
- 0*16 + 1*64, 2*16 + 1*64, 0*16 + 3*64, 2*16 + 3*64, 
+ 0*16 + 0*64, 2*16 + 0*64, 1*16 + 0*64, 0*16 + 2*64,
+ 2*16 + 2*64, 3*16 + 0*64, 1*16 + 2*64, 3*16 + 2*64,
+ 0*16 + 1*64, 2*16 + 1*64, 0*16 + 3*64, 2*16 + 3*64,
  1*16 + 1*64, 3*16 + 1*64, 1*16 + 3*64, 3*16 + 3*64,
 };
 
 static const uint8_t chroma_dc_scan[4]={
- (0+0*2)*16, (1+0*2)*16, 
+ (0+0*2)*16, (1+0*2)*16,
  (0+1*2)*16, (1+1*2)*16,  //FIXME
 };
 
@@ -450,68 +450,52 @@ static const PMbInfo b_sub_mb_type_info[13]={
 
 
 static const uint8_t rem6[52]={
-0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 
+0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3,
 };
 
 static const uint8_t div6[52]={
 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8,
 };
 
-static const uint16_t dequant_coeff[52][16]={
-{  10,  13,  10,  13,   13,  16,  13,  16,   10,  13,  10,  13,   13,  16,  13,  16, },
-{  11,  14,  11,  14,   14,  18,  14,  18,   11,  14,  11,  14,   14,  18,  14,  18, },
-{  13,  16,  13,  16,   16,  20,  16,  20,   13,  16,  13,  16,   16,  20,  16,  20, },
-{  14,  18,  14,  18,   18,  23,  18,  23,   14,  18,  14,  18,   18,  23,  18,  23, },
-{  16,  20,  16,  20,   20,  25,  20,  25,   16,  20,  16,  20,   20,  25,  20,  25, },
-{  18,  23,  18,  23,   23,  29,  23,  29,   18,  23,  18,  23,   23,  29,  23,  29, },
-{  20,  26,  20,  26,   26,  32,  26,  32,   20,  26,  20,  26,   26,  32,  26,  32, },
-{  22,  28,  22,  28,   28,  36,  28,  36,   22,  28,  22,  28,   28,  36,  28,  36, },
-{  26,  32,  26,  32,   32,  40,  32,  40,   26,  32,  26,  32,   32,  40,  32,  40, },
-{  28,  36,  28,  36,   36,  46,  36,  46,   28,  36,  28,  36,   36,  46,  36,  46, },
-{  32,  40,  32,  40,   40,  50,  40,  50,   32,  40,  32,  40,   40,  50,  40,  50, },
-{  36,  46,  36,  46,   46,  58,  46,  58,   36,  46,  36,  46,   46,  58,  46,  58, },
-{  40,  52,  40,  52,   52,  64,  52,  64,   40,  52,  40,  52,   52,  64,  52,  64, },
-{  44,  56,  44,  56,   56,  72,  56,  72,   44,  56,  44,  56,   56,  72,  56,  72, },
-{  52,  64,  52,  64,   64,  80,  64,  80,   52,  64,  52,  64,   64,  80,  64,  80, },
-{  56,  72,  56,  72,   72,  92,  72,  92,   56,  72,  56,  72,   72,  92,  72,  92, },
-{  64,  80,  64,  80,   80, 100,  80, 100,   64,  80,  64,  80,   80, 100,  80, 100, },
-{  72,  92,  72,  92,   92, 116,  92, 116,   72,  92,  72,  92,   92, 116,  92, 116, },
-{  80, 104,  80, 104,  104, 128, 104, 128,   80, 104,  80, 104,  104, 128, 104, 128, },
-{  88, 112,  88, 112,  112, 144, 112, 144,   88, 112,  88, 112,  112, 144, 112, 144, },
-{ 104, 128, 104, 128,  128, 160, 128, 160,  104, 128, 104, 128,  128, 160, 128, 160, },
-{ 112, 144, 112, 144,  144, 184, 144, 184,  112, 144, 112, 144,  144, 184, 144, 184, },
-{ 128, 160, 128, 160,  160, 200, 160, 200,  128, 160, 128, 160,  160, 200, 160, 200, },
-{ 144, 184, 144, 184,  184, 232, 184, 232,  144, 184, 144, 184,  184, 232, 184, 232, },
-{ 160, 208, 160, 208,  208, 256, 208, 256,  160, 208, 160, 208,  208, 256, 208, 256, },
-{ 176, 224, 176, 224,  224, 288, 224, 288,  176, 224, 176, 224,  224, 288, 224, 288, },
-{ 208, 256, 208, 256,  256, 320, 256, 320,  208, 256, 208, 256,  256, 320, 256, 320, },
-{ 224, 288, 224, 288,  288, 368, 288, 368,  224, 288, 224, 288,  288, 368, 288, 368, },
-{ 256, 320, 256, 320,  320, 400, 320, 400,  256, 320, 256, 320,  320, 400, 320, 400, },
-{ 288, 368, 288, 368,  368, 464, 368, 464,  288, 368, 288, 368,  368, 464, 368, 464, },
-{ 320, 416, 320, 416,  416, 512, 416, 512,  320, 416, 320, 416,  416, 512, 416, 512, },
-{ 352, 448, 352, 448,  448, 576, 448, 576,  352, 448, 352, 448,  448, 576, 448, 576, },
-{ 416, 512, 416, 512,  512, 640, 512, 640,  416, 512, 416, 512,  512, 640, 512, 640, },
-{ 448, 576, 448, 576,  576, 736, 576, 736,  448, 576, 448, 576,  576, 736, 576, 736, },
-{ 512, 640, 512, 640,  640, 800, 640, 800,  512, 640, 512, 640,  640, 800, 640, 800, },
-{ 576, 736, 576, 736,  736, 928, 736, 928,  576, 736, 576, 736,  736, 928, 736, 928, },
-{ 640, 832, 640, 832,  832,1024, 832,1024,  640, 832, 640, 832,  832,1024, 832,1024, },
-{ 704, 896, 704, 896,  896,1152, 896,1152,  704, 896, 704, 896,  896,1152, 896,1152, },
-{ 832,1024, 832,1024, 1024,1280,1024,1280,  832,1024, 832,1024, 1024,1280,1024,1280, },
-{ 896,1152, 896,1152, 1152,1472,1152,1472,  896,1152, 896,1152, 1152,1472,1152,1472, },
-{1024,1280,1024,1280, 1280,1600,1280,1600, 1024,1280,1024,1280, 1280,1600,1280,1600, },
-{1152,1472,1152,1472, 1472,1856,1472,1856, 1152,1472,1152,1472, 1472,1856,1472,1856, },
-{1280,1664,1280,1664, 1664,2048,1664,2048, 1280,1664,1280,1664, 1664,2048,1664,2048, },
-{1408,1792,1408,1792, 1792,2304,1792,2304, 1408,1792,1408,1792, 1792,2304,1792,2304, },
-{1664,2048,1664,2048, 2048,2560,2048,2560, 1664,2048,1664,2048, 2048,2560,2048,2560, },
-{1792,2304,1792,2304, 2304,2944,2304,2944, 1792,2304,1792,2304, 2304,2944,2304,2944, },
-{2048,2560,2048,2560, 2560,3200,2560,3200, 2048,2560,2048,2560, 2560,3200,2560,3200, },
-{2304,2944,2304,2944, 2944,3712,2944,3712, 2304,2944,2304,2944, 2944,3712,2944,3712, },
-{2560,3328,2560,3328, 3328,4096,3328,4096, 2560,3328,2560,3328, 3328,4096,3328,4096, },
-{2816,3584,2816,3584, 3584,4608,3584,4608, 2816,3584,2816,3584, 3584,4608,3584,4608, },
-{3328,4096,3328,4096, 4096,5120,4096,5120, 3328,4096,3328,4096, 4096,5120,4096,5120, },
-{3584,4608,3584,4608, 4608,5888,4608,5888, 3584,4608,3584,4608, 4608,5888,4608,5888, },
-//{4096,5120,4096,5120, 5120,6400,5120,6400, 4096,5120,4096,5120, 5120,6400,5120,6400, },
-//{4608,5888,4608,5888, 5888,7424,5888,7424, 4608,5888,4608,5888, 5888,7424,5888,7424, },
+static const uint8_t default_scaling4[2][16]={
+{   6,13,20,28,
+   13,20,28,32,
+   20,28,32,37,
+   28,32,37,42
+},{
+   10,14,20,24,
+   14,20,24,27,
+   20,24,27,30,
+   24,27,30,34
+}};
+
+static const uint8_t default_scaling8[2][64]={
+{   6,10,13,16,18,23,25,27,
+   10,11,16,18,23,25,27,29,
+   13,16,18,23,25,27,29,31,
+   16,18,23,25,27,29,31,33,
+   18,23,25,27,29,31,33,36,
+   23,25,27,29,31,33,36,38,
+   25,27,29,31,33,36,38,40,
+   27,29,31,33,36,38,40,42
+},{
+    9,13,15,17,19,21,22,24,
+   13,13,17,19,21,22,24,25,
+   15,17,19,21,22,24,25,27,
+   17,19,21,22,24,25,27,28,
+   19,21,22,24,25,27,28,30,
+   21,22,24,25,27,28,30,32,
+   22,24,25,27,28,30,32,33,
+   24,25,27,28,30,32,33,35
+}};
+
+static const int dequant4_coeff_init[6][3]={
+  {10,13,16},
+  {11,14,18},
+  {13,16,20},
+  {14,18,23},
+  {16,20,25},
+  {18,23,29},
 };
 
 static const int dequant8_coeff_init_scan[16] = {
diff --git a/src/libffmpeg/libavcodec/h264idct.c b/src/libffmpeg/libavcodec/h264idct.c
index e59324d58..a4ddf1d51 100755
--- a/src/libffmpeg/libavcodec/h264idct.c
+++ b/src/libffmpeg/libavcodec/h264idct.c
@@ -14,16 +14,16 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file h264-idct.c
  * H.264 IDCT.
  * @author Michael Niedermayer <michaelni@gmx.at>
  */
- 
+
 #include "dsputil.h"
 
 static always_inline void idct_internal(uint8_t *dst, DCTELEM *block, int stride, int block_stride, int shift, int add){
diff --git a/src/libffmpeg/libavcodec/huffyuv.c b/src/libffmpeg/libavcodec/huffyuv.c
index ebb1340ac..dc9e123ff 100644
--- a/src/libffmpeg/libavcodec/huffyuv.c
+++ b/src/libffmpeg/libavcodec/huffyuv.c
@@ -15,12 +15,12 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * see http://www.pcisys.net/~melanson/codecs/huffyuv.txt for a description of
- * the algorithm used 
+ * the algorithm used
  */
- 
+
 /**
  * @file huffyuv.c
  * huffyuv codec for libavcodec.
@@ -48,7 +48,7 @@ typedef enum Predictor{
     PLANE,
     MEDIAN,
 } Predictor;
- 
+
 typedef struct HYuvContext{
     AVCodecContext *avctx;
     Predictor predictor;
@@ -72,8 +72,8 @@ typedef struct HYuvContext{
     VLC vlc[3];
     AVFrame picture;
     uint8_t *bitstream_buffer;
-    int bitstream_buffer_size;
-    DSPContext dsp; 
+    unsigned int bitstream_buffer_size;
+    DSPContext dsp;
 }HYuvContext;
 
 static const unsigned char classic_shift_luma[] = {
@@ -156,7 +156,7 @@ static inline void add_median_prediction(uint8_t *dst, uint8_t *src1, uint8_t *d
         l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
         lt= src1[i];
         dst[i]= l;
-    }    
+    }
 
     *left= l;
     *left_top= lt;
@@ -173,7 +173,7 @@ static inline void add_left_prediction_bgr32(uint8_t *dst, uint8_t *src, int w,
         b+= src[4*i+B];
         g+= src[4*i+G];
         r+= src[4*i+R];
-        
+
         dst[4*i+B]= b;
         dst[4*i+G]= g;
         dst[4*i+R]= r;
@@ -206,7 +206,7 @@ static inline int sub_left_prediction(HYuvContext *s, uint8_t *dst, uint8_t *src
 
 static void read_len_table(uint8_t *dst, GetBitContext *gb){
     int i, val, repeat;
-  
+
     for(i=0; i<256;){
         repeat= get_bits(gb, 3);
         val   = get_bits(gb, 5);
@@ -240,19 +240,19 @@ static void generate_len_table(uint8_t *dst, uint64_t *stats, int size){
     uint64_t counts[2*size];
     int up[2*size];
     int offset, i, next;
-    
+
     for(offset=1; ; offset<<=1){
         for(i=0; i<size; i++){
             counts[i]= stats[i] + offset - 1;
         }
-        
+
         for(next=size; next<size*2; next++){
             uint64_t min1, min2;
             int min1_i, min2_i;
-            
+
             min1=min2= INT64_MAX;
             min1_i= min2_i=-1;
-            
+
             for(i=0; i<next; i++){
                 if(min2 > counts[i]){
                     if(min1 > counts[i]){
@@ -266,9 +266,9 @@ static void generate_len_table(uint8_t *dst, uint64_t *stats, int size){
                     }
                 }
             }
-            
+
             if(min2==INT64_MAX) break;
-            
+
             counts[next]= min1 + min2;
             counts[min1_i]=
             counts[min2_i]= INT64_MAX;
@@ -276,16 +276,16 @@ static void generate_len_table(uint8_t *dst, uint64_t *stats, int size){
             up[min2_i]= next;
             up[next]= -1;
         }
-        
+
         for(i=0; i<size; i++){
             int len;
             int index=i;
-            
+
             for(len=0; up[index] != -1; len++)
                 index= up[index];
-                
+
             if(len >= 32) break;
-            
+
             dst[i]= len;
         }
         if(i==size) break;
@@ -295,12 +295,12 @@ static void generate_len_table(uint8_t *dst, uint64_t *stats, int size){
 static int read_huffman_tables(HYuvContext *s, uint8_t *src, int length){
     GetBitContext gb;
     int i;
-    
+
     init_get_bits(&gb, src, length*8);
-    
+
     for(i=0; i<3; i++){
         read_len_table(s->len[i], &gb);
-        
+
         if(generate_bits_table(s->bits[i], s->len[i])<0){
             return -1;
         }
@@ -312,7 +312,7 @@ printf("%6X, %2d,  %3d\n", s->bits[i][j], s->len[i][j], j);
         free_vlc(&s->vlc[i]);
         init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0);
     }
-    
+
     return (get_bits_count(&gb)+7)/8;
 }
 
@@ -325,7 +325,7 @@ static int read_old_huffman_tables(HYuvContext *s){
     read_len_table(s->len[0], &gb);
     init_get_bits(&gb, classic_shift_chroma, sizeof(classic_shift_chroma)*8);
     read_len_table(s->len[1], &gb);
-    
+
     for(i=0; i<256; i++) s->bits[0][i] = classic_add_luma  [i];
     for(i=0; i<256; i++) s->bits[1][i] = classic_add_chroma[i];
 
@@ -335,12 +335,12 @@ static int read_old_huffman_tables(HYuvContext *s){
     }
     memcpy(s->bits[2], s->bits[1], 256*sizeof(uint32_t));
     memcpy(s->len[2] , s->len [1], 256*sizeof(uint8_t));
-    
+
     for(i=0; i<3; i++){
         free_vlc(&s->vlc[i]);
         init_vlc(&s->vlc[i], VLC_BITS, 256, s->len[i], 1, 1, s->bits[i], 4, 4, 0);
     }
-    
+
     return 0;
 #else
     fprintf(stderr, "v1 huffyuv is not supported \n");
@@ -350,7 +350,7 @@ static int read_old_huffman_tables(HYuvContext *s){
 
 static void alloc_temp(HYuvContext *s){
     int i;
-    
+
     if(s->bitstream_bpp<24){
         for(i=0; i<3; i++){
             s->temp[i]= av_malloc(s->width + 16);
@@ -365,13 +365,13 @@ static int common_init(AVCodecContext *avctx){
 
     s->avctx= avctx;
     s->flags= avctx->flags;
-        
+
     dsputil_init(&s->dsp, avctx);
-    
+
     s->width= avctx->width;
     s->height= avctx->height;
     assert(s->width>0 && s->height>0);
-        
+
     return 0;
 }
 
@@ -381,7 +381,7 @@ static int decode_init(AVCodecContext *avctx)
 
     common_init(avctx);
     memset(s->vlc, 0, 3*sizeof(VLC));
-    
+
     avctx->coded_frame= &s->picture;
     s->interlaced= s->height > 288;
 
@@ -395,7 +395,7 @@ s->bgr32=1;
             s->version=2;
     }else
         s->version=0;
-    
+
     if(s->version==2){
         int method, interlace;
 
@@ -403,12 +403,12 @@ s->bgr32=1;
         s->decorrelate= method&64 ? 1 : 0;
         s->predictor= method&63;
         s->bitstream_bpp= ((uint8_t*)avctx->extradata)[1];
-        if(s->bitstream_bpp==0) 
+        if(s->bitstream_bpp==0)
             s->bitstream_bpp= avctx->bits_per_sample&~7;
         interlace= (((uint8_t*)avctx->extradata)[2] & 0x30) >> 4;
         s->interlaced= (interlace==1) ? 1 : (interlace==2) ? 0 : s->interlaced;
         s->context= ((uint8_t*)avctx->extradata)[2] & 0x40 ? 1 : 0;
-            
+
         if(read_huffman_tables(s, ((uint8_t*)avctx->extradata)+4, avctx->extradata_size) < 0)
             return -1;
     }else{
@@ -436,11 +436,11 @@ s->bgr32=1;
         }
         s->bitstream_bpp= avctx->bits_per_sample & ~7;
         s->context= 0;
-        
+
         if(read_old_huffman_tables(s) < 0)
             return -1;
     }
-    
+
     switch(s->bitstream_bpp){
     case 12:
         avctx->pix_fmt = PIX_FMT_YUV420P;
@@ -463,9 +463,9 @@ s->bgr32=1;
     default:
         assert(0);
     }
-    
+
     alloc_temp(s);
-    
+
 //    av_log(NULL, AV_LOG_DEBUG, "pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced);
 
     return 0;
@@ -478,10 +478,10 @@ static int store_table(HYuvContext *s, uint8_t *len, uint8_t *buf){
     for(i=0; i<256;){
         int val= len[i];
         int repeat=0;
-        
+
         for(; i<256 && len[i]==val && repeat<255; i++)
             repeat++;
-        
+
         assert(val < 32 && val >0 && repeat<256 && repeat>0);
         if(repeat>7){
             buf[index++]= val;
@@ -490,7 +490,7 @@ static int store_table(HYuvContext *s, uint8_t *len, uint8_t *buf){
             buf[index++]= val | (repeat<<5);
         }
     }
-    
+
     return index;
 }
 
@@ -500,13 +500,13 @@ static int encode_init(AVCodecContext *avctx)
     int i, j;
 
     common_init(avctx);
-    
+
     avctx->extradata= av_mallocz(1024*30); // 256*3+4 == 772
     avctx->stats_out= av_mallocz(1024*30); // 21*256*3(%llu ) + 3(\n) + 1(0) = 16132
     s->version=2;
-    
+
     avctx->coded_frame= &s->picture;
-    
+
     switch(avctx->pix_fmt){
     case PIX_FMT_YUV420P:
         s->bitstream_bpp= 12;
@@ -529,7 +529,7 @@ static int encode_init(AVCodecContext *avctx)
             return -1;
         }
     }else s->context= 0;
-    
+
     if(avctx->codec->id==CODEC_ID_HUFFYUV){
         if(avctx->pix_fmt==PIX_FMT_YUV420P){
             av_log(avctx, AV_LOG_ERROR, "Error: YV12 is not supported by huffyuv; use vcodec=ffvhuff or format=422p\n");
@@ -545,7 +545,7 @@ static int encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "This codec is under development; files encoded with it may not be decodable with future versions!!! Set vstrict=-2 / -strict -2 to use it anyway.\n");
         return -1;
     }
-    
+
     ((uint8_t*)avctx->extradata)[0]= s->predictor;
     ((uint8_t*)avctx->extradata)[1]= s->bitstream_bpp;
     ((uint8_t*)avctx->extradata)[2]= s->interlaced ? 0x10 : 0x20;
@@ -553,10 +553,10 @@ static int encode_init(AVCodecContext *avctx)
         ((uint8_t*)avctx->extradata)[2]|= 0x40;
     ((uint8_t*)avctx->extradata)[3]= 0;
     s->avctx->extradata_size= 4;
-    
+
     if(avctx->stats_in){
         char *p= avctx->stats_in;
-    
+
         for(i=0; i<3; i++)
             for(j=0; j<256; j++)
                 s->stats[i][j]= 1;
@@ -569,7 +569,7 @@ static int encode_init(AVCodecContext *avctx)
                     s->stats[i][j]+= strtol(p, &next, 0);
                     if(next==p) return -1;
                     p=next;
-                }        
+                }
             }
             if(p[0]==0 || p[1]==0 || p[2]==0) break;
         }
@@ -577,18 +577,18 @@ static int encode_init(AVCodecContext *avctx)
         for(i=0; i<3; i++)
             for(j=0; j<256; j++){
                 int d= FFMIN(j, 256-j);
-                
+
                 s->stats[i][j]= 100000000/(d+1);
             }
     }
-    
+
     for(i=0; i<3; i++){
         generate_len_table(s->len[i], s->stats[i], 256);
 
         if(generate_bits_table(s->bits[i], s->len[i])<0){
             return -1;
         }
-        
+
         s->avctx->extradata_size+=
         store_table(s, s->len[i], &((uint8_t*)s->avctx->extradata)[s->avctx->extradata_size]);
     }
@@ -606,7 +606,7 @@ static int encode_init(AVCodecContext *avctx)
             for(j=0; j<256; j++)
                 s->stats[i][j]= 0;
     }
-    
+
 //    printf("pred:%d bpp:%d hbpp:%d il:%d\n", s->predictor, s->bitstream_bpp, avctx->bits_per_sample, s->interlaced);
 
     alloc_temp(s);
@@ -620,34 +620,34 @@ static void decode_422_bitstream(HYuvContext *s, int count){
     int i;
 
     count/=2;
-    
+
     for(i=0; i<count; i++){
-        s->temp[0][2*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); 
-        s->temp[1][  i  ]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
-        s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); 
-        s->temp[2][  i  ]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); 
+        s->temp[0][2*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
+        s->temp[1][  i  ]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
+        s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
+        s->temp[2][  i  ]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
     }
 }
 
 static void decode_gray_bitstream(HYuvContext *s, int count){
     int i;
-    
+
     count/=2;
-    
+
     for(i=0; i<count; i++){
-        s->temp[0][2*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); 
-        s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3); 
+        s->temp[0][2*i  ]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
+        s->temp[0][2*i+1]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
     }
 }
 
 static int encode_422_bitstream(HYuvContext *s, int count){
     int i;
-    
+
     if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 2*4*count){
         av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
         return -1;
     }
-    
+
     count/=2;
     if(s->flags&CODEC_FLAG_PASS1){
         for(i=0; i<count; i++){
@@ -683,7 +683,7 @@ static int encode_422_bitstream(HYuvContext *s, int count){
 
 static int encode_gray_bitstream(HYuvContext *s, int count){
     int i;
-    
+
     if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 4*count){
         av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
         return -1;
@@ -698,7 +698,7 @@ static int encode_gray_bitstream(HYuvContext *s, int count){
     }
     if(s->avctx->flags2&CODEC_FLAG2_NO_OUTPUT)
         return 0;
-    
+
     if(s->context){
         for(i=0; i<count; i++){
             s->stats[0][ s->temp[0][2*i  ] ]++;
@@ -721,15 +721,15 @@ static void decode_bgr_bitstream(HYuvContext *s, int count){
     if(s->decorrelate){
         if(s->bitstream_bpp==24){
             for(i=0; i<count; i++){
-                s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
+                s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
                 s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+G];
                 s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+G];
             }
         }else{
             for(i=0; i<count; i++){
-                s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
+                s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
                 s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3) + s->temp[0][4*i+G];
-                s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+G]; 
+                s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3) + s->temp[0][4*i+G];
                                    get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?!
             }
         }
@@ -737,14 +737,14 @@ static void decode_bgr_bitstream(HYuvContext *s, int count){
         if(s->bitstream_bpp==24){
             for(i=0; i<count; i++){
                 s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
-                s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
-                s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); 
+                s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
+                s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
             }
         }else{
             for(i=0; i<count; i++){
                 s->temp[0][4*i+B]= get_vlc2(&s->gb, s->vlc[0].table, VLC_BITS, 3);
-                s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3); 
-                s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); 
+                s->temp[0][4*i+G]= get_vlc2(&s->gb, s->vlc[1].table, VLC_BITS, 3);
+                s->temp[0][4*i+R]= get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3);
                                    get_vlc2(&s->gb, s->vlc[2].table, VLC_BITS, 3); //?!
             }
         }
@@ -754,13 +754,13 @@ static void decode_bgr_bitstream(HYuvContext *s, int count){
 static void draw_slice(HYuvContext *s, int y){
     int h, cy;
     int offset[4];
-    
-    if(s->avctx->draw_horiz_band==NULL) 
+
+    if(s->avctx->draw_horiz_band==NULL)
         return;
-        
+
     h= y - s->last_slice_end;
     y -= h;
-    
+
     if(s->bitstream_bpp==12){
         cy= y>>1;
     }else{
@@ -774,7 +774,7 @@ static void draw_slice(HYuvContext *s, int y){
     emms_c();
 
     s->avctx->draw_horiz_band(s->avctx, &s->picture, offset, y, 3, h);
-    
+
     s->last_slice_end= y + h;
 }
 
@@ -792,7 +792,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
     s->bitstream_buffer= av_fast_realloc(s->bitstream_buffer, &s->bitstream_buffer_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
 
     s->dsp.bswap_buf((uint32_t*)s->bitstream_buffer, (uint32_t*)buf, buf_size/4);
-    
+
     if(p->data[0])
         avctx->release_buffer(avctx, p);
 
@@ -801,7 +801,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
         return -1;
     }
-    
+
     if(s->context){
         table_size = read_huffman_tables(s, s->bitstream_buffer, buf_size);
         if(table_size < 0)
@@ -813,29 +813,29 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
     fake_ystride= s->interlaced ? p->linesize[0]*2  : p->linesize[0];
     fake_ustride= s->interlaced ? p->linesize[1]*2  : p->linesize[1];
     fake_vstride= s->interlaced ? p->linesize[2]*2  : p->linesize[2];
-    
+
     s->last_slice_end= 0;
-        
+
     if(s->bitstream_bpp<24){
         int y, cy;
         int lefty, leftu, leftv;
         int lefttopy, lefttopu, lefttopv;
-        
+
         if(s->yuy2){
             p->data[0][3]= get_bits(&s->gb, 8);
             p->data[0][2]= get_bits(&s->gb, 8);
             p->data[0][1]= get_bits(&s->gb, 8);
             p->data[0][0]= get_bits(&s->gb, 8);
-            
+
             av_log(avctx, AV_LOG_ERROR, "YUY2 output is not implemented yet\n");
             return -1;
         }else{
-        
+
             leftv= p->data[2][0]= get_bits(&s->gb, 8);
             lefty= p->data[0][1]= get_bits(&s->gb, 8);
             leftu= p->data[1][0]= get_bits(&s->gb, 8);
                    p->data[0][0]= get_bits(&s->gb, 8);
-        
+
             switch(s->predictor){
             case LEFT:
             case PLANE:
@@ -848,10 +848,10 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
 
                 for(cy=y=1; y<s->height; y++,cy++){
                     uint8_t *ydst, *udst, *vdst;
-                    
+
                     if(s->bitstream_bpp==12){
                         decode_gray_bitstream(s, width);
-                    
+
                         ydst= p->data[0] + p->linesize[0]*y;
 
                         lefty= add_left_prediction(ydst, s->temp[0], width, lefty);
@@ -862,13 +862,13 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
                         y++;
                         if(y>=s->height) break;
                     }
-                    
+
                     draw_slice(s, y);
-                    
+
                     ydst= p->data[0] + p->linesize[0]*y;
                     udst= p->data[1] + p->linesize[1]*cy;
                     vdst= p->data[2] + p->linesize[2]*cy;
-                    
+
                     decode_422_bitstream(s, width);
                     lefty= add_left_prediction(ydst, s->temp[0], width, lefty);
                     if(!(s->flags&CODEC_FLAG_GRAY)){
@@ -886,7 +886,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
                     }
                 }
                 draw_slice(s, height);
-                
+
                 break;
             case MEDIAN:
                 /* first line except first 2 pixels is left predicted */
@@ -896,9 +896,9 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
                     leftu= add_left_prediction(p->data[1] + 1, s->temp[1], width2-1, leftu);
                     leftv= add_left_prediction(p->data[2] + 1, s->temp[2], width2-1, leftv);
                 }
-                
+
                 cy=y=1;
-                
+
                 /* second line is left predicted for interlaced case */
                 if(s->interlaced){
                     decode_422_bitstream(s, width);
@@ -929,7 +929,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
                     add_median_prediction(p->data[2] + fake_vstride+2, p->data[2]+2, s->temp[2], width2-2, &leftv, &lefttopv);
                 }
                 y++; cy++;
-                
+
                 for(; y<height; y++,cy++){
                     uint8_t *ydst, *udst, *vdst;
 
@@ -965,7 +965,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
         int y;
         int leftr, leftg, leftb;
         const int last_line= (height-1)*p->linesize[0];
-        
+
         if(s->bitstream_bpp==32){
             skip_bits(&s->gb, 8);
             leftr= p->data[0][last_line+R]= get_bits(&s->gb, 8);
@@ -977,7 +977,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
             leftb= p->data[0][last_line+B]= get_bits(&s->gb, 8);
             skip_bits(&s->gb, 8);
         }
-        
+
         if(s->bgr32){
             switch(s->predictor){
             case LEFT:
@@ -987,11 +987,11 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
 
                 for(y=s->height-2; y>=0; y--){ //yes its stored upside down
                     decode_bgr_bitstream(s, width);
-                    
+
                     add_left_prediction_bgr32(p->data[0] + p->linesize[0]*y, s->temp[0], width, &leftr, &leftg, &leftb);
                     if(s->predictor == PLANE){
                         if((y&s->interlaced)==0 && y<s->height-1-s->interlaced){
-                            s->dsp.add_bytes(p->data[0] + p->linesize[0]*y, 
+                            s->dsp.add_bytes(p->data[0] + p->linesize[0]*y,
                                              p->data[0] + p->linesize[0]*y + fake_ystride, fake_ystride);
                         }
                     }
@@ -1008,16 +1008,16 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
         }
     }
     emms_c();
-    
+
     *picture= *p;
     *data_size = sizeof(AVFrame);
-    
+
     return (get_bits_count(&s->gb)+31)/32*4;
 }
 
 static int common_end(HYuvContext *s){
     int i;
-    
+
     for(i=0; i<3; i++){
         av_freep(&s->temp[i]);
     }
@@ -1028,10 +1028,10 @@ static int decode_end(AVCodecContext *avctx)
 {
     HYuvContext *s = avctx->priv_data;
     int i;
-    
+
     common_end(s);
     av_freep(&s->bitstream_buffer);
-    
+
     for(i=0; i<3; i++){
         free_vlc(&s->vlc[i]);
     }
@@ -1054,7 +1054,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
     *p = *pict;
     p->pict_type= FF_I_TYPE;
     p->key_frame= 1;
-    
+
     if(s->context){
         for(i=0; i<3; i++){
             generate_len_table(s->len[i], s->stats[i], 256);
@@ -1077,13 +1077,13 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
         put_bits(&s->pb, 8, lefty= p->data[0][1]);
         put_bits(&s->pb, 8, leftu= p->data[1][0]);
         put_bits(&s->pb, 8,        p->data[0][0]);
-        
+
         lefty= sub_left_prediction(s, s->temp[0], p->data[0]+2, width-2 , lefty);
         leftu= sub_left_prediction(s, s->temp[1], p->data[1]+1, width2-1, leftu);
         leftv= sub_left_prediction(s, s->temp[2], p->data[2]+1, width2-1, leftv);
-        
+
         encode_422_bitstream(s, width-2);
-        
+
         if(s->predictor==MEDIAN){
             int lefttopy, lefttopu, lefttopv;
             cy=y=1;
@@ -1091,15 +1091,15 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
                 lefty= sub_left_prediction(s, s->temp[0], p->data[0]+p->linesize[0], width , lefty);
                 leftu= sub_left_prediction(s, s->temp[1], p->data[1]+p->linesize[1], width2, leftu);
                 leftv= sub_left_prediction(s, s->temp[2], p->data[2]+p->linesize[2], width2, leftv);
-        
+
                 encode_422_bitstream(s, width);
                 y++; cy++;
             }
-            
+
             lefty= sub_left_prediction(s, s->temp[0], p->data[0]+fake_ystride, 4, lefty);
             leftu= sub_left_prediction(s, s->temp[1], p->data[1]+fake_ustride, 2, leftu);
             leftv= sub_left_prediction(s, s->temp[2], p->data[2]+fake_vstride, 2, leftv);
-        
+
             encode_422_bitstream(s, 4);
 
             lefttopy= p->data[0][3];
@@ -1113,7 +1113,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
 
             for(; y<height; y++,cy++){
                 uint8_t *ydst, *udst, *vdst;
-                    
+
                 if(s->bitstream_bpp==12){
                     while(2*cy > y){
                         ydst= p->data[0] + p->linesize[0]*y;
@@ -1136,7 +1136,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
         }else{
             for(cy=y=1; y<height; y++,cy++){
                 uint8_t *ydst, *udst, *vdst;
-                
+
                 /* encode a luma only line & y++ */
                 if(s->bitstream_bpp==12){
                     ydst= p->data[0] + p->linesize[0]*y;
@@ -1152,7 +1152,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
                     y++;
                     if(y>=height) break;
                 }
-                
+
                 ydst= p->data[0] + p->linesize[0]*y;
                 udst= p->data[1] + p->linesize[1]*cy;
                 vdst= p->data[2] + p->linesize[2]*cy;
@@ -1173,22 +1173,22 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
 
                 encode_422_bitstream(s, width);
             }
-        }        
+        }
     }else{
         av_log(avctx, AV_LOG_ERROR, "Format not supported!\n");
     }
     emms_c();
-    
+
     size+= (put_bits_count(&s->pb)+31)/8;
     size/= 4;
-    
+
     if((s->flags&CODEC_FLAG_PASS1) && (s->picture_number&31)==0){
         int j;
         char *p= avctx->stats_out;
         char *end= p + 1024*30;
         for(i=0; i<3; i++){
             for(j=0; j<256; j++){
-                snprintf(p, end-p, "%llu ", s->stats[i][j]);
+                snprintf(p, end-p, "%"PRIu64" ", s->stats[i][j]);
                 p+= strlen(p);
                 s->stats[i][j]= 0;
             }
@@ -1201,7 +1201,7 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
         s->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, size);
         avctx->stats_out[0] = '\0';
     }
-    
+
     s->picture_number++;
 
     return size*4;
@@ -1210,12 +1210,12 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
 static int encode_end(AVCodecContext *avctx)
 {
     HYuvContext *s = avctx->priv_data;
-    
+
     common_end(s);
 
     av_freep(&avctx->extradata);
     av_freep(&avctx->stats_out);
-    
+
     return 0;
 }
 
diff --git a/src/libffmpeg/libavcodec/i386/cputest.c b/src/libffmpeg/libavcodec/i386/cputest.c
index 593e0550d..64656c65a 100644
--- a/src/libffmpeg/libavcodec/i386/cputest.c
+++ b/src/libffmpeg/libavcodec/i386/cputest.c
@@ -15,7 +15,7 @@
 /* ebx saving is necessary for PIC. gcc seems unable to see it alone */
 #define cpuid(index,eax,ebx,ecx,edx)\
     __asm __volatile\
-	("mov %%"REG_b", %%"REG_S"\n\t"\
+        ("mov %%"REG_b", %%"REG_S"\n\t"\
          "cpuid\n\t"\
          "xchg %%"REG_b", %%"REG_S\
          : "=a" (eax), "=S" (ebx),\
@@ -29,28 +29,28 @@ int mm_support(void)
     int eax, ebx, ecx, edx;
     int max_std_level, max_ext_level, std_caps=0, ext_caps=0;
     long a, c;
-    
+
     __asm__ __volatile__ (
                           /* See if CPUID instruction is supported ... */
                           /* ... Get copies of EFLAGS into eax and ecx */
                           "pushf\n\t"
                           "pop %0\n\t"
                           "mov %0, %1\n\t"
-                          
+
                           /* ... Toggle the ID bit in one copy and store */
                           /*     to the EFLAGS reg */
                           "xor $0x200000, %0\n\t"
                           "push %0\n\t"
                           "popf\n\t"
-                          
+
                           /* ... Get the (hopefully modified) EFLAGS */
                           "pushf\n\t"
                           "pop %0\n\t"
                           : "=a" (a), "=c" (c)
                           :
-                          : "cc" 
+                          : "cc"
                           );
-    
+
     if (a == c)
         return 0; /* CPUID not supported */
 
@@ -60,9 +60,9 @@ int mm_support(void)
         cpuid(1, eax, ebx, ecx, std_caps);
         if (std_caps & (1<<23))
             rval |= MM_MMX;
-        if (std_caps & (1<<25)) 
+        if (std_caps & (1<<25))
             rval |= MM_MMXEXT | MM_SSE;
-        if (std_caps & (1<<26)) 
+        if (std_caps & (1<<26))
             rval |= MM_SSE2;
     }
 
@@ -89,8 +89,8 @@ int mm_support(void)
                edx == 0x48727561 &&
                ecx == 0x736c7561) {  /*  "CentaurHauls" */
         /* VIA C3 */
-	if(ext_caps & (1<<24))
-	  rval |= MM_MMXEXT;
+        if(ext_caps & (1<<24))
+          rval |= MM_MMXEXT;
     } else if (ebx == 0x69727943 &&
                edx == 0x736e4978 &&
                ecx == 0x64616574) {
@@ -103,18 +103,18 @@ int mm_support(void)
            According to the table, the only CPU which supports level
            2 is also the only one which supports extended CPUID levels.
         */
-        if (eax < 2) 
+        if (eax < 2)
             return rval;
         if (ext_caps & (1<<24))
             rval |= MM_MMXEXT;
     }
 #if 0
-    av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n", 
-        (rval&MM_MMX) ? "MMX ":"", 
-        (rval&MM_MMXEXT) ? "MMX2 ":"", 
-        (rval&MM_SSE) ? "SSE ":"", 
-        (rval&MM_SSE2) ? "SSE2 ":"", 
-        (rval&MM_3DNOW) ? "3DNow ":"", 
+    av_log(NULL, AV_LOG_DEBUG, "%s%s%s%s%s%s\n",
+        (rval&MM_MMX) ? "MMX ":"",
+        (rval&MM_MMXEXT) ? "MMX2 ":"",
+        (rval&MM_SSE) ? "SSE ":"",
+        (rval&MM_SSE2) ? "SSE2 ":"",
+        (rval&MM_3DNOW) ? "3DNow ":"",
         (rval&MM_3DNOWEXT) ? "3DNowExt ":"");
 #endif
     return rval;
diff --git a/src/libffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c
index 4cd4d52d8..d52938ccf 100644
--- a/src/libffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/dsputil_h264_template_mmx.c
@@ -1,5 +1,6 @@
 /*
- * Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>
+ * Copyright (c) 2005 Zoltan Hidvegi <hzoli -a- hzoli -d- com>,
+ *                    Loren Merritt
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -13,27 +14,143 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * MMX optimized version of (put|avg)_h264_chroma_mc8.
- * H264_CHROMA_MC8_TMPL must be defined to the desired function name and
- * H264_CHROMA_OP must be defined to empty for put and pavgb/pavgusb for avg.
+ * H264_CHROMA_MC8_TMPL must be defined to the desired function name
+ * H264_CHROMA_OP must be defined to empty for put and pavgb/pavgusb for avg
+ * H264_CHROMA_MC8_MV0 must be defined to a (put|avg)_pixels8 function
  */
 static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
 {
-    uint64_t AA __align8;
-    uint64_t DD __align8;
-    unsigned long srcos = (long)src & 7;
-    uint64_t sh1 __align8 = srcos * 8;
-    uint64_t sh2 __align8 = 56 - sh1;
+    DECLARE_ALIGNED_8(uint64_t, AA);
+    DECLARE_ALIGNED_8(uint64_t, DD);
     int i;
 
+    if(y==0 && x==0) {
+        /* no filter needed */
+        H264_CHROMA_MC8_MV0(dst, src, stride, h);
+        return;
+    }
+
     assert(x<8 && y<8 && x>=0 && y>=0);
 
-    asm volatile("movd %1, %%mm4\n\t"
-                 "movd %2, %%mm6\n\t"
+    if(y==0)
+    {
+        /* horizontal filter only */
+        asm volatile("movd %0, %%mm5\n\t"
+                     "punpcklwd %%mm5, %%mm5\n\t"
+                     "punpckldq %%mm5, %%mm5\n\t" /* mm5 = B = x */
+                     "movq %1, %%mm4\n\t"
+                     "pxor %%mm7, %%mm7\n\t"
+                     "psubw %%mm5, %%mm4\n\t"     /* mm4 = A = 8-x */
+                     : : "rm" (x), "m" (ff_pw_8));
+
+        for(i=0; i<h; i++) {
+            asm volatile(
+                /* mm0 = src[0..7], mm1 = src[1..8] */
+                "movq %0, %%mm0\n\t"
+                "movq %1, %%mm1\n\t"
+                : : "m" (src[0]), "m" (src[1]));
+
+            asm volatile(
+                /* [mm2,mm3] = A * src[0..7] */
+                "movq %%mm0, %%mm2\n\t"
+                "punpcklbw %%mm7, %%mm2\n\t"
+                "pmullw %%mm4, %%mm2\n\t"
+                "movq %%mm0, %%mm3\n\t"
+                "punpckhbw %%mm7, %%mm3\n\t"
+                "pmullw %%mm4, %%mm3\n\t"
+
+                /* [mm2,mm3] += B * src[1..8] */
+                "movq %%mm1, %%mm0\n\t"
+                "punpcklbw %%mm7, %%mm0\n\t"
+                "pmullw %%mm5, %%mm0\n\t"
+                "punpckhbw %%mm7, %%mm1\n\t"
+                "pmullw %%mm5, %%mm1\n\t"
+                "paddw %%mm0, %%mm2\n\t"
+                "paddw %%mm1, %%mm3\n\t"
+
+                /* dst[0..7] = pack(([mm2,mm3] + 4) >> 3) */
+                "paddw %1, %%mm2\n\t"
+                "paddw %1, %%mm3\n\t"
+                "psrlw $3, %%mm2\n\t"
+                "psrlw $3, %%mm3\n\t"
+                "packuswb %%mm3, %%mm2\n\t"
+                H264_CHROMA_OP(%0, %%mm2)
+                "movq %%mm2, %0\n\t"
+                : "=m" (dst[0]) : "m" (ff_pw_4));
+
+            src += stride;
+            dst += stride;
+        }
+        return;
+    }
+
+    if(x==0)
+    {
+        /* vertical filter only */
+        asm volatile("movd %0, %%mm6\n\t"
+                     "punpcklwd %%mm6, %%mm6\n\t"
+                     "punpckldq %%mm6, %%mm6\n\t" /* mm6 = C = y */
+                     "movq %1, %%mm4\n\t"
+                     "pxor %%mm7, %%mm7\n\t"
+                     "psubw %%mm6, %%mm4\n\t"     /* mm4 = A = 8-y */
+                     : : "rm" (y), "m" (ff_pw_8));
+
+        asm volatile(
+            /* mm0 = src[0..7] */
+            "movq %0, %%mm0\n\t"
+            : : "m" (src[0]));
+
+        for(i=0; i<h; i++) {
+            asm volatile(
+                /* [mm2,mm3] = A * src[0..7] */
+                "movq %mm0, %mm2\n\t"
+                "punpcklbw %mm7, %mm2\n\t"
+                "pmullw %mm4, %mm2\n\t"
+                "movq %mm0, %mm3\n\t"
+                "punpckhbw %mm7, %mm3\n\t"
+                "pmullw %mm4, %mm3\n\t");
+
+            src += stride;
+            asm volatile(
+                /* mm0 = src[0..7] */
+                "movq %0, %%mm0\n\t"
+                : : "m" (src[0]));
+
+            asm volatile(
+                /* [mm2,mm3] += C * src[0..7] */
+                "movq %mm0, %mm1\n\t"
+                "punpcklbw %mm7, %mm1\n\t"
+                "pmullw %mm6, %mm1\n\t"
+                "paddw %mm1, %mm2\n\t"
+                "movq %mm0, %mm5\n\t"
+                "punpckhbw %mm7, %mm5\n\t"
+                "pmullw %mm6, %mm5\n\t"
+                "paddw %mm5, %mm3\n\t");
+
+            asm volatile(
+                /* dst[0..7] = pack(([mm2,mm3] + 4) >> 3) */
+                "paddw %1, %%mm2\n\t"
+                "paddw %1, %%mm3\n\t"
+                "psrlw $3, %%mm2\n\t"
+                "psrlw $3, %%mm3\n\t"
+                "packuswb %%mm3, %%mm2\n\t"
+                H264_CHROMA_OP(%0, %%mm2)
+                "movq %%mm2, %0\n\t"
+                : "=m" (dst[0]) : "m" (ff_pw_4));
+
+            dst += stride;
+        }
+        return;
+    }
+
+    /* general case, bilinear */
+    asm volatile("movd %2, %%mm4\n\t"
+                 "movd %3, %%mm6\n\t"
                  "punpcklwd %%mm4, %%mm4\n\t"
                  "punpcklwd %%mm6, %%mm6\n\t"
                  "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */
@@ -44,29 +161,20 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
                  "psllw $3, %%mm6\n\t"
                  "movq %%mm5, %%mm7\n\t"
                  "paddw %%mm6, %%mm7\n\t"
-                 "movq %%mm4, %0\n\t"         /* DD = x * y */
+                 "movq %%mm4, %1\n\t"         /* DD = x * y */
                  "psubw %%mm4, %%mm5\n\t"     /* mm5 = B = 8x - xy */
                  "psubw %%mm4, %%mm6\n\t"     /* mm6 = C = 8y - xy */
-                 "paddw %3, %%mm4\n\t"
+                 "paddw %4, %%mm4\n\t"
                  "psubw %%mm7, %%mm4\n\t"     /* mm4 = A = xy - (8x+8y) + 64 */
                  "pxor %%mm7, %%mm7\n\t"
-                 : "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
-
-    asm volatile("movq %%mm4, %0" : "=m" (AA));
+                 "movq %%mm4, %0\n\t"
+                 : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
 
-    src -= srcos;
     asm volatile(
         /* mm0 = src[0..7], mm1 = src[1..8] */
-        "movq %0, %%mm1\n\t"
-        "movq %1, %%mm0\n\t"
-        "psrlq %2, %%mm1\n\t"
-        "psllq %3, %%mm0\n\t"
-        "movq %%mm0, %%mm4\n\t"
-        "psllq $8, %%mm0\n\t"
-        "por %%mm1, %%mm0\n\t"
-        "psrlq $8, %%mm1\n\t"
-        "por %%mm4, %%mm1\n\t"
-        : : "m" (src[0]), "m" (src[8]), "m" (sh1), "m" (sh2));
+        "movq %0, %%mm0\n\t"
+        "movq %1, %%mm1\n\t"
+        : : "m" (src[0]), "m" (src[1]));
 
     for(i=0; i<h; i++) {
         asm volatile(
@@ -91,16 +199,9 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
         src += stride;
         asm volatile(
             /* mm0 = src[0..7], mm1 = src[1..8] */
-            "movq %0, %%mm1\n\t"
-            "movq %1, %%mm0\n\t"
-            "psrlq %2, %%mm1\n\t"
-            "psllq %3, %%mm0\n\t"
-            "movq %%mm0, %%mm4\n\t"
-            "psllq $8, %%mm0\n\t"
-            "por %%mm1, %%mm0\n\t"
-            "psrlq $8, %%mm1\n\t"
-            "por %%mm4, %%mm1\n\t"
-            : : "m" (src[0]), "m" (src[8]), "m" (sh1), "m" (sh2));
+            "movq %0, %%mm0\n\t"
+            "movq %1, %%mm1\n\t"
+            : : "m" (src[0]), "m" (src[1]));
 
         asm volatile(
             /* [mm2,mm3] += C *  src[0..7] */
@@ -138,3 +239,83 @@ static void H264_CHROMA_MC8_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*
         dst+= stride;
     }
 }
+
+static void H264_CHROMA_MC4_TMPL(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y)
+{
+    DECLARE_ALIGNED_8(uint64_t, AA);   /* weight A as 4 words; kept in memory because mm4 is reused as scratch in the loop */
+    DECLARE_ALIGNED_8(uint64_t, DD);   /* weight D = x*y as 4 words */
+    int i;
+    /* 4-pixel-wide bilinear chroma interpolation with 1/8-pel offsets (x,y), h rows; per row:
+     * dst[0..3] = (A*src[0..3] + B*src[1..4] + C*src[stride+0..3] + D*src[stride+1..4] + 32) >> 6.
+     * No special case for mv=(0,0) in 4x*, since it's much less common than in 8x*.
+     * Could still save a few cycles, but maybe not worth the complexity. */
+    assert(x<8 && y<8 && x>=0 && y>=0);
+    /* Broadcast x and y to 4 words each, then derive the four weights (A+B+C+D = 64). */
+    asm volatile("movd %2, %%mm4\n\t"         /* low dword of mm4 = x */
+                 "movd %3, %%mm6\n\t"         /* low dword of mm6 = y */
+                 "punpcklwd %%mm4, %%mm4\n\t"
+                 "punpcklwd %%mm6, %%mm6\n\t"
+                 "punpckldq %%mm4, %%mm4\n\t" /* mm4 = x words */
+                 "punpckldq %%mm6, %%mm6\n\t" /* mm6 = y words */
+                 "movq %%mm4, %%mm5\n\t"      /* mm5 = x */
+                 "pmullw %%mm6, %%mm4\n\t"    /* mm4 = x * y */
+                 "psllw $3, %%mm5\n\t"        /* mm5 = 8x */
+                 "psllw $3, %%mm6\n\t"        /* mm6 = 8y */
+                 "movq %%mm5, %%mm7\n\t"
+                 "paddw %%mm6, %%mm7\n\t"     /* mm7 = 8x + 8y */
+                 "movq %%mm4, %1\n\t"         /* DD = x * y */
+                 "psubw %%mm4, %%mm5\n\t"     /* mm5 = B = 8x - xy */
+                 "psubw %%mm4, %%mm6\n\t"     /* mm6 = C = 8y - xy */
+                 "paddw %4, %%mm4\n\t"        /* mm4 = xy + 64 */
+                 "psubw %%mm7, %%mm4\n\t"     /* mm4 = A = xy - (8x+8y) + 64 */
+                 "pxor %%mm7, %%mm7\n\t"      /* mm7 = 0, used below to widen bytes to words */
+                 "movq %%mm4, %0\n\t"         /* AA = A */
+                 : "=m" (AA), "=m" (DD) : "rm" (x), "rm" (y), "m" (ff_pw_64));
+
+    asm volatile(
+        /* mm0 = src[0..3], mm1 = src[1..4] */
+        "movd %0, %%mm0\n\t"
+        "movd %1, %%mm1\n\t"
+        "punpcklbw %%mm7, %%mm0\n\t"          /* zero-extend the 4 bytes to 4 words */
+        "punpcklbw %%mm7, %%mm1\n\t"
+        : : "m" (src[0]), "m" (src[1]));
+    /* NOTE(review): mm0-mm7 are carried across the separate asm statements (none has a clobber list); this relies on the compiler not touching MMX registers in between. */
+    for(i=0; i<h; i++) {
+        asm volatile(
+            /* mm2 = A * src[0..3] + B * src[1..4] */
+            "movq %%mm0, %%mm2\n\t"
+            "pmullw %0, %%mm2\n\t"            /* mm2 = A * current row */
+            "pmullw %%mm5, %%mm1\n\t"         /* mm1 = B * src[1..4]; mm1 is destroyed here and reloaded below */
+            "paddw %%mm1, %%mm2\n\t"
+            : : "m" (AA));
+
+        src += stride;                        /* advance to the next source row; h+1 rows are read in total */
+        asm volatile(
+            /* mm0 = src[0..3], mm1 = src[1..4] */
+            "movd %0, %%mm0\n\t"
+            "movd %1, %%mm1\n\t"
+            "punpcklbw %%mm7, %%mm0\n\t"
+            "punpcklbw %%mm7, %%mm1\n\t"
+            : : "m" (src[0]), "m" (src[1]));
+
+        asm volatile(
+            /* mm2 += C * src[0..3] + D * src[1..4] */
+            "movq %%mm0, %%mm3\n\t"           /* multiply copies so mm0/mm1 stay intact for the next iteration */
+            "movq %%mm1, %%mm4\n\t"
+            "pmullw %%mm6, %%mm3\n\t"         /* mm3 = C * src[0..3] */
+            "pmullw %0, %%mm4\n\t"            /* mm4 = D * src[1..4] */
+            "paddw %%mm3, %%mm2\n\t"
+            "paddw %%mm4, %%mm2\n\t"
+            : : "m" (DD));
+
+        asm volatile(
+            /* dst[0..3] = pack((mm2 + 32) >> 6) */
+            "paddw %1, %%mm2\n\t"             /* add the rounding term 32 */
+            "psrlw $6, %%mm2\n\t"
+            "packuswb %%mm7, %%mm2\n\t"       /* words -> bytes; low 4 bytes hold the result */
+            H264_CHROMA_OP4(%0, %%mm2, %%mm3) /* defined by the including file; presumably put/avg selection with mm3 as scratch -- TODO confirm */
+            "movd %%mm2, %0\n\t"
+            : "=m" (dst[0]) : "m" (ff_pw_32));
+        dst += stride;
+    }
+}
diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
index c1dd2176a..7d69859a6 100644
--- a/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
  */
@@ -43,6 +43,7 @@ static const uint64_t ff_pw_20 attribute_used __attribute__ ((aligned(8))) = 0x0
 static const uint64_t ff_pw_3  attribute_used __attribute__ ((aligned(8))) = 0x0003000300030003ULL;
 static const uint64_t ff_pw_4  attribute_used __attribute__ ((aligned(8))) = 0x0004000400040004ULL;
 static const uint64_t ff_pw_5  attribute_used __attribute__ ((aligned(8))) = 0x0005000500050005ULL;
+static const uint64_t ff_pw_8  attribute_used __attribute__ ((aligned(8))) = 0x0008000800080008ULL;
 static const uint64_t ff_pw_16 attribute_used __attribute__ ((aligned(8))) = 0x0010001000100010ULL;
 static const uint64_t ff_pw_32 attribute_used __attribute__ ((aligned(8))) = 0x0020002000200020ULL;
 static const uint64_t ff_pw_64 attribute_used __attribute__ ((aligned(8))) = 0x0040004000400040ULL;
@@ -88,56 +89,56 @@ static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xF
 // first argument is unmodifed and second is trashed
 // regfe is supposed to contain 0xfefefefefefefefe
 #define PAVGB_MMX_NO_RND(rega, regb, regr, regfe) \
-    "movq " #rega ", " #regr "	\n\t"\
-    "pand " #regb ", " #regr "	\n\t"\
-    "pxor " #rega ", " #regb "	\n\t"\
-    "pand " #regfe "," #regb "	\n\t"\
-    "psrlq $1, " #regb " 	\n\t"\
-    "paddb " #regb ", " #regr "	\n\t"
+    "movq " #rega ", " #regr "  \n\t"\
+    "pand " #regb ", " #regr "  \n\t"\
+    "pxor " #rega ", " #regb "  \n\t"\
+    "pand " #regfe "," #regb "  \n\t"\
+    "psrlq $1, " #regb "        \n\t"\
+    "paddb " #regb ", " #regr " \n\t"
 
 #define PAVGB_MMX(rega, regb, regr, regfe) \
-    "movq " #rega ", " #regr "	\n\t"\
-    "por  " #regb ", " #regr "	\n\t"\
-    "pxor " #rega ", " #regb "	\n\t"\
-    "pand " #regfe "," #regb "	\n\t"\
-    "psrlq $1, " #regb "	\n\t"\
-    "psubb " #regb ", " #regr "	\n\t"
+    "movq " #rega ", " #regr "  \n\t"\
+    "por  " #regb ", " #regr "  \n\t"\
+    "pxor " #rega ", " #regb "  \n\t"\
+    "pand " #regfe "," #regb "  \n\t"\
+    "psrlq $1, " #regb "        \n\t"\
+    "psubb " #regb ", " #regr " \n\t"
 
 // mm6 is supposed to contain 0xfefefefefefefefe
 #define PAVGBP_MMX_NO_RND(rega, regb, regr,  regc, regd, regp) \
-    "movq " #rega ", " #regr "	\n\t"\
-    "movq " #regc ", " #regp "	\n\t"\
-    "pand " #regb ", " #regr "	\n\t"\
-    "pand " #regd ", " #regp "	\n\t"\
-    "pxor " #rega ", " #regb "	\n\t"\
-    "pxor " #regc ", " #regd "	\n\t"\
-    "pand %%mm6, " #regb "	\n\t"\
-    "pand %%mm6, " #regd "	\n\t"\
-    "psrlq $1, " #regb " 	\n\t"\
-    "psrlq $1, " #regd " 	\n\t"\
-    "paddb " #regb ", " #regr "	\n\t"\
-    "paddb " #regd ", " #regp "	\n\t"
+    "movq " #rega ", " #regr "  \n\t"\
+    "movq " #regc ", " #regp "  \n\t"\
+    "pand " #regb ", " #regr "  \n\t"\
+    "pand " #regd ", " #regp "  \n\t"\
+    "pxor " #rega ", " #regb "  \n\t"\
+    "pxor " #regc ", " #regd "  \n\t"\
+    "pand %%mm6, " #regb "      \n\t"\
+    "pand %%mm6, " #regd "      \n\t"\
+    "psrlq $1, " #regb "        \n\t"\
+    "psrlq $1, " #regd "        \n\t"\
+    "paddb " #regb ", " #regr " \n\t"\
+    "paddb " #regd ", " #regp " \n\t"
 
 #define PAVGBP_MMX(rega, regb, regr, regc, regd, regp) \
-    "movq " #rega ", " #regr "	\n\t"\
-    "movq " #regc ", " #regp "	\n\t"\
-    "por  " #regb ", " #regr "	\n\t"\
-    "por  " #regd ", " #regp "	\n\t"\
-    "pxor " #rega ", " #regb "	\n\t"\
-    "pxor " #regc ", " #regd "	\n\t"\
-    "pand %%mm6, " #regb "     	\n\t"\
-    "pand %%mm6, " #regd "     	\n\t"\
-    "psrlq $1, " #regd "	\n\t"\
-    "psrlq $1, " #regb "	\n\t"\
-    "psubb " #regb ", " #regr "	\n\t"\
-    "psubb " #regd ", " #regp "	\n\t"
+    "movq " #rega ", " #regr "  \n\t"\
+    "movq " #regc ", " #regp "  \n\t"\
+    "por  " #regb ", " #regr "  \n\t"\
+    "por  " #regd ", " #regp "  \n\t"\
+    "pxor " #rega ", " #regb "  \n\t"\
+    "pxor " #regc ", " #regd "  \n\t"\
+    "pand %%mm6, " #regb "      \n\t"\
+    "pand %%mm6, " #regd "      \n\t"\
+    "psrlq $1, " #regd "        \n\t"\
+    "psrlq $1, " #regb "        \n\t"\
+    "psubb " #regb ", " #regr " \n\t"\
+    "psubb " #regd ", " #regp " \n\t"
 
 /***********************************/
 /* MMX no rounding */
 #define DEF(x, y) x ## _no_rnd_ ## y ##_mmx
 #define SET_RND  MOVQ_WONE
-#define PAVGBP(a, b, c, d, e, f)	PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
-#define PAVGB(a, b, c, e)		PAVGB_MMX_NO_RND(a, b, c, e)
+#define PAVGBP(a, b, c, d, e, f)        PAVGBP_MMX_NO_RND(a, b, c, d, e, f)
+#define PAVGB(a, b, c, e)               PAVGB_MMX_NO_RND(a, b, c, e)
 
 #include "dsputil_mmx_rnd.h"
 
@@ -150,8 +151,8 @@ static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xF
 
 #define DEF(x, y) x ## _ ## y ##_mmx
 #define SET_RND  MOVQ_WTWO
-#define PAVGBP(a, b, c, d, e, f)	PAVGBP_MMX(a, b, c, d, e, f)
-#define PAVGB(a, b, c, e)		PAVGB_MMX(a, b, c, e)
+#define PAVGBP(a, b, c, d, e, f)        PAVGBP_MMX(a, b, c, d, e, f)
+#define PAVGB(a, b, c, e)               PAVGB_MMX(a, b, c, e)
 
 #include "dsputil_mmx_rnd.h"
 
@@ -192,25 +193,25 @@ static const uint64_t ff_pb_FC attribute_used __attribute__ ((aligned(8))) = 0xF
 static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
 {
     asm volatile(
-        "mov $-128, %%"REG_a"	\n\t"
-        "pxor %%mm7, %%mm7	\n\t"
-        ".balign 16		\n\t"
-        "1:			\n\t"
-        "movq (%0), %%mm0	\n\t"
-        "movq (%0, %2), %%mm2	\n\t"
-        "movq %%mm0, %%mm1	\n\t"
-        "movq %%mm2, %%mm3	\n\t"
-        "punpcklbw %%mm7, %%mm0	\n\t"
-        "punpckhbw %%mm7, %%mm1	\n\t"
-        "punpcklbw %%mm7, %%mm2	\n\t"
-        "punpckhbw %%mm7, %%mm3	\n\t"
-        "movq %%mm0, (%1, %%"REG_a")\n\t"
-        "movq %%mm1, 8(%1, %%"REG_a")\n\t"
-        "movq %%mm2, 16(%1, %%"REG_a")\n\t"
-        "movq %%mm3, 24(%1, %%"REG_a")\n\t"
-        "add %3, %0		\n\t"
-        "add $32, %%"REG_a"	\n\t"
-        "js 1b			\n\t"
+        "mov $-128, %%"REG_a"           \n\t"
+        "pxor %%mm7, %%mm7              \n\t"
+        ".balign 16                     \n\t"
+        "1:                             \n\t"
+        "movq (%0), %%mm0               \n\t"
+        "movq (%0, %2), %%mm2           \n\t"
+        "movq %%mm0, %%mm1              \n\t"
+        "movq %%mm2, %%mm3              \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "movq %%mm0, (%1, %%"REG_a")    \n\t"
+        "movq %%mm1, 8(%1, %%"REG_a")   \n\t"
+        "movq %%mm2, 16(%1, %%"REG_a")  \n\t"
+        "movq %%mm3, 24(%1, %%"REG_a")  \n\t"
+        "add %3, %0                     \n\t"
+        "add $32, %%"REG_a"             \n\t"
+        "js 1b                          \n\t"
         : "+r" (pixels)
         : "r" (block+64), "r" ((long)line_size), "r" ((long)line_size*2)
         : "%"REG_a
@@ -220,26 +221,26 @@ static void get_pixels_mmx(DCTELEM *block, const uint8_t *pixels, int line_size)
 static inline void diff_pixels_mmx(DCTELEM *block, const uint8_t *s1, const uint8_t *s2, int stride)
 {
     asm volatile(
-        "pxor %%mm7, %%mm7	\n\t"
-        "mov $-128, %%"REG_a"	\n\t"
-        ".balign 16		\n\t"
-        "1:			\n\t"
-        "movq (%0), %%mm0	\n\t"
-        "movq (%1), %%mm2	\n\t"
-        "movq %%mm0, %%mm1	\n\t"
-        "movq %%mm2, %%mm3	\n\t"
-        "punpcklbw %%mm7, %%mm0	\n\t"
-        "punpckhbw %%mm7, %%mm1	\n\t"
-        "punpcklbw %%mm7, %%mm2	\n\t"
-        "punpckhbw %%mm7, %%mm3	\n\t"
-        "psubw %%mm2, %%mm0	\n\t"
-        "psubw %%mm3, %%mm1	\n\t"
-        "movq %%mm0, (%2, %%"REG_a")\n\t"
-        "movq %%mm1, 8(%2, %%"REG_a")\n\t"
-        "add %3, %0		\n\t"
-        "add %3, %1		\n\t"
-        "add $16, %%"REG_a"	\n\t"
-        "jnz 1b			\n\t"
+        "pxor %%mm7, %%mm7              \n\t"
+        "mov $-128, %%"REG_a"           \n\t"
+        ".balign 16                     \n\t"
+        "1:                             \n\t"
+        "movq (%0), %%mm0               \n\t"
+        "movq (%1), %%mm2               \n\t"
+        "movq %%mm0, %%mm1              \n\t"
+        "movq %%mm2, %%mm3              \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "psubw %%mm2, %%mm0             \n\t"
+        "psubw %%mm3, %%mm1             \n\t"
+        "movq %%mm0, (%2, %%"REG_a")    \n\t"
+        "movq %%mm1, 8(%2, %%"REG_a")   \n\t"
+        "add %3, %0                     \n\t"
+        "add %3, %1                     \n\t"
+        "add $16, %%"REG_a"             \n\t"
+        "jnz 1b                         \n\t"
         : "+r" (s1), "+r" (s2)
         : "r" (block+64), "r" ((long)stride)
         : "%"REG_a
@@ -256,25 +257,25 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
     p = block;
     pix = pixels;
     /* unrolled loop */
-	__asm __volatile(
-		"movq	%3, %%mm0\n\t"
-		"movq	8%3, %%mm1\n\t"
-		"movq	16%3, %%mm2\n\t"
-		"movq	24%3, %%mm3\n\t"
-		"movq	32%3, %%mm4\n\t"
-		"movq	40%3, %%mm5\n\t"
-		"movq	48%3, %%mm6\n\t"
-		"movq	56%3, %%mm7\n\t"
-		"packuswb %%mm1, %%mm0\n\t"
-		"packuswb %%mm3, %%mm2\n\t"
-		"packuswb %%mm5, %%mm4\n\t"
-		"packuswb %%mm7, %%mm6\n\t"
-		"movq	%%mm0, (%0)\n\t"
-		"movq	%%mm2, (%0, %1)\n\t"
-		"movq	%%mm4, (%0, %1, 2)\n\t"
-		"movq	%%mm6, (%0, %2)\n\t"
-		::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "m"(*p)
-		:"memory");
+        __asm __volatile(
+                "movq   %3, %%mm0               \n\t"
+                "movq   8%3, %%mm1              \n\t"
+                "movq   16%3, %%mm2             \n\t"
+                "movq   24%3, %%mm3             \n\t"
+                "movq   32%3, %%mm4             \n\t"
+                "movq   40%3, %%mm5             \n\t"
+                "movq   48%3, %%mm6             \n\t"
+                "movq   56%3, %%mm7             \n\t"
+                "packuswb %%mm1, %%mm0          \n\t"
+                "packuswb %%mm3, %%mm2          \n\t"
+                "packuswb %%mm5, %%mm4          \n\t"
+                "packuswb %%mm7, %%mm6          \n\t"
+                "movq   %%mm0, (%0)             \n\t"
+                "movq   %%mm2, (%0, %1)         \n\t"
+                "movq   %%mm4, (%0, %1, 2)      \n\t"
+                "movq   %%mm6, (%0, %2)         \n\t"
+                ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "m"(*p)
+                :"memory");
         pix += line_size*4;
         p += 32;
 
@@ -282,27 +283,27 @@ void put_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
     // compiler would generate some very strange code
     // thus using "r"
     __asm __volatile(
-	    "movq	(%3), %%mm0\n\t"
-	    "movq	8(%3), %%mm1\n\t"
-	    "movq	16(%3), %%mm2\n\t"
-	    "movq	24(%3), %%mm3\n\t"
-	    "movq	32(%3), %%mm4\n\t"
-	    "movq	40(%3), %%mm5\n\t"
-	    "movq	48(%3), %%mm6\n\t"
-	    "movq	56(%3), %%mm7\n\t"
-	    "packuswb %%mm1, %%mm0\n\t"
-	    "packuswb %%mm3, %%mm2\n\t"
-	    "packuswb %%mm5, %%mm4\n\t"
-	    "packuswb %%mm7, %%mm6\n\t"
-	    "movq	%%mm0, (%0)\n\t"
-	    "movq	%%mm2, (%0, %1)\n\t"
-	    "movq	%%mm4, (%0, %1, 2)\n\t"
-	    "movq	%%mm6, (%0, %2)\n\t"
-	    ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "r"(p)
-	    :"memory");
+            "movq       (%3), %%mm0             \n\t"
+            "movq       8(%3), %%mm1            \n\t"
+            "movq       16(%3), %%mm2           \n\t"
+            "movq       24(%3), %%mm3           \n\t"
+            "movq       32(%3), %%mm4           \n\t"
+            "movq       40(%3), %%mm5           \n\t"
+            "movq       48(%3), %%mm6           \n\t"
+            "movq       56(%3), %%mm7           \n\t"
+            "packuswb %%mm1, %%mm0              \n\t"
+            "packuswb %%mm3, %%mm2              \n\t"
+            "packuswb %%mm5, %%mm4              \n\t"
+            "packuswb %%mm7, %%mm6              \n\t"
+            "movq       %%mm0, (%0)             \n\t"
+            "movq       %%mm2, (%0, %1)         \n\t"
+            "movq       %%mm4, (%0, %1, 2)      \n\t"
+            "movq       %%mm6, (%0, %2)         \n\t"
+            ::"r" (pix), "r" ((long)line_size), "r" ((long)line_size*3), "r"(p)
+            :"memory");
 }
 
-static const unsigned char __align8 vector128[8] =
+static DECLARE_ALIGNED_8(const unsigned char, vector128[8]) =
   { 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80 };
 
 void put_signed_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size)
@@ -332,30 +333,30 @@ void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
     MOVQ_ZERO(mm7);
     i = 4;
     do {
-	__asm __volatile(
-		"movq	(%2), %%mm0\n\t"
-		"movq	8(%2), %%mm1\n\t"
-		"movq	16(%2), %%mm2\n\t"
-		"movq	24(%2), %%mm3\n\t"
-		"movq	%0, %%mm4\n\t"
-		"movq	%1, %%mm6\n\t"
-		"movq	%%mm4, %%mm5\n\t"
-		"punpcklbw %%mm7, %%mm4\n\t"
-		"punpckhbw %%mm7, %%mm5\n\t"
-		"paddsw	%%mm4, %%mm0\n\t"
-		"paddsw	%%mm5, %%mm1\n\t"
-		"movq	%%mm6, %%mm5\n\t"
-		"punpcklbw %%mm7, %%mm6\n\t"
-		"punpckhbw %%mm7, %%mm5\n\t"
-		"paddsw	%%mm6, %%mm2\n\t"
-		"paddsw	%%mm5, %%mm3\n\t"
-		"packuswb %%mm1, %%mm0\n\t"
-		"packuswb %%mm3, %%mm2\n\t"
-		"movq	%%mm0, %0\n\t"
-		"movq	%%mm2, %1\n\t"
-		:"+m"(*pix), "+m"(*(pix+line_size))
-		:"r"(p)
-		:"memory");
+        __asm __volatile(
+                "movq   (%2), %%mm0     \n\t"
+                "movq   8(%2), %%mm1    \n\t"
+                "movq   16(%2), %%mm2   \n\t"
+                "movq   24(%2), %%mm3   \n\t"
+                "movq   %0, %%mm4       \n\t"
+                "movq   %1, %%mm6       \n\t"
+                "movq   %%mm4, %%mm5    \n\t"
+                "punpcklbw %%mm7, %%mm4 \n\t"
+                "punpckhbw %%mm7, %%mm5 \n\t"
+                "paddsw %%mm4, %%mm0    \n\t"
+                "paddsw %%mm5, %%mm1    \n\t"
+                "movq   %%mm6, %%mm5    \n\t"
+                "punpcklbw %%mm7, %%mm6 \n\t"
+                "punpckhbw %%mm7, %%mm5 \n\t"
+                "paddsw %%mm6, %%mm2    \n\t"
+                "paddsw %%mm5, %%mm3    \n\t"
+                "packuswb %%mm1, %%mm0  \n\t"
+                "packuswb %%mm3, %%mm2  \n\t"
+                "movq   %%mm0, %0       \n\t"
+                "movq   %%mm2, %1       \n\t"
+                :"+m"(*pix), "+m"(*(pix+line_size))
+                :"r"(p)
+                :"memory");
         pix += line_size*2;
         p += 16;
     } while (--i);
@@ -364,101 +365,101 @@ void add_pixels_clamped_mmx(const DCTELEM *block, uint8_t *pixels, int line_size
 static void put_pixels4_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm __volatile(
-	 "lea (%3, %3), %%"REG_a"	\n\t"
-	 ".balign 8			\n\t"
-	 "1:				\n\t"
-	 "movd (%1), %%mm0		\n\t"
-	 "movd (%1, %3), %%mm1		\n\t"
-	 "movd %%mm0, (%2)		\n\t"
-	 "movd %%mm1, (%2, %3)		\n\t"
-	 "add %%"REG_a", %1		\n\t"
-	 "add %%"REG_a", %2		\n\t"
-	 "movd (%1), %%mm0		\n\t"
-	 "movd (%1, %3), %%mm1		\n\t"
-	 "movd %%mm0, (%2)		\n\t"
-	 "movd %%mm1, (%2, %3)		\n\t"
-	 "add %%"REG_a", %1		\n\t"
-	 "add %%"REG_a", %2		\n\t"
-	 "subl $4, %0			\n\t"
-	 "jnz 1b			\n\t"
-	 : "+g"(h), "+r" (pixels),  "+r" (block)
-	 : "r"((long)line_size)
-	 : "%"REG_a, "memory"
-	);
+         "lea (%3, %3), %%"REG_a"       \n\t"
+         ".balign 8                     \n\t"
+         "1:                            \n\t"
+         "movd (%1), %%mm0              \n\t"
+         "movd (%1, %3), %%mm1          \n\t"
+         "movd %%mm0, (%2)              \n\t"
+         "movd %%mm1, (%2, %3)          \n\t"
+         "add %%"REG_a", %1             \n\t"
+         "add %%"REG_a", %2             \n\t"
+         "movd (%1), %%mm0              \n\t"
+         "movd (%1, %3), %%mm1          \n\t"
+         "movd %%mm0, (%2)              \n\t"
+         "movd %%mm1, (%2, %3)          \n\t"
+         "add %%"REG_a", %1             \n\t"
+         "add %%"REG_a", %2             \n\t"
+         "subl $4, %0                   \n\t"
+         "jnz 1b                        \n\t"
+         : "+g"(h), "+r" (pixels),  "+r" (block)
+         : "r"((long)line_size)
+         : "%"REG_a, "memory"
+        );
 }
 
 static void put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm __volatile(
-	 "lea (%3, %3), %%"REG_a"	\n\t"
-	 ".balign 8			\n\t"
-	 "1:				\n\t"
-	 "movq (%1), %%mm0		\n\t"
-	 "movq (%1, %3), %%mm1		\n\t"
-     	 "movq %%mm0, (%2)		\n\t"
-	 "movq %%mm1, (%2, %3)		\n\t"
-	 "add %%"REG_a", %1		\n\t"
-	 "add %%"REG_a", %2		\n\t"
-	 "movq (%1), %%mm0		\n\t"
-	 "movq (%1, %3), %%mm1		\n\t"
-	 "movq %%mm0, (%2)		\n\t"
-	 "movq %%mm1, (%2, %3)		\n\t"
-	 "add %%"REG_a", %1		\n\t"
-	 "add %%"REG_a", %2		\n\t"
-	 "subl $4, %0			\n\t"
-	 "jnz 1b			\n\t"
-	 : "+g"(h), "+r" (pixels),  "+r" (block)
-	 : "r"((long)line_size)
-	 : "%"REG_a, "memory"
-	);
+         "lea (%3, %3), %%"REG_a"       \n\t"
+         ".balign 8                     \n\t"
+         "1:                            \n\t"
+         "movq (%1), %%mm0              \n\t"
+         "movq (%1, %3), %%mm1          \n\t"
+         "movq %%mm0, (%2)              \n\t"
+         "movq %%mm1, (%2, %3)          \n\t"
+         "add %%"REG_a", %1             \n\t"
+         "add %%"REG_a", %2             \n\t"
+         "movq (%1), %%mm0              \n\t"
+         "movq (%1, %3), %%mm1          \n\t"
+         "movq %%mm0, (%2)              \n\t"
+         "movq %%mm1, (%2, %3)          \n\t"
+         "add %%"REG_a", %1             \n\t"
+         "add %%"REG_a", %2             \n\t"
+         "subl $4, %0                   \n\t"
+         "jnz 1b                        \n\t"
+         : "+g"(h), "+r" (pixels),  "+r" (block)
+         : "r"((long)line_size)
+         : "%"REG_a, "memory"
+        );
 }
 
 static void put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm __volatile(
-	 "lea (%3, %3), %%"REG_a"	\n\t"
-	 ".balign 8			\n\t"
-	 "1:				\n\t"
-	 "movq (%1), %%mm0		\n\t"
-	 "movq 8(%1), %%mm4		\n\t"
-	 "movq (%1, %3), %%mm1		\n\t"
-	 "movq 8(%1, %3), %%mm5		\n\t"
-     	 "movq %%mm0, (%2)		\n\t"
-     	 "movq %%mm4, 8(%2)		\n\t"
-	 "movq %%mm1, (%2, %3)		\n\t"
-	 "movq %%mm5, 8(%2, %3)		\n\t"
-	 "add %%"REG_a", %1		\n\t"
-	 "add %%"REG_a", %2       	\n\t"
-	 "movq (%1), %%mm0		\n\t"
-	 "movq 8(%1), %%mm4		\n\t"
-	 "movq (%1, %3), %%mm1		\n\t"
-	 "movq 8(%1, %3), %%mm5		\n\t"
-	 "movq %%mm0, (%2)		\n\t"
-	 "movq %%mm4, 8(%2)		\n\t"
-	 "movq %%mm1, (%2, %3)		\n\t"
-	 "movq %%mm5, 8(%2, %3)		\n\t"
-	 "add %%"REG_a", %1		\n\t"
-	 "add %%"REG_a", %2       	\n\t"
-	 "subl $4, %0			\n\t"
-	 "jnz 1b			\n\t"
-	 : "+g"(h), "+r" (pixels),  "+r" (block)
-	 : "r"((long)line_size)
-	 : "%"REG_a, "memory"
-	);
+         "lea (%3, %3), %%"REG_a"       \n\t"
+         ".balign 8                     \n\t"
+         "1:                            \n\t"
+         "movq (%1), %%mm0              \n\t"
+         "movq 8(%1), %%mm4             \n\t"
+         "movq (%1, %3), %%mm1          \n\t"
+         "movq 8(%1, %3), %%mm5         \n\t"
+         "movq %%mm0, (%2)              \n\t"
+         "movq %%mm4, 8(%2)             \n\t"
+         "movq %%mm1, (%2, %3)          \n\t"
+         "movq %%mm5, 8(%2, %3)         \n\t"
+         "add %%"REG_a", %1             \n\t"
+         "add %%"REG_a", %2             \n\t"
+         "movq (%1), %%mm0              \n\t"
+         "movq 8(%1), %%mm4             \n\t"
+         "movq (%1, %3), %%mm1          \n\t"
+         "movq 8(%1, %3), %%mm5         \n\t"
+         "movq %%mm0, (%2)              \n\t"
+         "movq %%mm4, 8(%2)             \n\t"
+         "movq %%mm1, (%2, %3)          \n\t"
+         "movq %%mm5, 8(%2, %3)         \n\t"
+         "add %%"REG_a", %1             \n\t"
+         "add %%"REG_a", %2             \n\t"
+         "subl $4, %0                   \n\t"
+         "jnz 1b                        \n\t"
+         : "+g"(h), "+r" (pixels),  "+r" (block)
+         : "r"((long)line_size)
+         : "%"REG_a, "memory"
+        );
 }
 
 static void clear_blocks_mmx(DCTELEM *blocks)
 {
     __asm __volatile(
-                "pxor %%mm7, %%mm7		\n\t"
-                "mov $-128*6, %%"REG_a"	\n\t"
-                "1:				\n\t"
-                "movq %%mm7, (%0, %%"REG_a")	\n\t"
-                "movq %%mm7, 8(%0, %%"REG_a")	\n\t"
-                "movq %%mm7, 16(%0, %%"REG_a")	\n\t"
-                "movq %%mm7, 24(%0, %%"REG_a")	\n\t"
-                "add $32, %%"REG_a"		\n\t"
-                " js 1b				\n\t"
+                "pxor %%mm7, %%mm7              \n\t"
+                "mov $-128*6, %%"REG_a"         \n\t"
+                "1:                             \n\t"
+                "movq %%mm7, (%0, %%"REG_a")    \n\t"
+                "movq %%mm7, 8(%0, %%"REG_a")   \n\t"
+                "movq %%mm7, 16(%0, %%"REG_a")  \n\t"
+                "movq %%mm7, 24(%0, %%"REG_a")  \n\t"
+                "add $32, %%"REG_a"             \n\t"
+                " js 1b                         \n\t"
                 : : "r" (((uint8_t *)blocks)+128*6)
                 : "%"REG_a
         );
@@ -471,31 +472,31 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){
     long index= -line_size*h;
 
     __asm __volatile(
-                "pxor %%mm7, %%mm7		\n\t"
-                "pxor %%mm6, %%mm6		\n\t"
-                "1:				\n\t"
-                "movq (%2, %1), %%mm0		\n\t"
-                "movq (%2, %1), %%mm1		\n\t"
-                "movq 8(%2, %1), %%mm2		\n\t"
-                "movq 8(%2, %1), %%mm3		\n\t"
-                "punpcklbw %%mm7, %%mm0		\n\t"
-                "punpckhbw %%mm7, %%mm1		\n\t"
-                "punpcklbw %%mm7, %%mm2		\n\t"
-                "punpckhbw %%mm7, %%mm3		\n\t"
-                "paddw %%mm0, %%mm1		\n\t"
-                "paddw %%mm2, %%mm3		\n\t"
-                "paddw %%mm1, %%mm3		\n\t"
-                "paddw %%mm3, %%mm6		\n\t"
-                "add %3, %1			\n\t"
-                " js 1b				\n\t"
-                "movq %%mm6, %%mm5		\n\t"
-                "psrlq $32, %%mm6		\n\t"
-                "paddw %%mm5, %%mm6		\n\t"
-                "movq %%mm6, %%mm5		\n\t"
-                "psrlq $16, %%mm6		\n\t"
-                "paddw %%mm5, %%mm6		\n\t"
-                "movd %%mm6, %0			\n\t"
-                "andl $0xFFFF, %0		\n\t"
+                "pxor %%mm7, %%mm7              \n\t"
+                "pxor %%mm6, %%mm6              \n\t"
+                "1:                             \n\t"
+                "movq (%2, %1), %%mm0           \n\t"
+                "movq (%2, %1), %%mm1           \n\t"
+                "movq 8(%2, %1), %%mm2          \n\t"
+                "movq 8(%2, %1), %%mm3          \n\t"
+                "punpcklbw %%mm7, %%mm0         \n\t"
+                "punpckhbw %%mm7, %%mm1         \n\t"
+                "punpcklbw %%mm7, %%mm2         \n\t"
+                "punpckhbw %%mm7, %%mm3         \n\t"
+                "paddw %%mm0, %%mm1             \n\t"
+                "paddw %%mm2, %%mm3             \n\t"
+                "paddw %%mm1, %%mm3             \n\t"
+                "paddw %%mm3, %%mm6             \n\t"
+                "add %3, %1                     \n\t"
+                " js 1b                         \n\t"
+                "movq %%mm6, %%mm5              \n\t"
+                "psrlq $32, %%mm6               \n\t"
+                "paddw %%mm5, %%mm6             \n\t"
+                "movq %%mm6, %%mm5              \n\t"
+                "psrlq $16, %%mm6               \n\t"
+                "paddw %%mm5, %%mm6             \n\t"
+                "movd %%mm6, %0                 \n\t"
+                "andl $0xFFFF, %0               \n\t"
                 : "=&r" (sum), "+r" (index)
                 : "r" (pix - index), "r" ((long)line_size)
         );
@@ -507,18 +508,18 @@ static int pix_sum16_mmx(uint8_t * pix, int line_size){
 static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
     long i=0;
     asm volatile(
-        "1:				\n\t"
-        "movq  (%1, %0), %%mm0		\n\t"
-        "movq  (%2, %0), %%mm1		\n\t"
-        "paddb %%mm0, %%mm1		\n\t"
-        "movq %%mm1, (%2, %0)		\n\t"
-        "movq 8(%1, %0), %%mm0		\n\t"
-        "movq 8(%2, %0), %%mm1		\n\t"
-        "paddb %%mm0, %%mm1		\n\t"
-        "movq %%mm1, 8(%2, %0)		\n\t"
-        "add $16, %0			\n\t"
-        "cmp %3, %0			\n\t"
-        " jb 1b				\n\t"
+        "1:                             \n\t"
+        "movq  (%1, %0), %%mm0          \n\t"
+        "movq  (%2, %0), %%mm1          \n\t"
+        "paddb %%mm0, %%mm1             \n\t"
+        "movq %%mm1, (%2, %0)           \n\t"
+        "movq 8(%1, %0), %%mm0          \n\t"
+        "movq 8(%2, %0), %%mm1          \n\t"
+        "paddb %%mm0, %%mm1             \n\t"
+        "movq %%mm1, 8(%2, %0)          \n\t"
+        "add $16, %0                    \n\t"
+        "cmp %3, %0                     \n\t"
+        " jb 1b                         \n\t"
         : "+r" (i)
         : "r"(src), "r"(dst), "r"((long)w-15)
     );
@@ -527,87 +528,87 @@ static void add_bytes_mmx(uint8_t *dst, uint8_t *src, int w){
 }
 
 #define H263_LOOP_FILTER \
-        "pxor %%mm7, %%mm7		\n\t"\
-        "movq  %0, %%mm0		\n\t"\
-        "movq  %0, %%mm1		\n\t"\
-        "movq  %3, %%mm2		\n\t"\
-        "movq  %3, %%mm3		\n\t"\
-        "punpcklbw %%mm7, %%mm0		\n\t"\
-        "punpckhbw %%mm7, %%mm1		\n\t"\
-        "punpcklbw %%mm7, %%mm2		\n\t"\
-        "punpckhbw %%mm7, %%mm3		\n\t"\
-        "psubw %%mm2, %%mm0		\n\t"\
-        "psubw %%mm3, %%mm1		\n\t"\
-        "movq  %1, %%mm2		\n\t"\
-        "movq  %1, %%mm3		\n\t"\
-        "movq  %2, %%mm4		\n\t"\
-        "movq  %2, %%mm5		\n\t"\
-        "punpcklbw %%mm7, %%mm2		\n\t"\
-        "punpckhbw %%mm7, %%mm3		\n\t"\
-        "punpcklbw %%mm7, %%mm4		\n\t"\
-        "punpckhbw %%mm7, %%mm5		\n\t"\
-        "psubw %%mm2, %%mm4		\n\t"\
-        "psubw %%mm3, %%mm5		\n\t"\
-        "psllw $2, %%mm4		\n\t"\
-        "psllw $2, %%mm5		\n\t"\
-        "paddw %%mm0, %%mm4		\n\t"\
-        "paddw %%mm1, %%mm5		\n\t"\
-        "pxor %%mm6, %%mm6		\n\t"\
-        "pcmpgtw %%mm4, %%mm6		\n\t"\
-        "pcmpgtw %%mm5, %%mm7		\n\t"\
-        "pxor %%mm6, %%mm4		\n\t"\
-        "pxor %%mm7, %%mm5		\n\t"\
-        "psubw %%mm6, %%mm4		\n\t"\
-        "psubw %%mm7, %%mm5		\n\t"\
-        "psrlw $3, %%mm4		\n\t"\
-        "psrlw $3, %%mm5		\n\t"\
-        "packuswb %%mm5, %%mm4		\n\t"\
-        "packsswb %%mm7, %%mm6		\n\t"\
-        "pxor %%mm7, %%mm7		\n\t"\
-        "movd %4, %%mm2			\n\t"\
-        "punpcklbw %%mm2, %%mm2		\n\t"\
-        "punpcklbw %%mm2, %%mm2		\n\t"\
-        "punpcklbw %%mm2, %%mm2		\n\t"\
-        "psubusb %%mm4, %%mm2		\n\t"\
-        "movq %%mm2, %%mm3		\n\t"\
-        "psubusb %%mm4, %%mm3		\n\t"\
-        "psubb %%mm3, %%mm2		\n\t"\
-        "movq %1, %%mm3			\n\t"\
-        "movq %2, %%mm4			\n\t"\
-        "pxor %%mm6, %%mm3		\n\t"\
-        "pxor %%mm6, %%mm4		\n\t"\
-        "paddusb %%mm2, %%mm3		\n\t"\
-        "psubusb %%mm2, %%mm4		\n\t"\
-        "pxor %%mm6, %%mm3		\n\t"\
-        "pxor %%mm6, %%mm4		\n\t"\
-        "paddusb %%mm2, %%mm2		\n\t"\
-        "packsswb %%mm1, %%mm0		\n\t"\
-        "pcmpgtb %%mm0, %%mm7		\n\t"\
-        "pxor %%mm7, %%mm0		\n\t"\
-        "psubb %%mm7, %%mm0		\n\t"\
-        "movq %%mm0, %%mm1		\n\t"\
-        "psubusb %%mm2, %%mm0		\n\t"\
-        "psubb %%mm0, %%mm1		\n\t"\
-        "pand %5, %%mm1			\n\t"\
-        "psrlw $2, %%mm1		\n\t"\
-        "pxor %%mm7, %%mm1		\n\t"\
-        "psubb %%mm7, %%mm1		\n\t"\
-        "movq %0, %%mm5			\n\t"\
-        "movq %3, %%mm6			\n\t"\
-        "psubb %%mm1, %%mm5		\n\t"\
-        "paddb %%mm1, %%mm6		\n\t"
+        "pxor %%mm7, %%mm7              \n\t"\
+        "movq  %0, %%mm0                \n\t"\
+        "movq  %0, %%mm1                \n\t"\
+        "movq  %3, %%mm2                \n\t"\
+        "movq  %3, %%mm3                \n\t"\
+        "punpcklbw %%mm7, %%mm0         \n\t"\
+        "punpckhbw %%mm7, %%mm1         \n\t"\
+        "punpcklbw %%mm7, %%mm2         \n\t"\
+        "punpckhbw %%mm7, %%mm3         \n\t"\
+        "psubw %%mm2, %%mm0             \n\t"\
+        "psubw %%mm3, %%mm1             \n\t"\
+        "movq  %1, %%mm2                \n\t"\
+        "movq  %1, %%mm3                \n\t"\
+        "movq  %2, %%mm4                \n\t"\
+        "movq  %2, %%mm5                \n\t"\
+        "punpcklbw %%mm7, %%mm2         \n\t"\
+        "punpckhbw %%mm7, %%mm3         \n\t"\
+        "punpcklbw %%mm7, %%mm4         \n\t"\
+        "punpckhbw %%mm7, %%mm5         \n\t"\
+        "psubw %%mm2, %%mm4             \n\t"\
+        "psubw %%mm3, %%mm5             \n\t"\
+        "psllw $2, %%mm4                \n\t"\
+        "psllw $2, %%mm5                \n\t"\
+        "paddw %%mm0, %%mm4             \n\t"\
+        "paddw %%mm1, %%mm5             \n\t"\
+        "pxor %%mm6, %%mm6              \n\t"\
+        "pcmpgtw %%mm4, %%mm6           \n\t"\
+        "pcmpgtw %%mm5, %%mm7           \n\t"\
+        "pxor %%mm6, %%mm4              \n\t"\
+        "pxor %%mm7, %%mm5              \n\t"\
+        "psubw %%mm6, %%mm4             \n\t"\
+        "psubw %%mm7, %%mm5             \n\t"\
+        "psrlw $3, %%mm4                \n\t"\
+        "psrlw $3, %%mm5                \n\t"\
+        "packuswb %%mm5, %%mm4          \n\t"\
+        "packsswb %%mm7, %%mm6          \n\t"\
+        "pxor %%mm7, %%mm7              \n\t"\
+        "movd %4, %%mm2                 \n\t"\
+        "punpcklbw %%mm2, %%mm2         \n\t"\
+        "punpcklbw %%mm2, %%mm2         \n\t"\
+        "punpcklbw %%mm2, %%mm2         \n\t"\
+        "psubusb %%mm4, %%mm2           \n\t"\
+        "movq %%mm2, %%mm3              \n\t"\
+        "psubusb %%mm4, %%mm3           \n\t"\
+        "psubb %%mm3, %%mm2             \n\t"\
+        "movq %1, %%mm3                 \n\t"\
+        "movq %2, %%mm4                 \n\t"\
+        "pxor %%mm6, %%mm3              \n\t"\
+        "pxor %%mm6, %%mm4              \n\t"\
+        "paddusb %%mm2, %%mm3           \n\t"\
+        "psubusb %%mm2, %%mm4           \n\t"\
+        "pxor %%mm6, %%mm3              \n\t"\
+        "pxor %%mm6, %%mm4              \n\t"\
+        "paddusb %%mm2, %%mm2           \n\t"\
+        "packsswb %%mm1, %%mm0          \n\t"\
+        "pcmpgtb %%mm0, %%mm7           \n\t"\
+        "pxor %%mm7, %%mm0              \n\t"\
+        "psubb %%mm7, %%mm0             \n\t"\
+        "movq %%mm0, %%mm1              \n\t"\
+        "psubusb %%mm2, %%mm0           \n\t"\
+        "psubb %%mm0, %%mm1             \n\t"\
+        "pand %5, %%mm1                 \n\t"\
+        "psrlw $2, %%mm1                \n\t"\
+        "pxor %%mm7, %%mm1              \n\t"\
+        "psubb %%mm7, %%mm1             \n\t"\
+        "movq %0, %%mm5                 \n\t"\
+        "movq %3, %%mm6                 \n\t"\
+        "psubb %%mm1, %%mm5             \n\t"\
+        "paddb %%mm1, %%mm6             \n\t"
 
 static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){
     const int strength= ff_h263_loop_filter_strength[qscale];
 
     asm volatile(
-    
+
         H263_LOOP_FILTER
-        
-        "movq %%mm3, %1			\n\t"
-        "movq %%mm4, %2			\n\t"
-        "movq %%mm5, %0			\n\t"
-        "movq %%mm6, %3			\n\t"
+
+        "movq %%mm3, %1                 \n\t"
+        "movq %%mm4, %2                 \n\t"
+        "movq %%mm5, %0                 \n\t"
+        "movq %%mm6, %3                 \n\t"
         : "+m" (*(uint64_t*)(src - 2*stride)),
           "+m" (*(uint64_t*)(src - 1*stride)),
           "+m" (*(uint64_t*)(src + 0*stride)),
@@ -617,32 +618,31 @@ static void h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale){
 }
 
 static inline void transpose4x4(uint8_t *dst, uint8_t *src, int dst_stride, int src_stride){
-    void *dst_reg = dst, *src_reg = src;
-
     asm volatile( //FIXME could save 1 instruction if done as 8x4 ...
-        "movd  (%1), %%mm0		\n\t"
-        "movd  (%1,%5), %%mm1		\n\t"
-        "lea (%1, %5, 2), %1		\n\t"
-        "movd  (%1), %%mm2		\n\t"
-        "movd  (%1,%5), %%mm3		\n\t"
-        "punpcklbw %%mm1, %%mm0		\n\t"
-        "punpcklbw %%mm3, %%mm2		\n\t"
-        "movq %%mm0, %%mm1		\n\t"
-        "punpcklwd %%mm2, %%mm0		\n\t"
-        "punpckhwd %%mm2, %%mm1		\n\t"
-        "movd  %%mm0, (%0)		\n\t"
-        "punpckhdq %%mm0, %%mm0		\n\t"
-        "movd  %%mm0, (%0,%4)		\n\t"
-        "lea (%0, %4, 2), %0		\n\t"
-        "movd  %%mm1, (%0)		\n\t"
-        "punpckhdq %%mm1, %%mm1		\n\t"
-        "movd  %%mm1, (%0,%4)		\n\t"
-        : "=&r" (dst_reg),
-          "=&r" (src_reg)
-        : "0"   (dst_reg),
-          "1"   (src_reg),
-          "r"   (dst_stride),
-          "r"   (src_stride)
+        "movd  %4, %%mm0                \n\t"
+        "movd  %5, %%mm1                \n\t"
+        "movd  %6, %%mm2                \n\t"
+        "movd  %7, %%mm3                \n\t"
+        "punpcklbw %%mm1, %%mm0         \n\t"
+        "punpcklbw %%mm3, %%mm2         \n\t"
+        "movq %%mm0, %%mm1              \n\t"
+        "punpcklwd %%mm2, %%mm0         \n\t"
+        "punpckhwd %%mm2, %%mm1         \n\t"
+        "movd  %%mm0, %0                \n\t"
+        "punpckhdq %%mm0, %%mm0         \n\t"
+        "movd  %%mm0, %1                \n\t"
+        "movd  %%mm1, %2                \n\t"
+        "punpckhdq %%mm1, %%mm1         \n\t"
+        "movd  %%mm1, %3                \n\t"
+
+        : "=m" (*(uint32_t*)(dst + 0*dst_stride)),
+          "=m" (*(uint32_t*)(dst + 1*dst_stride)),
+          "=m" (*(uint32_t*)(dst + 2*dst_stride)),
+          "=m" (*(uint32_t*)(dst + 3*dst_stride))
+        :  "m" (*(uint32_t*)(src + 0*src_stride)),
+           "m" (*(uint32_t*)(src + 1*src_stride)),
+           "m" (*(uint32_t*)(src + 2*src_stride)),
+           "m" (*(uint32_t*)(src + 3*src_stride))
     );
 }
 
@@ -650,14 +650,14 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
     const int strength= ff_h263_loop_filter_strength[qscale];
     uint64_t temp[4] __attribute__ ((aligned(8)));
     uint8_t *btemp= (uint8_t*)temp;
-    
+
     src -= 2;
 
     transpose4x4(btemp  , src           , 8, stride);
     transpose4x4(btemp+4, src + 4*stride, 8, stride);
     asm volatile(
         H263_LOOP_FILTER // 5 3 4 6
-        
+
         : "+m" (temp[0]),
           "+m" (temp[1]),
           "+m" (temp[2]),
@@ -666,30 +666,30 @@ static void h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale){
     );
 
     asm volatile(
-        "movq %%mm5, %%mm1		\n\t"
-        "movq %%mm4, %%mm0		\n\t"
-        "punpcklbw %%mm3, %%mm5		\n\t"
-        "punpcklbw %%mm6, %%mm4		\n\t"
-        "punpckhbw %%mm3, %%mm1		\n\t"
-        "punpckhbw %%mm6, %%mm0		\n\t"
-        "movq %%mm5, %%mm3		\n\t"
-        "movq %%mm1, %%mm6		\n\t"
-        "punpcklwd %%mm4, %%mm5		\n\t"
-        "punpcklwd %%mm0, %%mm1		\n\t"
-        "punpckhwd %%mm4, %%mm3		\n\t"
-        "punpckhwd %%mm0, %%mm6		\n\t"
-        "movd %%mm5, (%0)		\n\t"
-        "punpckhdq %%mm5, %%mm5		\n\t"
-        "movd %%mm5, (%0,%2)		\n\t"
-        "movd %%mm3, (%0,%2,2)		\n\t"
-        "punpckhdq %%mm3, %%mm3		\n\t"
-        "movd %%mm3, (%0,%3)		\n\t"
-        "movd %%mm1, (%1)		\n\t"
-        "punpckhdq %%mm1, %%mm1		\n\t"
-        "movd %%mm1, (%1,%2)		\n\t"
-        "movd %%mm6, (%1,%2,2)		\n\t"
-        "punpckhdq %%mm6, %%mm6		\n\t"
-        "movd %%mm6, (%1,%3)		\n\t"
+        "movq %%mm5, %%mm1              \n\t"
+        "movq %%mm4, %%mm0              \n\t"
+        "punpcklbw %%mm3, %%mm5         \n\t"
+        "punpcklbw %%mm6, %%mm4         \n\t"
+        "punpckhbw %%mm3, %%mm1         \n\t"
+        "punpckhbw %%mm6, %%mm0         \n\t"
+        "movq %%mm5, %%mm3              \n\t"
+        "movq %%mm1, %%mm6              \n\t"
+        "punpcklwd %%mm4, %%mm5         \n\t"
+        "punpcklwd %%mm0, %%mm1         \n\t"
+        "punpckhwd %%mm4, %%mm3         \n\t"
+        "punpckhwd %%mm0, %%mm6         \n\t"
+        "movd %%mm5, (%0)               \n\t"
+        "punpckhdq %%mm5, %%mm5         \n\t"
+        "movd %%mm5, (%0,%2)            \n\t"
+        "movd %%mm3, (%0,%2,2)          \n\t"
+        "punpckhdq %%mm3, %%mm3         \n\t"
+        "movd %%mm3, (%0,%3)            \n\t"
+        "movd %%mm1, (%1)               \n\t"
+        "punpckhdq %%mm1, %%mm1         \n\t"
+        "movd %%mm1, (%1,%2)            \n\t"
+        "movd %%mm6, (%1,%2,2)          \n\t"
+        "punpckhdq %%mm6, %%mm6         \n\t"
+        "movd %%mm6, (%1,%3)            \n\t"
         :: "r" (src),
            "r" (src + 4*stride),
            "r" ((long)   stride ),
@@ -705,26 +705,26 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) {
       "pxor %%mm0,%%mm0\n"
       "pxor %%mm7,%%mm7\n"
       "1:\n"
-      "movq (%0),%%mm2\n"	/* mm2 = pix[0-7] */
-      "movq 8(%0),%%mm3\n"	/* mm3 = pix[8-15] */
+      "movq (%0),%%mm2\n"       /* mm2 = pix[0-7] */
+      "movq 8(%0),%%mm3\n"      /* mm3 = pix[8-15] */
 
-      "movq %%mm2,%%mm1\n"	/* mm1 = mm2 = pix[0-7] */
+      "movq %%mm2,%%mm1\n"      /* mm1 = mm2 = pix[0-7] */
 
-      "punpckhbw %%mm0,%%mm1\n"	/* mm1 = [pix4-7] */
-      "punpcklbw %%mm0,%%mm2\n"	/* mm2 = [pix0-3] */
+      "punpckhbw %%mm0,%%mm1\n" /* mm1 = [pix4-7] */
+      "punpcklbw %%mm0,%%mm2\n" /* mm2 = [pix0-3] */
 
-      "movq %%mm3,%%mm4\n"	/* mm4 = mm3 = pix[8-15] */
-      "punpckhbw %%mm0,%%mm3\n"	/* mm3 = [pix12-15] */
-      "punpcklbw %%mm0,%%mm4\n"	/* mm4 = [pix8-11] */
+      "movq %%mm3,%%mm4\n"      /* mm4 = mm3 = pix[8-15] */
+      "punpckhbw %%mm0,%%mm3\n" /* mm3 = [pix12-15] */
+      "punpcklbw %%mm0,%%mm4\n" /* mm4 = [pix8-11] */
 
-      "pmaddwd %%mm1,%%mm1\n"	/* mm1 = (pix0^2+pix1^2,pix2^2+pix3^2) */
-      "pmaddwd %%mm2,%%mm2\n"	/* mm2 = (pix4^2+pix5^2,pix6^2+pix7^2) */
+      "pmaddwd %%mm1,%%mm1\n"   /* mm1 = (pix0^2+pix1^2,pix2^2+pix3^2) */
+      "pmaddwd %%mm2,%%mm2\n"   /* mm2 = (pix4^2+pix5^2,pix6^2+pix7^2) */
 
       "pmaddwd %%mm3,%%mm3\n"
       "pmaddwd %%mm4,%%mm4\n"
 
-      "paddd %%mm1,%%mm2\n"	/* mm2 = (pix0^2+pix1^2+pix4^2+pix5^2,
-					  pix2^2+pix3^2+pix6^2+pix7^2) */
+      "paddd %%mm1,%%mm2\n"     /* mm2 = (pix0^2+pix1^2+pix4^2+pix5^2,
+                                          pix2^2+pix3^2+pix6^2+pix7^2) */
       "paddd %%mm3,%%mm4\n"
       "paddd %%mm2,%%mm7\n"
 
@@ -734,7 +734,7 @@ static int pix_norm1_mmx(uint8_t *pix, int line_size) {
       "jnz 1b\n"
 
       "movq %%mm7,%%mm1\n"
-      "psrlq $32, %%mm7\n"	/* shift hi dword to lo */
+      "psrlq $32, %%mm7\n"      /* shift hi dword to lo */
       "paddd %%mm7,%%mm1\n"
       "movd %%mm1,%1\n"
       : "+r" (pix), "=r"(tmp) : "r" ((long)line_size) : "%ecx" );
@@ -746,13 +746,13 @@ static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
   asm volatile (
       "movl %4,%%ecx\n"
       "shr $1,%%ecx\n"
-      "pxor %%mm0,%%mm0\n"	/* mm0 = 0 */
-      "pxor %%mm7,%%mm7\n"	/* mm7 holds the sum */
+      "pxor %%mm0,%%mm0\n"      /* mm0 = 0 */
+      "pxor %%mm7,%%mm7\n"      /* mm7 holds the sum */
       "1:\n"
-      "movq (%0),%%mm1\n"	/* mm1 = pix1[0][0-7] */
-      "movq (%1),%%mm2\n"	/* mm2 = pix2[0][0-7] */
-      "movq (%0,%3),%%mm3\n"	/* mm3 = pix1[1][0-7] */
-      "movq (%1,%3),%%mm4\n"	/* mm4 = pix2[1][0-7] */
+      "movq (%0),%%mm1\n"       /* mm1 = pix1[0][0-7] */
+      "movq (%1),%%mm2\n"       /* mm2 = pix2[0][0-7] */
+      "movq (%0,%3),%%mm3\n"    /* mm3 = pix1[1][0-7] */
+      "movq (%1,%3),%%mm4\n"    /* mm4 = pix2[1][0-7] */
 
       /* todo: mm1-mm2, mm3-mm4 */
       /* algo: substract mm1 from mm2 with saturation and vice versa */
@@ -773,16 +773,16 @@ static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
 
       "punpckhbw %%mm0,%%mm2\n"
       "punpckhbw %%mm0,%%mm4\n"
-      "punpcklbw %%mm0,%%mm1\n"	/* mm1 now spread over (mm1,mm2) */
-      "punpcklbw %%mm0,%%mm3\n"	/* mm4 now spread over (mm3,mm4) */
+      "punpcklbw %%mm0,%%mm1\n" /* mm1 now spread over (mm1,mm2) */
+      "punpcklbw %%mm0,%%mm3\n" /* mm4 now spread over (mm3,mm4) */
 
       "pmaddwd %%mm2,%%mm2\n"
       "pmaddwd %%mm4,%%mm4\n"
       "pmaddwd %%mm1,%%mm1\n"
       "pmaddwd %%mm3,%%mm3\n"
 
-      "lea (%0,%3,2), %0\n"	/* pix1 += 2*line_size */
-      "lea (%1,%3,2), %1\n"	/* pix2 += 2*line_size */
+      "lea (%0,%3,2), %0\n"     /* pix1 += 2*line_size */
+      "lea (%1,%3,2), %1\n"     /* pix2 += 2*line_size */
 
       "paddd %%mm2,%%mm1\n"
       "paddd %%mm4,%%mm3\n"
@@ -793,10 +793,10 @@ static int sse8_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
       "jnz 1b\n"
 
       "movq %%mm7,%%mm1\n"
-      "psrlq $32, %%mm7\n"	/* shift hi dword to lo */
+      "psrlq $32, %%mm7\n"      /* shift hi dword to lo */
       "paddd %%mm7,%%mm1\n"
       "movd %%mm1,%2\n"
-      : "+r" (pix1), "+r" (pix2), "=r"(tmp) 
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
       : "r" ((long)line_size) , "m" (h)
       : "%ecx");
     return tmp;
@@ -806,13 +806,13 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
     int tmp;
   asm volatile (
       "movl %4,%%ecx\n"
-      "pxor %%mm0,%%mm0\n"	/* mm0 = 0 */
-      "pxor %%mm7,%%mm7\n"	/* mm7 holds the sum */
+      "pxor %%mm0,%%mm0\n"      /* mm0 = 0 */
+      "pxor %%mm7,%%mm7\n"      /* mm7 holds the sum */
       "1:\n"
-      "movq (%0),%%mm1\n"	/* mm1 = pix1[0-7] */
-      "movq (%1),%%mm2\n"	/* mm2 = pix2[0-7] */
-      "movq 8(%0),%%mm3\n"	/* mm3 = pix1[8-15] */
-      "movq 8(%1),%%mm4\n"	/* mm4 = pix2[8-15] */
+      "movq (%0),%%mm1\n"       /* mm1 = pix1[0-7] */
+      "movq (%1),%%mm2\n"       /* mm2 = pix2[0-7] */
+      "movq 8(%0),%%mm3\n"      /* mm3 = pix1[8-15] */
+      "movq 8(%1),%%mm4\n"      /* mm4 = pix2[8-15] */
 
       /* todo: mm1-mm2, mm3-mm4 */
       /* algo: substract mm1 from mm2 with saturation and vice versa */
@@ -833,8 +833,8 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
 
       "punpckhbw %%mm0,%%mm2\n"
       "punpckhbw %%mm0,%%mm4\n"
-      "punpcklbw %%mm0,%%mm1\n"	/* mm1 now spread over (mm1,mm2) */
-      "punpcklbw %%mm0,%%mm3\n"	/* mm4 now spread over (mm3,mm4) */
+      "punpcklbw %%mm0,%%mm1\n" /* mm1 now spread over (mm1,mm2) */
+      "punpcklbw %%mm0,%%mm3\n" /* mm4 now spread over (mm3,mm4) */
 
       "pmaddwd %%mm2,%%mm2\n"
       "pmaddwd %%mm4,%%mm4\n"
@@ -853,10 +853,10 @@ static int sse16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int
       "jnz 1b\n"
 
       "movq %%mm7,%%mm1\n"
-      "psrlq $32, %%mm7\n"	/* shift hi dword to lo */
+      "psrlq $32, %%mm7\n"      /* shift hi dword to lo */
       "paddd %%mm7,%%mm1\n"
       "movd %%mm1,%2\n"
-      : "+r" (pix1), "+r" (pix2), "=r"(tmp) 
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
       : "r" ((long)line_size) , "m" (h)
       : "%ecx");
     return tmp;
@@ -866,13 +866,13 @@ static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
     int tmp;
   asm volatile (
       "shr $1,%2\n"
-      "pxor %%xmm0,%%xmm0\n"	/* mm0 = 0 */
-      "pxor %%xmm7,%%xmm7\n"	/* mm7 holds the sum */
+      "pxor %%xmm0,%%xmm0\n"    /* mm0 = 0 */
+      "pxor %%xmm7,%%xmm7\n"    /* mm7 holds the sum */
       "1:\n"
-      "movdqu (%0),%%xmm1\n"	/* mm1 = pix1[0][0-15] */
-      "movdqu (%1),%%xmm2\n"	/* mm2 = pix2[0][0-15] */
-      "movdqu (%0,%4),%%xmm3\n"	/* mm3 = pix1[1][0-15] */
-      "movdqu (%1,%4),%%xmm4\n"	/* mm4 = pix2[1][0-15] */
+      "movdqu (%0),%%xmm1\n"    /* mm1 = pix1[0][0-15] */
+      "movdqu (%1),%%xmm2\n"    /* mm2 = pix2[0][0-15] */
+      "movdqu (%0,%4),%%xmm3\n" /* mm3 = pix1[1][0-15] */
+      "movdqu (%1,%4),%%xmm4\n" /* mm4 = pix2[1][0-15] */
 
       /* todo: mm1-mm2, mm3-mm4 */
       /* algo: substract mm1 from mm2 with saturation and vice versa */
@@ -893,16 +893,16 @@ static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
 
       "punpckhbw %%xmm0,%%xmm2\n"
       "punpckhbw %%xmm0,%%xmm4\n"
-      "punpcklbw %%xmm0,%%xmm1\n"	/* mm1 now spread over (mm1,mm2) */
-      "punpcklbw %%xmm0,%%xmm3\n"	/* mm4 now spread over (mm3,mm4) */
+      "punpcklbw %%xmm0,%%xmm1\n"  /* mm1 now spread over (mm1,mm2) */
+      "punpcklbw %%xmm0,%%xmm3\n"  /* mm4 now spread over (mm3,mm4) */
 
       "pmaddwd %%xmm2,%%xmm2\n"
       "pmaddwd %%xmm4,%%xmm4\n"
       "pmaddwd %%xmm1,%%xmm1\n"
       "pmaddwd %%xmm3,%%xmm3\n"
 
-      "lea (%0,%4,2), %0\n"	/* pix1 += 2*line_size */
-      "lea (%1,%4,2), %1\n"	/* pix2 += 2*line_size */
+      "lea (%0,%4,2), %0\n"        /* pix1 += 2*line_size */
+      "lea (%1,%4,2), %1\n"        /* pix2 += 2*line_size */
 
       "paddd %%xmm2,%%xmm1\n"
       "paddd %%xmm4,%%xmm3\n"
@@ -913,13 +913,13 @@ static int sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
       "jnz 1b\n"
 
       "movdqa %%xmm7,%%xmm1\n"
-      "psrldq $8, %%xmm7\n"	/* shift hi qword to lo */
+      "psrldq $8, %%xmm7\n"        /* shift hi qword to lo */
       "paddd %%xmm1,%%xmm7\n"
       "movdqa %%xmm7,%%xmm1\n"
-      "psrldq $4, %%xmm7\n"	/* shift hi dword to lo */
+      "psrldq $4, %%xmm7\n"        /* shift hi dword to lo */
       "paddd %%xmm1,%%xmm7\n"
       "movd %%xmm7,%3\n"
-      : "+r" (pix1), "+r" (pix2), "+r"(h), "=r"(tmp) 
+      : "+r" (pix1), "+r" (pix2), "+r"(h), "=r"(tmp)
       : "r" ((long)line_size));
     return tmp;
 }
@@ -930,7 +930,7 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
       "movl %3,%%ecx\n"
       "pxor %%mm7,%%mm7\n"
       "pxor %%mm6,%%mm6\n"
-      
+
       "movq (%0),%%mm0\n"
       "movq %%mm0, %%mm1\n"
       "psllq $8, %%mm0\n"
@@ -944,9 +944,9 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
       "punpckhbw %%mm7,%%mm3\n"
       "psubw %%mm1, %%mm0\n"
       "psubw %%mm3, %%mm2\n"
-      
+
       "add %2,%0\n"
-      
+
       "movq (%0),%%mm4\n"
       "movq %%mm4, %%mm1\n"
       "psllq $8, %%mm4\n"
@@ -968,14 +968,14 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
       "pcmpgtw %%mm2, %%mm1\n\t"
       "pxor %%mm3, %%mm0\n"
       "pxor %%mm1, %%mm2\n"
-      "psubw %%mm3, %%mm0\n" 
+      "psubw %%mm3, %%mm0\n"
       "psubw %%mm1, %%mm2\n"
       "paddw %%mm0, %%mm2\n"
       "paddw %%mm2, %%mm6\n"
 
       "add %2,%0\n"
       "1:\n"
-  
+
       "movq (%0),%%mm0\n"
       "movq %%mm0, %%mm1\n"
       "psllq $8, %%mm0\n"
@@ -997,13 +997,13 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
       "pcmpgtw %%mm5, %%mm1\n\t"
       "pxor %%mm3, %%mm4\n"
       "pxor %%mm1, %%mm5\n"
-      "psubw %%mm3, %%mm4\n" 
+      "psubw %%mm3, %%mm4\n"
       "psubw %%mm1, %%mm5\n"
       "paddw %%mm4, %%mm5\n"
       "paddw %%mm5, %%mm6\n"
-      
+
       "add %2,%0\n"
-      
+
       "movq (%0),%%mm4\n"
       "movq %%mm4, %%mm1\n"
       "psllq $8, %%mm4\n"
@@ -1025,7 +1025,7 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
       "pcmpgtw %%mm2, %%mm1\n\t"
       "pxor %%mm3, %%mm0\n"
       "pxor %%mm1, %%mm2\n"
-      "psubw %%mm3, %%mm0\n" 
+      "psubw %%mm3, %%mm0\n"
       "psubw %%mm1, %%mm2\n"
       "paddw %%mm0, %%mm2\n"
       "paddw %%mm2, %%mm6\n"
@@ -1038,12 +1038,12 @@ static int hf_noise8_mmx(uint8_t * pix1, int line_size, int h) {
       "punpcklwd %%mm7,%%mm0\n"
       "punpckhwd %%mm7,%%mm6\n"
       "paddd %%mm0, %%mm6\n"
-      
+
       "movq %%mm6,%%mm0\n"
       "psrlq $32, %%mm6\n"
       "paddd %%mm6,%%mm0\n"
       "movd %%mm0,%1\n"
-      : "+r" (pix1), "=r"(tmp) 
+      : "+r" (pix1), "=r"(tmp)
       : "r" ((long)line_size) , "g" (h-2)
       : "%ecx");
       return tmp;
@@ -1056,7 +1056,7 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
       "movl %3,%%ecx\n"
       "pxor %%mm7,%%mm7\n"
       "pxor %%mm6,%%mm6\n"
-      
+
       "movq (%0),%%mm0\n"
       "movq 1(%0),%%mm1\n"
       "movq %%mm0, %%mm2\n"
@@ -1067,9 +1067,9 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
       "punpckhbw %%mm7,%%mm3\n"
       "psubw %%mm1, %%mm0\n"
       "psubw %%mm3, %%mm2\n"
-      
+
       "add %2,%0\n"
-      
+
       "movq (%0),%%mm4\n"
       "movq 1(%0),%%mm1\n"
       "movq %%mm4, %%mm5\n"
@@ -1088,14 +1088,14 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
       "pcmpgtw %%mm2, %%mm1\n\t"
       "pxor %%mm3, %%mm0\n"
       "pxor %%mm1, %%mm2\n"
-      "psubw %%mm3, %%mm0\n" 
+      "psubw %%mm3, %%mm0\n"
       "psubw %%mm1, %%mm2\n"
       "paddw %%mm0, %%mm2\n"
       "paddw %%mm2, %%mm6\n"
 
       "add %2,%0\n"
       "1:\n"
-  
+
       "movq (%0),%%mm0\n"
       "movq 1(%0),%%mm1\n"
       "movq %%mm0, %%mm2\n"
@@ -1118,9 +1118,9 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
       "psubw %%mm1, %%mm5\n"
       "paddw %%mm4, %%mm5\n"
       "paddw %%mm5, %%mm6\n"
-      
+
       "add %2,%0\n"
-      
+
       "movq (%0),%%mm4\n"
       "movq 1(%0),%%mm1\n"
       "movq %%mm4, %%mm5\n"
@@ -1139,7 +1139,7 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
       "pcmpgtw %%mm2, %%mm1\n\t"
       "pxor %%mm3, %%mm0\n"
       "pxor %%mm1, %%mm2\n"
-      "psubw %%mm3, %%mm0\n" 
+      "psubw %%mm3, %%mm0\n"
       "psubw %%mm1, %%mm2\n"
       "paddw %%mm0, %%mm2\n"
       "paddw %%mm2, %%mm6\n"
@@ -1152,12 +1152,12 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
       "punpcklwd %%mm7,%%mm0\n"
       "punpckhwd %%mm7,%%mm6\n"
       "paddd %%mm0, %%mm6\n"
-      
+
       "movq %%mm6,%%mm0\n"
       "psrlq $32, %%mm6\n"
       "paddd %%mm6,%%mm0\n"
       "movd %%mm0,%1\n"
-      : "+r" (pix1), "=r"(tmp) 
+      : "+r" (pix1), "=r"(tmp)
       : "r" ((long)line_size) , "g" (h-2)
       : "%ecx");
       return tmp + hf_noise8_mmx(pix+8, line_size, h);
@@ -1165,8 +1165,11 @@ static int hf_noise16_mmx(uint8_t * pix1, int line_size, int h) {
 
 static int nsse16_mmx(void *p, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
     MpegEncContext *c = p;
-    int score1= sse16_mmx(c, pix1, pix2, line_size, h);
-    int score2= hf_noise16_mmx(pix1, line_size, h) - hf_noise16_mmx(pix2, line_size, h);
+    int score1, score2;
+
+    if(c) score1 = c->dsp.sse[0](c, pix1, pix2, line_size, h);
+    else  score1 = sse16_mmx(c, pix1, pix2, line_size, h);
+    score2= hf_noise16_mmx(pix1, line_size, h) - hf_noise16_mmx(pix2, line_size, h);
 
     if(c) return score1 + ABS(score2)*c->avctx->nsse_weight;
     else  return score1 + ABS(score2)*8;
@@ -1183,10 +1186,10 @@ static int nsse8_mmx(void *p, uint8_t * pix1, uint8_t * pix2, int line_size, int
 
 static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
     int tmp;
-    
+
     assert( (((int)pix) & 7) == 0);
     assert((line_size &7) ==0);
-    
+
 #define SUM(in0, in1, out0, out1) \
       "movq (%0), %%mm2\n"\
       "movq 8(%0), %%mm3\n"\
@@ -1210,7 +1213,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
       "paddw %%mm2, " #in0 "\n"\
       "paddw " #in0 ", %%mm6\n"
 
-    
+
   asm volatile (
       "movl %3,%%ecx\n"
       "pxor %%mm6,%%mm6\n"
@@ -1221,11 +1224,11 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
       "subl $2, %%ecx\n"
       SUM(%%mm0, %%mm1, %%mm4, %%mm5)
       "1:\n"
-      
+
       SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-      
+
       SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-      
+
       "subl $2, %%ecx\n"
       "jnz 1b\n"
 
@@ -1236,7 +1239,7 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
       "psrlq $16, %%mm0\n"
       "paddw %%mm6,%%mm0\n"
       "movd %%mm0,%1\n"
-      : "+r" (pix), "=r"(tmp) 
+      : "+r" (pix), "=r"(tmp)
       : "r" ((long)line_size) , "m" (h)
       : "%ecx");
     return tmp & 0xFFFF;
@@ -1245,10 +1248,10 @@ static int vsad_intra16_mmx(void *v, uint8_t * pix, uint8_t * dummy, int line_si
 
 static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_size, int h) {
     int tmp;
-    
+
     assert( (((int)pix) & 7) == 0);
     assert((line_size &7) ==0);
-    
+
 #define SUM(in0, in1, out0, out1) \
       "movq (%0), " #out0 "\n"\
       "movq 8(%0), " #out1 "\n"\
@@ -1268,16 +1271,16 @@ static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s
       "subl $2, %%ecx\n"
       SUM(%%mm0, %%mm1, %%mm4, %%mm5)
       "1:\n"
-      
+
       SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-      
+
       SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-      
+
       "subl $2, %%ecx\n"
       "jnz 1b\n"
 
       "movd %%mm6,%1\n"
-      : "+r" (pix), "=r"(tmp) 
+      : "+r" (pix), "=r"(tmp)
       : "r" ((long)line_size) , "m" (h)
       : "%ecx");
     return tmp;
@@ -1286,11 +1289,11 @@ static int vsad_intra16_mmx2(void *v, uint8_t * pix, uint8_t * dummy, int line_s
 
 static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
     int tmp;
-    
+
     assert( (((int)pix1) & 7) == 0);
     assert( (((int)pix2) & 7) == 0);
     assert((line_size &7) ==0);
-    
+
 #define SUM(in0, in1, out0, out1) \
       "movq (%0),%%mm2\n"\
       "movq (%1)," #out0 "\n"\
@@ -1321,7 +1324,7 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
       "paddw %%mm2, " #in0 "\n"\
       "paddw " #in0 ", %%mm6\n"
 
-    
+
   asm volatile (
       "movl %4,%%ecx\n"
       "pxor %%mm6,%%mm6\n"
@@ -1341,11 +1344,11 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
       "pxor %%mm7, %%mm1\n"
       SUM(%%mm0, %%mm1, %%mm4, %%mm5)
       "1:\n"
-      
+
       SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-      
+
       SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-      
+
       "subl $2, %%ecx\n"
       "jnz 1b\n"
 
@@ -1356,7 +1359,7 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
       "psrlq $16, %%mm0\n"
       "paddw %%mm6,%%mm0\n"
       "movd %%mm0,%2\n"
-      : "+r" (pix1), "+r" (pix2), "=r"(tmp) 
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
       : "r" ((long)line_size) , "m" (h)
       : "%ecx");
     return tmp & 0x7FFF;
@@ -1365,11 +1368,11 @@ static int vsad16_mmx(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, in
 
 static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) {
     int tmp;
-    
+
     assert( (((int)pix1) & 7) == 0);
     assert( (((int)pix2) & 7) == 0);
     assert((line_size &7) ==0);
-    
+
 #define SUM(in0, in1, out0, out1) \
       "movq (%0)," #out0 "\n"\
       "movq (%1),%%mm2\n"\
@@ -1405,16 +1408,16 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i
       "pxor %%mm7, %%mm1\n"
       SUM(%%mm0, %%mm1, %%mm4, %%mm5)
       "1:\n"
-      
+
       SUM(%%mm4, %%mm5, %%mm0, %%mm1)
-      
+
       SUM(%%mm0, %%mm1, %%mm4, %%mm5)
-      
+
       "subl $2, %%ecx\n"
       "jnz 1b\n"
 
       "movd %%mm6,%2\n"
-      : "+r" (pix1), "+r" (pix2), "=r"(tmp) 
+      : "+r" (pix1), "+r" (pix2), "=r"(tmp)
       : "r" ((long)line_size) , "m" (h)
       : "%ecx");
     return tmp;
@@ -1424,18 +1427,18 @@ static int vsad16_mmx2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, i
 static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
     long i=0;
     asm volatile(
-        "1:				\n\t"
-        "movq  (%2, %0), %%mm0		\n\t"
-        "movq  (%1, %0), %%mm1		\n\t"
-        "psubb %%mm0, %%mm1		\n\t"
-        "movq %%mm1, (%3, %0)		\n\t"
-        "movq 8(%2, %0), %%mm0		\n\t"
-        "movq 8(%1, %0), %%mm1		\n\t"
-        "psubb %%mm0, %%mm1		\n\t"
-        "movq %%mm1, 8(%3, %0)		\n\t"
-        "add $16, %0			\n\t"
-        "cmp %4, %0			\n\t"
-        " jb 1b				\n\t"
+        "1:                             \n\t"
+        "movq  (%2, %0), %%mm0          \n\t"
+        "movq  (%1, %0), %%mm1          \n\t"
+        "psubb %%mm0, %%mm1             \n\t"
+        "movq %%mm1, (%3, %0)           \n\t"
+        "movq 8(%2, %0), %%mm0          \n\t"
+        "movq 8(%1, %0), %%mm1          \n\t"
+        "psubb %%mm0, %%mm1             \n\t"
+        "movq %%mm1, 8(%3, %0)          \n\t"
+        "add $16, %0                    \n\t"
+        "cmp %4, %0                     \n\t"
+        " jb 1b                         \n\t"
         : "+r" (i)
         : "r"(src1), "r"(src2), "r"(dst), "r"((long)w-15)
     );
@@ -1446,46 +1449,46 @@ static void diff_bytes_mmx(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w){
 static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w, int *left, int *left_top){
     long i=0;
     uint8_t l, lt;
-    
+
     asm volatile(
-        "1:				\n\t"
-        "movq  -1(%1, %0), %%mm0	\n\t" // LT
-        "movq  (%1, %0), %%mm1		\n\t" // T
-        "movq  -1(%2, %0), %%mm2	\n\t" // L
-        "movq  (%2, %0), %%mm3		\n\t" // X
-        "movq %%mm2, %%mm4		\n\t" // L
-        "psubb %%mm0, %%mm2		\n\t"
-        "paddb %%mm1, %%mm2		\n\t" // L + T - LT
-        "movq %%mm4, %%mm5		\n\t" // L
-        "pmaxub %%mm1, %%mm4		\n\t" // max(T, L)
-        "pminub %%mm5, %%mm1		\n\t" // min(T, L)
-        "pminub %%mm2, %%mm4		\n\t" 
-        "pmaxub %%mm1, %%mm4		\n\t"
-        "psubb %%mm4, %%mm3		\n\t" // dst - pred
-        "movq %%mm3, (%3, %0)		\n\t"
-        "add $8, %0			\n\t"
-        "cmp %4, %0			\n\t"
-        " jb 1b				\n\t"
+        "1:                             \n\t"
+        "movq  -1(%1, %0), %%mm0        \n\t" // LT
+        "movq  (%1, %0), %%mm1          \n\t" // T
+        "movq  -1(%2, %0), %%mm2        \n\t" // L
+        "movq  (%2, %0), %%mm3          \n\t" // X
+        "movq %%mm2, %%mm4              \n\t" // L
+        "psubb %%mm0, %%mm2             \n\t"
+        "paddb %%mm1, %%mm2             \n\t" // L + T - LT
+        "movq %%mm4, %%mm5              \n\t" // L
+        "pmaxub %%mm1, %%mm4            \n\t" // max(T, L)
+        "pminub %%mm5, %%mm1            \n\t" // min(T, L)
+        "pminub %%mm2, %%mm4            \n\t"
+        "pmaxub %%mm1, %%mm4            \n\t"
+        "psubb %%mm4, %%mm3             \n\t" // dst - pred
+        "movq %%mm3, (%3, %0)           \n\t"
+        "add $8, %0                     \n\t"
+        "cmp %4, %0                     \n\t"
+        " jb 1b                         \n\t"
         : "+r" (i)
         : "r"(src1), "r"(src2), "r"(dst), "r"((long)w)
     );
 
     l= *left;
     lt= *left_top;
-    
+
     dst[0]= src2[0] - mid_pred(l, src1[0], (l + src1[0] - lt)&0xFF);
-    
+
     *left_top= src1[w-1];
     *left    = src2[w-1];
 }
 
 #define LBUTTERFLY2(a1,b1,a2,b2)\
-    "paddw " #b1 ", " #a1 "		\n\t"\
-    "paddw " #b2 ", " #a2 "		\n\t"\
-    "paddw " #b1 ", " #b1 "		\n\t"\
-    "paddw " #b2 ", " #b2 "		\n\t"\
-    "psubw " #a1 ", " #b1 "		\n\t"\
-    "psubw " #a2 ", " #b2 "		\n\t"
+    "paddw " #b1 ", " #a1 "           \n\t"\
+    "paddw " #b2 ", " #a2 "           \n\t"\
+    "paddw " #b1 ", " #b1 "           \n\t"\
+    "paddw " #b2 ", " #b2 "           \n\t"\
+    "psubw " #a1 ", " #b1 "           \n\t"\
+    "psubw " #a2 ", " #b2 "           \n\t"
 
 #define HADAMARD48\
         LBUTTERFLY2(%%mm0, %%mm1, %%mm2, %%mm3)\
@@ -1496,33 +1499,33 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
         LBUTTERFLY2(%%mm2, %%mm6, %%mm3, %%mm7)\
 
 #define MMABS(a,z)\
-    "pxor " #z ", " #z "		\n\t"\
-    "pcmpgtw " #a ", " #z "		\n\t"\
-    "pxor " #z ", " #a "		\n\t"\
-    "psubw " #z ", " #a "		\n\t"
+    "pxor " #z ", " #z "              \n\t"\
+    "pcmpgtw " #a ", " #z "           \n\t"\
+    "pxor " #z ", " #a "              \n\t"\
+    "psubw " #z ", " #a "             \n\t"
 
 #define MMABS_SUM(a,z, sum)\
-    "pxor " #z ", " #z "		\n\t"\
-    "pcmpgtw " #a ", " #z "		\n\t"\
-    "pxor " #z ", " #a "		\n\t"\
-    "psubw " #z ", " #a "		\n\t"\
-    "paddusw " #a ", " #sum "		\n\t"
+    "pxor " #z ", " #z "              \n\t"\
+    "pcmpgtw " #a ", " #z "           \n\t"\
+    "pxor " #z ", " #a "              \n\t"\
+    "psubw " #z ", " #a "             \n\t"\
+    "paddusw " #a ", " #sum "         \n\t"
 
 #define MMABS_MMX2(a,z)\
-    "pxor " #z ", " #z "		\n\t"\
-    "psubw " #a ", " #z "		\n\t"\
-    "pmaxsw " #z ", " #a "		\n\t"
+    "pxor " #z ", " #z "              \n\t"\
+    "psubw " #a ", " #z "             \n\t"\
+    "pmaxsw " #z ", " #a "            \n\t"
 
 #define MMABS_SUM_MMX2(a,z, sum)\
-    "pxor " #z ", " #z "		\n\t"\
-    "psubw " #a ", " #z "		\n\t"\
-    "pmaxsw " #z ", " #a "		\n\t"\
-    "paddusw " #a ", " #sum "		\n\t"
-        
+    "pxor " #z ", " #z "              \n\t"\
+    "psubw " #a ", " #z "             \n\t"\
+    "pmaxsw " #z ", " #a "            \n\t"\
+    "paddusw " #a ", " #sum "         \n\t"
+
 #define SBUTTERFLY(a,b,t,n)\
-    "movq " #a ", " #t "		\n\t" /* abcd */\
-    "punpckl" #n " " #b ", " #a "	\n\t" /* aebf */\
-    "punpckh" #n " " #b ", " #t "	\n\t" /* cgdh */\
+    "movq " #a ", " #t "              \n\t" /* abcd */\
+    "punpckl" #n " " #b ", " #a "     \n\t" /* aebf */\
+    "punpckh" #n " " #b ", " #t "     \n\t" /* cgdh */\
 
 #define TRANSPOSE4(a,b,c,d,t)\
     SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
@@ -1531,21 +1534,21 @@ static void sub_hfyu_median_prediction_mmx2(uint8_t *dst, uint8_t *src1, uint8_t
     SBUTTERFLY(t,b,c,dq) /* t=cgko c=dhlp */
 
 #define LOAD4(o, a, b, c, d)\
-        "movq "#o"(%1), " #a "		\n\t"\
-        "movq "#o"+16(%1), " #b "	\n\t"\
-        "movq "#o"+32(%1), " #c "	\n\t"\
-        "movq "#o"+48(%1), " #d "	\n\t"
+        "movq "#o"(%1), " #a "        \n\t"\
+        "movq "#o"+16(%1), " #b "     \n\t"\
+        "movq "#o"+32(%1), " #c "     \n\t"\
+        "movq "#o"+48(%1), " #d "     \n\t"
 
 #define STORE4(o, a, b, c, d)\
-        "movq "#a", "#o"(%1)		\n\t"\
-        "movq "#b", "#o"+16(%1)		\n\t"\
-        "movq "#c", "#o"+32(%1)		\n\t"\
-        "movq "#d", "#o"+48(%1)		\n\t"\
+        "movq "#a", "#o"(%1)          \n\t"\
+        "movq "#b", "#o"+16(%1)       \n\t"\
+        "movq "#c", "#o"+32(%1)       \n\t"\
+        "movq "#d", "#o"+48(%1)       \n\t"\
 
 static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
-    uint64_t temp[16] __align8;
+    DECLARE_ALIGNED_8(uint64_t, temp[16]);
     int sum=0;
-    
+
     assert(h==8);
 
     diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
@@ -1553,40 +1556,40 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride,
     asm volatile(
         LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
         LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)
-        
+
         HADAMARD48
-        
-        "movq %%mm7, 112(%1)		\n\t"
-        
+
+        "movq %%mm7, 112(%1)            \n\t"
+
         TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
         STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)
-        
-        "movq 112(%1), %%mm7 		\n\t"
+
+        "movq 112(%1), %%mm7            \n\t"
         TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
         STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)
 
         LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
         LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
-        
+
         HADAMARD48
-        
-        "movq %%mm7, 120(%1)		\n\t"
-        
+
+        "movq %%mm7, 120(%1)            \n\t"
+
         TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
         STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)
-        
-        "movq 120(%1), %%mm7 		\n\t"
+
+        "movq 120(%1), %%mm7            \n\t"
         TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
-        "movq %%mm7, %%mm5		\n\t"//FIXME remove
-        "movq %%mm6, %%mm7		\n\t"
-        "movq %%mm0, %%mm6		\n\t"
+        "movq %%mm7, %%mm5              \n\t"//FIXME remove
+        "movq %%mm6, %%mm7              \n\t"
+        "movq %%mm0, %%mm6              \n\t"
 //        STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove
-        
+
         LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
 //        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
-        
+
         HADAMARD48
-        "movq %%mm7, 64(%1)		\n\t"
+        "movq %%mm7, 64(%1)             \n\t"
         MMABS(%%mm0, %%mm7)
         MMABS_SUM(%%mm1, %%mm7, %%mm0)
         MMABS_SUM(%%mm2, %%mm7, %%mm0)
@@ -1594,15 +1597,15 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride,
         MMABS_SUM(%%mm4, %%mm7, %%mm0)
         MMABS_SUM(%%mm5, %%mm7, %%mm0)
         MMABS_SUM(%%mm6, %%mm7, %%mm0)
-        "movq 64(%1), %%mm1		\n\t"
+        "movq 64(%1), %%mm1             \n\t"
         MMABS_SUM(%%mm1, %%mm7, %%mm0)
-        "movq %%mm0, 64(%1)		\n\t"
-        
+        "movq %%mm0, 64(%1)             \n\t"
+
         LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
         LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)
-        
+
         HADAMARD48
-        "movq %%mm7, (%1)		\n\t"
+        "movq %%mm7, (%1)               \n\t"
         MMABS(%%mm0, %%mm7)
         MMABS_SUM(%%mm1, %%mm7, %%mm0)
         MMABS_SUM(%%mm2, %%mm7, %%mm0)
@@ -1610,19 +1613,19 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride,
         MMABS_SUM(%%mm4, %%mm7, %%mm0)
         MMABS_SUM(%%mm5, %%mm7, %%mm0)
         MMABS_SUM(%%mm6, %%mm7, %%mm0)
-        "movq (%1), %%mm1		\n\t"
+        "movq (%1), %%mm1               \n\t"
         MMABS_SUM(%%mm1, %%mm7, %%mm0)
-        "movq 64(%1), %%mm1		\n\t"
+        "movq 64(%1), %%mm1             \n\t"
         MMABS_SUM(%%mm1, %%mm7, %%mm0)
-        
-        "movq %%mm0, %%mm1		\n\t"
-        "psrlq $32, %%mm0		\n\t"
-        "paddusw %%mm1, %%mm0		\n\t"
-        "movq %%mm0, %%mm1		\n\t"
-        "psrlq $16, %%mm0		\n\t"
-        "paddusw %%mm1, %%mm0		\n\t"
-        "movd %%mm0, %0			\n\t"
-                
+
+        "movq %%mm0, %%mm1              \n\t"
+        "psrlq $32, %%mm0               \n\t"
+        "paddusw %%mm1, %%mm0           \n\t"
+        "movq %%mm0, %%mm1              \n\t"
+        "psrlq $16, %%mm0               \n\t"
+        "paddusw %%mm1, %%mm0           \n\t"
+        "movd %%mm0, %0                 \n\t"
+
         : "=r" (sum)
         : "r"(temp)
     );
@@ -1630,9 +1633,9 @@ static int hadamard8_diff_mmx(void *s, uint8_t *src1, uint8_t *src2, int stride,
 }
 
 static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride, int h){
-    uint64_t temp[16] __align8;
+    DECLARE_ALIGNED_8(uint64_t, temp[16]);
     int sum=0;
-    
+
     assert(h==8);
 
     diff_pixels_mmx((DCTELEM*)temp, src1, src2, stride);
@@ -1640,40 +1643,40 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
     asm volatile(
         LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
         LOAD4(64, %%mm4, %%mm5, %%mm6, %%mm7)
-        
+
         HADAMARD48
-        
-        "movq %%mm7, 112(%1)		\n\t"
-        
+
+        "movq %%mm7, 112(%1)            \n\t"
+
         TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
         STORE4(0 , %%mm0, %%mm3, %%mm7, %%mm2)
-        
-        "movq 112(%1), %%mm7 		\n\t"
+
+        "movq 112(%1), %%mm7            \n\t"
         TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
         STORE4(64, %%mm4, %%mm7, %%mm0, %%mm6)
 
         LOAD4(8 , %%mm0, %%mm1, %%mm2, %%mm3)
         LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
-        
+
         HADAMARD48
-        
-        "movq %%mm7, 120(%1)		\n\t"
-        
+
+        "movq %%mm7, 120(%1)            \n\t"
+
         TRANSPOSE4(%%mm0, %%mm1, %%mm2, %%mm3, %%mm7)
         STORE4(8 , %%mm0, %%mm3, %%mm7, %%mm2)
-        
-        "movq 120(%1), %%mm7 		\n\t"
+
+        "movq 120(%1), %%mm7            \n\t"
         TRANSPOSE4(%%mm4, %%mm5, %%mm6, %%mm7, %%mm0)
-        "movq %%mm7, %%mm5		\n\t"//FIXME remove
-        "movq %%mm6, %%mm7		\n\t"
-        "movq %%mm0, %%mm6		\n\t"
+        "movq %%mm7, %%mm5              \n\t"//FIXME remove
+        "movq %%mm6, %%mm7              \n\t"
+        "movq %%mm0, %%mm6              \n\t"
 //        STORE4(72, %%mm4, %%mm7, %%mm0, %%mm6) //FIXME remove
-        
+
         LOAD4(64, %%mm0, %%mm1, %%mm2, %%mm3)
 //        LOAD4(72, %%mm4, %%mm5, %%mm6, %%mm7)
-        
+
         HADAMARD48
-        "movq %%mm7, 64(%1)		\n\t"
+        "movq %%mm7, 64(%1)             \n\t"
         MMABS_MMX2(%%mm0, %%mm7)
         MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
         MMABS_SUM_MMX2(%%mm2, %%mm7, %%mm0)
@@ -1681,15 +1684,15 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
         MMABS_SUM_MMX2(%%mm4, %%mm7, %%mm0)
         MMABS_SUM_MMX2(%%mm5, %%mm7, %%mm0)
         MMABS_SUM_MMX2(%%mm6, %%mm7, %%mm0)
-        "movq 64(%1), %%mm1		\n\t"
+        "movq 64(%1), %%mm1             \n\t"
         MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
-        "movq %%mm0, 64(%1)		\n\t"
-        
+        "movq %%mm0, 64(%1)             \n\t"
+
         LOAD4(0 , %%mm0, %%mm1, %%mm2, %%mm3)
         LOAD4(8 , %%mm4, %%mm5, %%mm6, %%mm7)
-        
+
         HADAMARD48
-        "movq %%mm7, (%1)		\n\t"
+        "movq %%mm7, (%1)               \n\t"
         MMABS_MMX2(%%mm0, %%mm7)
         MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
         MMABS_SUM_MMX2(%%mm2, %%mm7, %%mm0)
@@ -1697,17 +1700,17 @@ static int hadamard8_diff_mmx2(void *s, uint8_t *src1, uint8_t *src2, int stride
         MMABS_SUM_MMX2(%%mm4, %%mm7, %%mm0)
         MMABS_SUM_MMX2(%%mm5, %%mm7, %%mm0)
         MMABS_SUM_MMX2(%%mm6, %%mm7, %%mm0)
-        "movq (%1), %%mm1		\n\t"
+        "movq (%1), %%mm1               \n\t"
         MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
-        "movq 64(%1), %%mm1		\n\t"
+        "movq 64(%1), %%mm1             \n\t"
         MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
-        
+
         "pshufw $0x0E, %%mm0, %%mm1     \n\t"
-        "paddusw %%mm1, %%mm0		\n\t"
+        "paddusw %%mm1, %%mm0           \n\t"
         "pshufw $0x01, %%mm0, %%mm1     \n\t"
-        "paddusw %%mm1, %%mm0		\n\t"
-        "movd %%mm0, %0			\n\t"
-                
+        "paddusw %%mm1, %%mm0           \n\t"
+        "movd %%mm0, %0                 \n\t"
+
         : "=r" (sum)
         : "r"(temp)
     );
@@ -1723,24 +1726,24 @@ WARPER8_16_SQ(hadamard8_diff_mmx2, hadamard8_diff16_mmx2)
 #define put_no_rnd_pixels16_mmx(a,b,c,d) put_pixels16_mmx(a,b,c,d)
 
 #define QPEL_V_LOW(m3,m4,m5,m6, pw_20, pw_3, rnd, in0, in1, in2, in7, out, OP)\
-        "paddw " #m4 ", " #m3 "		\n\t" /* x1 */\
-        "movq "MANGLE(ff_pw_20)", %%mm4		\n\t" /* 20 */\
-        "pmullw " #m3 ", %%mm4		\n\t" /* 20x1 */\
-        "movq "#in7", " #m3 "		\n\t" /* d */\
-        "movq "#in0", %%mm5		\n\t" /* D */\
-        "paddw " #m3 ", %%mm5		\n\t" /* x4 */\
-        "psubw %%mm5, %%mm4		\n\t" /* 20x1 - x4 */\
-        "movq "#in1", %%mm5		\n\t" /* C */\
-        "movq "#in2", %%mm6		\n\t" /* B */\
-        "paddw " #m6 ", %%mm5		\n\t" /* x3 */\
-        "paddw " #m5 ", %%mm6		\n\t" /* x2 */\
-        "paddw %%mm6, %%mm6		\n\t" /* 2x2 */\
-        "psubw %%mm6, %%mm5		\n\t" /* -2x2 + x3 */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm5	\n\t" /* -6x2 + 3x3 */\
-        "paddw " #rnd ", %%mm4		\n\t" /* x2 */\
-        "paddw %%mm4, %%mm5		\n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
-        "psraw $5, %%mm5		\n\t"\
-        "packuswb %%mm5, %%mm5		\n\t"\
+        "paddw " #m4 ", " #m3 "           \n\t" /* x1 */\
+        "movq "MANGLE(ff_pw_20)", %%mm4   \n\t" /* 20 */\
+        "pmullw " #m3 ", %%mm4            \n\t" /* 20x1 */\
+        "movq "#in7", " #m3 "             \n\t" /* d */\
+        "movq "#in0", %%mm5               \n\t" /* D */\
+        "paddw " #m3 ", %%mm5             \n\t" /* x4 */\
+        "psubw %%mm5, %%mm4               \n\t" /* 20x1 - x4 */\
+        "movq "#in1", %%mm5               \n\t" /* C */\
+        "movq "#in2", %%mm6               \n\t" /* B */\
+        "paddw " #m6 ", %%mm5             \n\t" /* x3 */\
+        "paddw " #m5 ", %%mm6             \n\t" /* x2 */\
+        "paddw %%mm6, %%mm6               \n\t" /* 2x2 */\
+        "psubw %%mm6, %%mm5               \n\t" /* -2x2 + x3 */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm5  \n\t" /* -6x2 + 3x3 */\
+        "paddw " #rnd ", %%mm4            \n\t" /* x2 */\
+        "paddw %%mm4, %%mm5               \n\t" /* 20x1 - 6x2 + 3x3 - x4 */\
+        "psraw $5, %%mm5                  \n\t"\
+        "packuswb %%mm5, %%mm5            \n\t"\
         OP(%%mm5, out, %%mm7, d)
 
 #define QPEL_BASE(OPNAME, ROUNDER, RND, OP_MMX2, OP_3DNOW)\
@@ -1748,116 +1751,116 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, in
     uint64_t temp;\
 \
     asm volatile(\
-        "pxor %%mm7, %%mm7		\n\t"\
-        "1:				\n\t"\
-        "movq  (%0), %%mm0		\n\t" /* ABCDEFGH */\
-        "movq %%mm0, %%mm1		\n\t" /* ABCDEFGH */\
-        "movq %%mm0, %%mm2		\n\t" /* ABCDEFGH */\
-        "punpcklbw %%mm7, %%mm0		\n\t" /* 0A0B0C0D */\
-        "punpckhbw %%mm7, %%mm1		\n\t" /* 0E0F0G0H */\
-        "pshufw $0x90, %%mm0, %%mm5	\n\t" /* 0A0A0B0C */\
-        "pshufw $0x41, %%mm0, %%mm6	\n\t" /* 0B0A0A0B */\
-        "movq %%mm2, %%mm3		\n\t" /* ABCDEFGH */\
-        "movq %%mm2, %%mm4		\n\t" /* ABCDEFGH */\
-        "psllq $8, %%mm2		\n\t" /* 0ABCDEFG */\
-        "psllq $16, %%mm3		\n\t" /* 00ABCDEF */\
-        "psllq $24, %%mm4		\n\t" /* 000ABCDE */\
-        "punpckhbw %%mm7, %%mm2		\n\t" /* 0D0E0F0G */\
-        "punpckhbw %%mm7, %%mm3		\n\t" /* 0C0D0E0F */\
-        "punpckhbw %%mm7, %%mm4		\n\t" /* 0B0C0D0E */\
-        "paddw %%mm3, %%mm5		\n\t" /* b */\
-        "paddw %%mm2, %%mm6		\n\t" /* c */\
-        "paddw %%mm5, %%mm5		\n\t" /* 2b */\
-        "psubw %%mm5, %%mm6		\n\t" /* c - 2b */\
-        "pshufw $0x06, %%mm0, %%mm5	\n\t" /* 0C0B0A0A */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm6		\n\t" /* 3c - 6b */\
-        "paddw %%mm4, %%mm0		\n\t" /* a */\
-        "paddw %%mm1, %%mm5		\n\t" /* d */\
-        "pmullw "MANGLE(ff_pw_20)", %%mm0		\n\t" /* 20a */\
-        "psubw %%mm5, %%mm0		\n\t" /* 20a - d */\
-        "paddw %6, %%mm6		\n\t"\
-        "paddw %%mm6, %%mm0		\n\t" /* 20a - 6b + 3c - d */\
-        "psraw $5, %%mm0		\n\t"\
-        "movq %%mm0, %5			\n\t"\
+        "pxor %%mm7, %%mm7                \n\t"\
+        "1:                               \n\t"\
+        "movq  (%0), %%mm0                \n\t" /* ABCDEFGH */\
+        "movq %%mm0, %%mm1                \n\t" /* ABCDEFGH */\
+        "movq %%mm0, %%mm2                \n\t" /* ABCDEFGH */\
+        "punpcklbw %%mm7, %%mm0           \n\t" /* 0A0B0C0D */\
+        "punpckhbw %%mm7, %%mm1           \n\t" /* 0E0F0G0H */\
+        "pshufw $0x90, %%mm0, %%mm5       \n\t" /* 0A0A0B0C */\
+        "pshufw $0x41, %%mm0, %%mm6       \n\t" /* 0B0A0A0B */\
+        "movq %%mm2, %%mm3                \n\t" /* ABCDEFGH */\
+        "movq %%mm2, %%mm4                \n\t" /* ABCDEFGH */\
+        "psllq $8, %%mm2                  \n\t" /* 0ABCDEFG */\
+        "psllq $16, %%mm3                 \n\t" /* 00ABCDEF */\
+        "psllq $24, %%mm4                 \n\t" /* 000ABCDE */\
+        "punpckhbw %%mm7, %%mm2           \n\t" /* 0D0E0F0G */\
+        "punpckhbw %%mm7, %%mm3           \n\t" /* 0C0D0E0F */\
+        "punpckhbw %%mm7, %%mm4           \n\t" /* 0B0C0D0E */\
+        "paddw %%mm3, %%mm5               \n\t" /* b */\
+        "paddw %%mm2, %%mm6               \n\t" /* c */\
+        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
+        "psubw %%mm5, %%mm6               \n\t" /* c - 2b */\
+        "pshufw $0x06, %%mm0, %%mm5       \n\t" /* 0C0B0A0A */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm6  \n\t" /* 3c - 6b */\
+        "paddw %%mm4, %%mm0               \n\t" /* a */\
+        "paddw %%mm1, %%mm5               \n\t" /* d */\
+        "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
+        "psubw %%mm5, %%mm0               \n\t" /* 20a - d */\
+        "paddw %6, %%mm6                  \n\t"\
+        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
+        "psraw $5, %%mm0                  \n\t"\
+        "movq %%mm0, %5                   \n\t"\
         /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
         \
-        "movq 5(%0), %%mm0		\n\t" /* FGHIJKLM */\
-        "movq %%mm0, %%mm5		\n\t" /* FGHIJKLM */\
-        "movq %%mm0, %%mm6		\n\t" /* FGHIJKLM */\
-        "psrlq $8, %%mm0		\n\t" /* GHIJKLM0 */\
-        "psrlq $16, %%mm5		\n\t" /* HIJKLM00 */\
-        "punpcklbw %%mm7, %%mm0		\n\t" /* 0G0H0I0J */\
-        "punpcklbw %%mm7, %%mm5		\n\t" /* 0H0I0J0K */\
-        "paddw %%mm0, %%mm2		\n\t" /* b */\
-        "paddw %%mm5, %%mm3		\n\t" /* c */\
-        "paddw %%mm2, %%mm2		\n\t" /* 2b */\
-        "psubw %%mm2, %%mm3		\n\t" /* c - 2b */\
-        "movq %%mm6, %%mm2		\n\t" /* FGHIJKLM */\
-        "psrlq $24, %%mm6		\n\t" /* IJKLM000 */\
-        "punpcklbw %%mm7, %%mm2		\n\t" /* 0F0G0H0I */\
-        "punpcklbw %%mm7, %%mm6		\n\t" /* 0I0J0K0L */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm3		\n\t" /* 3c - 6b */\
-        "paddw %%mm2, %%mm1		\n\t" /* a */\
-        "paddw %%mm6, %%mm4		\n\t" /* d */\
-        "pmullw "MANGLE(ff_pw_20)", %%mm1		\n\t" /* 20a */\
-        "psubw %%mm4, %%mm3		\n\t" /* - 6b +3c - d */\
-        "paddw %6, %%mm1		\n\t"\
-        "paddw %%mm1, %%mm3		\n\t" /* 20a - 6b +3c - d */\
-        "psraw $5, %%mm3		\n\t"\
-        "movq %5, %%mm1			\n\t"\
-        "packuswb %%mm3, %%mm1		\n\t"\
+        "movq 5(%0), %%mm0                \n\t" /* FGHIJKLM */\
+        "movq %%mm0, %%mm5                \n\t" /* FGHIJKLM */\
+        "movq %%mm0, %%mm6                \n\t" /* FGHIJKLM */\
+        "psrlq $8, %%mm0                  \n\t" /* GHIJKLM0 */\
+        "psrlq $16, %%mm5                 \n\t" /* HIJKLM00 */\
+        "punpcklbw %%mm7, %%mm0           \n\t" /* 0G0H0I0J */\
+        "punpcklbw %%mm7, %%mm5           \n\t" /* 0H0I0J0K */\
+        "paddw %%mm0, %%mm2               \n\t" /* b */\
+        "paddw %%mm5, %%mm3               \n\t" /* c */\
+        "paddw %%mm2, %%mm2               \n\t" /* 2b */\
+        "psubw %%mm2, %%mm3               \n\t" /* c - 2b */\
+        "movq %%mm6, %%mm2                \n\t" /* FGHIJKLM */\
+        "psrlq $24, %%mm6                 \n\t" /* IJKLM000 */\
+        "punpcklbw %%mm7, %%mm2           \n\t" /* 0F0G0H0I */\
+        "punpcklbw %%mm7, %%mm6           \n\t" /* 0I0J0K0L */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm3  \n\t" /* 3c - 6b */\
+        "paddw %%mm2, %%mm1               \n\t" /* a */\
+        "paddw %%mm6, %%mm4               \n\t" /* d */\
+        "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
+        "psubw %%mm4, %%mm3               \n\t" /* - 6b +3c - d */\
+        "paddw %6, %%mm1                  \n\t"\
+        "paddw %%mm1, %%mm3               \n\t" /* 20a - 6b +3c - d */\
+        "psraw $5, %%mm3                  \n\t"\
+        "movq %5, %%mm1                   \n\t"\
+        "packuswb %%mm3, %%mm1            \n\t"\
         OP_MMX2(%%mm1, (%1),%%mm4, q)\
         /* mm0= GHIJ, mm2=FGHI, mm5=HIJK, mm6=IJKL, mm7=0 */\
         \
-        "movq 9(%0), %%mm1		\n\t" /* JKLMNOPQ */\
-        "movq %%mm1, %%mm4		\n\t" /* JKLMNOPQ */\
-        "movq %%mm1, %%mm3		\n\t" /* JKLMNOPQ */\
-        "psrlq $8, %%mm1		\n\t" /* KLMNOPQ0 */\
-        "psrlq $16, %%mm4		\n\t" /* LMNOPQ00 */\
-        "punpcklbw %%mm7, %%mm1		\n\t" /* 0K0L0M0N */\
-        "punpcklbw %%mm7, %%mm4		\n\t" /* 0L0M0N0O */\
-        "paddw %%mm1, %%mm5		\n\t" /* b */\
-        "paddw %%mm4, %%mm0		\n\t" /* c */\
-        "paddw %%mm5, %%mm5		\n\t" /* 2b */\
-        "psubw %%mm5, %%mm0		\n\t" /* c - 2b */\
-        "movq %%mm3, %%mm5		\n\t" /* JKLMNOPQ */\
-        "psrlq $24, %%mm3		\n\t" /* MNOPQ000 */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm0		\n\t" /* 3c - 6b */\
-        "punpcklbw %%mm7, %%mm3		\n\t" /* 0M0N0O0P */\
-        "paddw %%mm3, %%mm2		\n\t" /* d */\
-        "psubw %%mm2, %%mm0		\n\t" /* -6b + 3c - d */\
-        "movq %%mm5, %%mm2		\n\t" /* JKLMNOPQ */\
-        "punpcklbw %%mm7, %%mm2		\n\t" /* 0J0K0L0M */\
-        "punpckhbw %%mm7, %%mm5		\n\t" /* 0N0O0P0Q */\
-        "paddw %%mm2, %%mm6		\n\t" /* a */\
-        "pmullw "MANGLE(ff_pw_20)", %%mm6		\n\t" /* 20a */\
-        "paddw %6, %%mm0		\n\t"\
-        "paddw %%mm6, %%mm0		\n\t" /* 20a - 6b + 3c - d */\
-        "psraw $5, %%mm0		\n\t"\
+        "movq 9(%0), %%mm1                \n\t" /* JKLMNOPQ */\
+        "movq %%mm1, %%mm4                \n\t" /* JKLMNOPQ */\
+        "movq %%mm1, %%mm3                \n\t" /* JKLMNOPQ */\
+        "psrlq $8, %%mm1                  \n\t" /* KLMNOPQ0 */\
+        "psrlq $16, %%mm4                 \n\t" /* LMNOPQ00 */\
+        "punpcklbw %%mm7, %%mm1           \n\t" /* 0K0L0M0N */\
+        "punpcklbw %%mm7, %%mm4           \n\t" /* 0L0M0N0O */\
+        "paddw %%mm1, %%mm5               \n\t" /* b */\
+        "paddw %%mm4, %%mm0               \n\t" /* c */\
+        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
+        "psubw %%mm5, %%mm0               \n\t" /* c - 2b */\
+        "movq %%mm3, %%mm5                \n\t" /* JKLMNOPQ */\
+        "psrlq $24, %%mm3                 \n\t" /* MNOPQ000 */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm0  \n\t" /* 3c - 6b */\
+        "punpcklbw %%mm7, %%mm3           \n\t" /* 0M0N0O0P */\
+        "paddw %%mm3, %%mm2               \n\t" /* d */\
+        "psubw %%mm2, %%mm0               \n\t" /* -6b + 3c - d */\
+        "movq %%mm5, %%mm2                \n\t" /* JKLMNOPQ */\
+        "punpcklbw %%mm7, %%mm2           \n\t" /* 0J0K0L0M */\
+        "punpckhbw %%mm7, %%mm5           \n\t" /* 0N0O0P0Q */\
+        "paddw %%mm2, %%mm6               \n\t" /* a */\
+        "pmullw "MANGLE(ff_pw_20)", %%mm6 \n\t" /* 20a */\
+        "paddw %6, %%mm0                  \n\t"\
+        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
+        "psraw $5, %%mm0                  \n\t"\
         /* mm1=KLMN, mm2=JKLM, mm3=MNOP, mm4=LMNO, mm5=NOPQ mm7=0 */\
         \
-        "paddw %%mm5, %%mm3		\n\t" /* a */\
-        "pshufw $0xF9, %%mm5, %%mm6	\n\t" /* 0O0P0Q0Q */\
-        "paddw %%mm4, %%mm6		\n\t" /* b */\
-        "pshufw $0xBE, %%mm5, %%mm4	\n\t" /* 0P0Q0Q0P */\
-        "pshufw $0x6F, %%mm5, %%mm5	\n\t" /* 0Q0Q0P0O */\
-        "paddw %%mm1, %%mm4		\n\t" /* c */\
-        "paddw %%mm2, %%mm5		\n\t" /* d */\
-        "paddw %%mm6, %%mm6		\n\t" /* 2b */\
-        "psubw %%mm6, %%mm4		\n\t" /* c - 2b */\
-        "pmullw "MANGLE(ff_pw_20)", %%mm3		\n\t" /* 20a */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm4		\n\t" /* 3c - 6b */\
-        "psubw %%mm5, %%mm3		\n\t" /* -6b + 3c - d */\
-        "paddw %6, %%mm4		\n\t"\
-        "paddw %%mm3, %%mm4		\n\t" /* 20a - 6b + 3c - d */\
-        "psraw $5, %%mm4		\n\t"\
-        "packuswb %%mm4, %%mm0		\n\t"\
+        "paddw %%mm5, %%mm3               \n\t" /* a */\
+        "pshufw $0xF9, %%mm5, %%mm6       \n\t" /* 0O0P0Q0Q */\
+        "paddw %%mm4, %%mm6               \n\t" /* b */\
+        "pshufw $0xBE, %%mm5, %%mm4       \n\t" /* 0P0Q0Q0P */\
+        "pshufw $0x6F, %%mm5, %%mm5       \n\t" /* 0Q0Q0P0O */\
+        "paddw %%mm1, %%mm4               \n\t" /* c */\
+        "paddw %%mm2, %%mm5               \n\t" /* d */\
+        "paddw %%mm6, %%mm6               \n\t" /* 2b */\
+        "psubw %%mm6, %%mm4               \n\t" /* c - 2b */\
+        "pmullw "MANGLE(ff_pw_20)", %%mm3 \n\t" /* 20a */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm4  \n\t" /* 3c - 6b */\
+        "psubw %%mm5, %%mm3               \n\t" /* -6b + 3c - d */\
+        "paddw %6, %%mm4                  \n\t"\
+        "paddw %%mm3, %%mm4               \n\t" /* 20a - 6b + 3c - d */\
+        "psraw $5, %%mm4                  \n\t"\
+        "packuswb %%mm4, %%mm0            \n\t"\
         OP_MMX2(%%mm0, 8(%1), %%mm4, q)\
         \
-        "add %3, %0			\n\t"\
-        "add %4, %1			\n\t"\
-        "decl %2			\n\t"\
-        " jnz 1b				\n\t"\
+        "add %3, %0                       \n\t"\
+        "add %4, %1                       \n\t"\
+        "decl %2                          \n\t"\
+        " jnz 1b                          \n\t"\
         : "+a"(src), "+c"(dst), "+m"(h)\
         : "d"((long)srcStride), "S"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
         : "memory"\
@@ -1887,21 +1890,21 @@ static void OPNAME ## mpeg4_qpel16_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, i
         temp[14]= (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]);\
         temp[15]= (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]);\
         asm volatile(\
-            "movq (%0), %%mm0		\n\t"\
-            "movq 8(%0), %%mm1		\n\t"\
-            "paddw %2, %%mm0		\n\t"\
-            "paddw %2, %%mm1		\n\t"\
-            "psraw $5, %%mm0		\n\t"\
-            "psraw $5, %%mm1		\n\t"\
-            "packuswb %%mm1, %%mm0	\n\t"\
+            "movq (%0), %%mm0               \n\t"\
+            "movq 8(%0), %%mm1              \n\t"\
+            "paddw %2, %%mm0                \n\t"\
+            "paddw %2, %%mm1                \n\t"\
+            "psraw $5, %%mm0                \n\t"\
+            "psraw $5, %%mm1                \n\t"\
+            "packuswb %%mm1, %%mm0          \n\t"\
             OP_3DNOW(%%mm0, (%1), %%mm1, q)\
-            "movq 16(%0), %%mm0		\n\t"\
-            "movq 24(%0), %%mm1		\n\t"\
-            "paddw %2, %%mm0		\n\t"\
-            "paddw %2, %%mm1		\n\t"\
-            "psraw $5, %%mm0		\n\t"\
-            "psraw $5, %%mm1		\n\t"\
-            "packuswb %%mm1, %%mm0	\n\t"\
+            "movq 16(%0), %%mm0             \n\t"\
+            "movq 24(%0), %%mm1             \n\t"\
+            "paddw %2, %%mm0                \n\t"\
+            "paddw %2, %%mm1                \n\t"\
+            "psraw $5, %%mm0                \n\t"\
+            "psraw $5, %%mm1                \n\t"\
+            "packuswb %%mm1, %%mm0          \n\t"\
             OP_3DNOW(%%mm0, 8(%1), %%mm1, q)\
             :: "r"(temp), "r"(dst), "m"(ROUNDER)\
             : "memory"\
@@ -1915,62 +1918,62 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_mmx2(uint8_t *dst, uint8_t *src, int
     uint64_t temp;\
 \
     asm volatile(\
-        "pxor %%mm7, %%mm7		\n\t"\
-        "1:				\n\t"\
-        "movq  (%0), %%mm0		\n\t" /* ABCDEFGH */\
-        "movq %%mm0, %%mm1		\n\t" /* ABCDEFGH */\
-        "movq %%mm0, %%mm2		\n\t" /* ABCDEFGH */\
-        "punpcklbw %%mm7, %%mm0		\n\t" /* 0A0B0C0D */\
-        "punpckhbw %%mm7, %%mm1		\n\t" /* 0E0F0G0H */\
-        "pshufw $0x90, %%mm0, %%mm5	\n\t" /* 0A0A0B0C */\
-        "pshufw $0x41, %%mm0, %%mm6	\n\t" /* 0B0A0A0B */\
-        "movq %%mm2, %%mm3		\n\t" /* ABCDEFGH */\
-        "movq %%mm2, %%mm4		\n\t" /* ABCDEFGH */\
-        "psllq $8, %%mm2		\n\t" /* 0ABCDEFG */\
-        "psllq $16, %%mm3		\n\t" /* 00ABCDEF */\
-        "psllq $24, %%mm4		\n\t" /* 000ABCDE */\
-        "punpckhbw %%mm7, %%mm2		\n\t" /* 0D0E0F0G */\
-        "punpckhbw %%mm7, %%mm3		\n\t" /* 0C0D0E0F */\
-        "punpckhbw %%mm7, %%mm4		\n\t" /* 0B0C0D0E */\
-        "paddw %%mm3, %%mm5		\n\t" /* b */\
-        "paddw %%mm2, %%mm6		\n\t" /* c */\
-        "paddw %%mm5, %%mm5		\n\t" /* 2b */\
-        "psubw %%mm5, %%mm6		\n\t" /* c - 2b */\
-        "pshufw $0x06, %%mm0, %%mm5	\n\t" /* 0C0B0A0A */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm6		\n\t" /* 3c - 6b */\
-        "paddw %%mm4, %%mm0		\n\t" /* a */\
-        "paddw %%mm1, %%mm5		\n\t" /* d */\
-        "pmullw "MANGLE(ff_pw_20)", %%mm0		\n\t" /* 20a */\
-        "psubw %%mm5, %%mm0		\n\t" /* 20a - d */\
-        "paddw %6, %%mm6		\n\t"\
-        "paddw %%mm6, %%mm0		\n\t" /* 20a - 6b + 3c - d */\
-        "psraw $5, %%mm0		\n\t"\
+        "pxor %%mm7, %%mm7                \n\t"\
+        "1:                               \n\t"\
+        "movq  (%0), %%mm0                \n\t" /* ABCDEFGH */\
+        "movq %%mm0, %%mm1                \n\t" /* ABCDEFGH */\
+        "movq %%mm0, %%mm2                \n\t" /* ABCDEFGH */\
+        "punpcklbw %%mm7, %%mm0           \n\t" /* 0A0B0C0D */\
+        "punpckhbw %%mm7, %%mm1           \n\t" /* 0E0F0G0H */\
+        "pshufw $0x90, %%mm0, %%mm5       \n\t" /* 0A0A0B0C */\
+        "pshufw $0x41, %%mm0, %%mm6       \n\t" /* 0B0A0A0B */\
+        "movq %%mm2, %%mm3                \n\t" /* ABCDEFGH */\
+        "movq %%mm2, %%mm4                \n\t" /* ABCDEFGH */\
+        "psllq $8, %%mm2                  \n\t" /* 0ABCDEFG */\
+        "psllq $16, %%mm3                 \n\t" /* 00ABCDEF */\
+        "psllq $24, %%mm4                 \n\t" /* 000ABCDE */\
+        "punpckhbw %%mm7, %%mm2           \n\t" /* 0D0E0F0G */\
+        "punpckhbw %%mm7, %%mm3           \n\t" /* 0C0D0E0F */\
+        "punpckhbw %%mm7, %%mm4           \n\t" /* 0B0C0D0E */\
+        "paddw %%mm3, %%mm5               \n\t" /* b */\
+        "paddw %%mm2, %%mm6               \n\t" /* c */\
+        "paddw %%mm5, %%mm5               \n\t" /* 2b */\
+        "psubw %%mm5, %%mm6               \n\t" /* c - 2b */\
+        "pshufw $0x06, %%mm0, %%mm5       \n\t" /* 0C0B0A0A */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm6  \n\t" /* 3c - 6b */\
+        "paddw %%mm4, %%mm0               \n\t" /* a */\
+        "paddw %%mm1, %%mm5               \n\t" /* d */\
+        "pmullw "MANGLE(ff_pw_20)", %%mm0 \n\t" /* 20a */\
+        "psubw %%mm5, %%mm0               \n\t" /* 20a - d */\
+        "paddw %6, %%mm6                  \n\t"\
+        "paddw %%mm6, %%mm0               \n\t" /* 20a - 6b + 3c - d */\
+        "psraw $5, %%mm0                  \n\t"\
         /* mm1=EFGH, mm2=DEFG, mm3=CDEF, mm4=BCDE, mm7=0 */\
         \
-        "movd 5(%0), %%mm5		\n\t" /* FGHI */\
-        "punpcklbw %%mm7, %%mm5		\n\t" /* 0F0G0H0I */\
-        "pshufw $0xF9, %%mm5, %%mm6	\n\t" /* 0G0H0I0I */\
-        "paddw %%mm5, %%mm1		\n\t" /* a */\
-        "paddw %%mm6, %%mm2		\n\t" /* b */\
-        "pshufw $0xBE, %%mm5, %%mm6	\n\t" /* 0H0I0I0H */\
-        "pshufw $0x6F, %%mm5, %%mm5	\n\t" /* 0I0I0H0G */\
-        "paddw %%mm6, %%mm3		\n\t" /* c */\
-        "paddw %%mm5, %%mm4		\n\t" /* d */\
-        "paddw %%mm2, %%mm2		\n\t" /* 2b */\
-        "psubw %%mm2, %%mm3		\n\t" /* c - 2b */\
-        "pmullw "MANGLE(ff_pw_20)", %%mm1		\n\t" /* 20a */\
-        "pmullw "MANGLE(ff_pw_3)", %%mm3		\n\t" /* 3c - 6b */\
-        "psubw %%mm4, %%mm3		\n\t" /* -6b + 3c - d */\
-        "paddw %6, %%mm1		\n\t"\
-        "paddw %%mm1, %%mm3		\n\t" /* 20a - 6b + 3c - d */\
-        "psraw $5, %%mm3		\n\t"\
-        "packuswb %%mm3, %%mm0		\n\t"\
+        "movd 5(%0), %%mm5                \n\t" /* FGHI */\
+        "punpcklbw %%mm7, %%mm5           \n\t" /* 0F0G0H0I */\
+        "pshufw $0xF9, %%mm5, %%mm6       \n\t" /* 0G0H0I0I */\
+        "paddw %%mm5, %%mm1               \n\t" /* a */\
+        "paddw %%mm6, %%mm2               \n\t" /* b */\
+        "pshufw $0xBE, %%mm5, %%mm6       \n\t" /* 0H0I0I0H */\
+        "pshufw $0x6F, %%mm5, %%mm5       \n\t" /* 0I0I0H0G */\
+        "paddw %%mm6, %%mm3               \n\t" /* c */\
+        "paddw %%mm5, %%mm4               \n\t" /* d */\
+        "paddw %%mm2, %%mm2               \n\t" /* 2b */\
+        "psubw %%mm2, %%mm3               \n\t" /* c - 2b */\
+        "pmullw "MANGLE(ff_pw_20)", %%mm1 \n\t" /* 20a */\
+        "pmullw "MANGLE(ff_pw_3)", %%mm3  \n\t" /* 3c - 6b */\
+        "psubw %%mm4, %%mm3               \n\t" /* -6b + 3c - d */\
+        "paddw %6, %%mm1                  \n\t"\
+        "paddw %%mm1, %%mm3               \n\t" /* 20a - 6b + 3c - d */\
+        "psraw $5, %%mm3                  \n\t"\
+        "packuswb %%mm3, %%mm0            \n\t"\
         OP_MMX2(%%mm0, (%1), %%mm4, q)\
         \
-        "add %3, %0			\n\t"\
-        "add %4, %1			\n\t"\
-        "decl %2			\n\t"\
-        " jnz 1b			\n\t"\
+        "add %3, %0                       \n\t"\
+        "add %4, %1                       \n\t"\
+        "decl %2                          \n\t"\
+        " jnz 1b                          \n\t"\
         : "+a"(src), "+c"(dst), "+m"(h)\
         : "S"((long)srcStride), "D"((long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(temp), "m"(ROUNDER)\
         : "memory"\
@@ -1992,13 +1995,13 @@ static void OPNAME ## mpeg4_qpel8_h_lowpass_3dnow(uint8_t *dst, uint8_t *src, in
         temp[ 6]= (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 8])*3 - (src[ 3]+src[ 7]);\
         temp[ 7]= (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 8])*6 + (src[ 5]+src[ 7])*3 - (src[ 4]+src[ 6]);\
         asm volatile(\
-            "movq (%0), %%mm0		\n\t"\
-            "movq 8(%0), %%mm1		\n\t"\
-            "paddw %2, %%mm0		\n\t"\
-            "paddw %2, %%mm1		\n\t"\
-            "psraw $5, %%mm0		\n\t"\
-            "psraw $5, %%mm1		\n\t"\
-            "packuswb %%mm1, %%mm0	\n\t"\
+            "movq (%0), %%mm0           \n\t"\
+            "movq 8(%0), %%mm1          \n\t"\
+            "paddw %2, %%mm0            \n\t"\
+            "paddw %2, %%mm1            \n\t"\
+            "psraw $5, %%mm0            \n\t"\
+            "psraw $5, %%mm1            \n\t"\
+            "packuswb %%mm1, %%mm0      \n\t"\
             OP_3DNOW(%%mm0, (%1), %%mm1, q)\
             :: "r"(temp), "r"(dst), "m"(ROUNDER)\
             :"memory"\
@@ -2017,24 +2020,24 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
 \
     /*FIXME unroll */\
     asm volatile(\
-        "pxor %%mm7, %%mm7		\n\t"\
-        "1:				\n\t"\
-        "movq (%0), %%mm0		\n\t"\
-        "movq (%0), %%mm1		\n\t"\
-        "movq 8(%0), %%mm2		\n\t"\
-        "movq 8(%0), %%mm3		\n\t"\
-        "punpcklbw %%mm7, %%mm0		\n\t"\
-        "punpckhbw %%mm7, %%mm1		\n\t"\
-        "punpcklbw %%mm7, %%mm2		\n\t"\
-        "punpckhbw %%mm7, %%mm3		\n\t"\
-        "movq %%mm0, (%1)		\n\t"\
-        "movq %%mm1, 17*8(%1)		\n\t"\
-        "movq %%mm2, 2*17*8(%1)		\n\t"\
-        "movq %%mm3, 3*17*8(%1)		\n\t"\
-        "add $8, %1			\n\t"\
-        "add %3, %0			\n\t"\
-        "decl %2			\n\t"\
-        " jnz 1b			\n\t"\
+        "pxor %%mm7, %%mm7              \n\t"\
+        "1:                             \n\t"\
+        "movq (%0), %%mm0               \n\t"\
+        "movq (%0), %%mm1               \n\t"\
+        "movq 8(%0), %%mm2              \n\t"\
+        "movq 8(%0), %%mm3              \n\t"\
+        "punpcklbw %%mm7, %%mm0         \n\t"\
+        "punpckhbw %%mm7, %%mm1         \n\t"\
+        "punpcklbw %%mm7, %%mm2         \n\t"\
+        "punpckhbw %%mm7, %%mm3         \n\t"\
+        "movq %%mm0, (%1)               \n\t"\
+        "movq %%mm1, 17*8(%1)           \n\t"\
+        "movq %%mm2, 2*17*8(%1)         \n\t"\
+        "movq %%mm3, 3*17*8(%1)         \n\t"\
+        "add $8, %1                     \n\t"\
+        "add %3, %0                     \n\t"\
+        "decl %2                        \n\t"\
+        " jnz 1b                        \n\t"\
         : "+r" (src), "+r" (temp_ptr), "+r"(count)\
         : "r" ((long)srcStride)\
         : "memory"\
@@ -2045,42 +2048,42 @@ static void OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
     \
 /*FIXME reorder for speed */\
     asm volatile(\
-        /*"pxor %%mm7, %%mm7		\n\t"*/\
-        "1:				\n\t"\
-        "movq (%0), %%mm0		\n\t"\
-        "movq 8(%0), %%mm1		\n\t"\
-        "movq 16(%0), %%mm2		\n\t"\
-        "movq 24(%0), %%mm3		\n\t"\
+        /*"pxor %%mm7, %%mm7              \n\t"*/\
+        "1:                             \n\t"\
+        "movq (%0), %%mm0               \n\t"\
+        "movq 8(%0), %%mm1              \n\t"\
+        "movq 16(%0), %%mm2             \n\t"\
+        "movq 24(%0), %%mm3             \n\t"\
         QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
         QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
-        "add %4, %1			\n\t"\
+        "add %4, %1                     \n\t"\
         QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
         \
         QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
-        "add %4, %1			\n\t"\
+        "add %4, %1                     \n\t"\
         QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
         QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 72(%0), (%1, %3), OP)\
-        "add %4, %1			\n\t"\
+        "add %4, %1                     \n\t"\
         QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 80(%0), (%1), OP)\
         QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 88(%0), (%1, %3), OP)\
-        "add %4, %1			\n\t"\
+        "add %4, %1                     \n\t"\
         QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 40(%0), 48(%0), 56(%0), 96(%0), (%1), OP)\
         QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 48(%0), 56(%0), 64(%0),104(%0), (%1, %3), OP)\
-        "add %4, %1			\n\t"\
+        "add %4, %1                     \n\t"\
         QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 56(%0), 64(%0), 72(%0),112(%0), (%1), OP)\
         QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 64(%0), 72(%0), 80(%0),120(%0), (%1, %3), OP)\
-        "add %4, %1			\n\t"\
+        "add %4, %1                     \n\t"\
         QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 72(%0), 80(%0), 88(%0),128(%0), (%1), OP)\
         \
         QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 80(%0), 88(%0), 96(%0),128(%0), (%1, %3), OP)\
-        "add %4, %1			\n\t"  \
+        "add %4, %1                     \n\t"  \
         QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 88(%0), 96(%0),104(%0),120(%0), (%1), OP)\
         QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 96(%0),104(%0),112(%0),112(%0), (%1, %3), OP)\
         \
-        "add $136, %0			\n\t"\
-        "add %6, %1			\n\t"\
-        "decl %2			\n\t"\
-        " jnz 1b			\n\t"\
+        "add $136, %0                   \n\t"\
+        "add %6, %1                     \n\t"\
+        "decl %2                        \n\t"\
+        " jnz 1b                        \n\t"\
         \
         : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
         : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-14*(long)dstStride)\
@@ -2095,18 +2098,18 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
 \
     /*FIXME unroll */\
     asm volatile(\
-        "pxor %%mm7, %%mm7		\n\t"\
-        "1:				\n\t"\
-        "movq (%0), %%mm0		\n\t"\
-        "movq (%0), %%mm1		\n\t"\
-        "punpcklbw %%mm7, %%mm0		\n\t"\
-        "punpckhbw %%mm7, %%mm1		\n\t"\
-        "movq %%mm0, (%1)		\n\t"\
-        "movq %%mm1, 9*8(%1)		\n\t"\
-        "add $8, %1			\n\t"\
-        "add %3, %0			\n\t"\
-        "decl %2			\n\t"\
-        " jnz 1b			\n\t"\
+        "pxor %%mm7, %%mm7              \n\t"\
+        "1:                             \n\t"\
+        "movq (%0), %%mm0               \n\t"\
+        "movq (%0), %%mm1               \n\t"\
+        "punpcklbw %%mm7, %%mm0         \n\t"\
+        "punpckhbw %%mm7, %%mm1         \n\t"\
+        "movq %%mm0, (%1)               \n\t"\
+        "movq %%mm1, 9*8(%1)            \n\t"\
+        "add $8, %1                     \n\t"\
+        "add %3, %0                     \n\t"\
+        "decl %2                        \n\t"\
+        " jnz 1b                        \n\t"\
         : "+r" (src), "+r" (temp_ptr), "+r"(count)\
         : "r" ((long)srcStride)\
         : "memory"\
@@ -2117,30 +2120,30 @@ static void OPNAME ## mpeg4_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src,
     \
 /*FIXME reorder for speed */\
     asm volatile(\
-        /*"pxor %%mm7, %%mm7		\n\t"*/\
-        "1:				\n\t"\
-        "movq (%0), %%mm0		\n\t"\
-        "movq 8(%0), %%mm1		\n\t"\
-        "movq 16(%0), %%mm2		\n\t"\
-        "movq 24(%0), %%mm3		\n\t"\
+        /*"pxor %%mm7, %%mm7              \n\t"*/\
+        "1:                             \n\t"\
+        "movq (%0), %%mm0               \n\t"\
+        "movq 8(%0), %%mm1              \n\t"\
+        "movq 16(%0), %%mm2             \n\t"\
+        "movq 24(%0), %%mm3             \n\t"\
         QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5, 16(%0),  8(%0),   (%0), 32(%0), (%1), OP)\
         QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5,  8(%0),   (%0),   (%0), 40(%0), (%1, %3), OP)\
-        "add %4, %1			\n\t"\
+        "add %4, %1                     \n\t"\
         QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5,   (%0),   (%0),  8(%0), 48(%0), (%1), OP)\
         \
         QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5,   (%0),  8(%0), 16(%0), 56(%0), (%1, %3), OP)\
-        "add %4, %1			\n\t"\
+        "add %4, %1                     \n\t"\
         QPEL_V_LOW(%%mm0, %%mm1, %%mm2, %%mm3, %5, %6, %5,  8(%0), 16(%0), 24(%0), 64(%0), (%1), OP)\
         \
         QPEL_V_LOW(%%mm1, %%mm2, %%mm3, %%mm0, %5, %6, %5, 16(%0), 24(%0), 32(%0), 64(%0), (%1, %3), OP)\
-        "add %4, %1			\n\t"\
+        "add %4, %1                     \n\t"\
         QPEL_V_LOW(%%mm2, %%mm3, %%mm0, %%mm1, %5, %6, %5, 24(%0), 32(%0), 40(%0), 56(%0), (%1), OP)\
         QPEL_V_LOW(%%mm3, %%mm0, %%mm1, %%mm2, %5, %6, %5, 32(%0), 40(%0), 48(%0), 48(%0), (%1, %3), OP)\
                 \
-        "add $72, %0			\n\t"\
-        "add %6, %1			\n\t"\
-        "decl %2			\n\t"\
-        " jnz 1b			\n\t"\
+        "add $72, %0                    \n\t"\
+        "add %6, %1                     \n\t"\
+        "decl %2                        \n\t"\
+        " jnz 1b                        \n\t"\
          \
         : "+r"(temp_ptr), "+r"(dst), "+g"(count)\
         : "r"((long)dstStride), "r"(2*(long)dstStride), /*"m"(ff_pw_20), "m"(ff_pw_3),*/ "m"(ROUNDER), "g"(4-6*(long)dstStride)\
@@ -2371,15 +2374,15 @@ static void OPNAME ## qpel16_mc22_ ## MMX(uint8_t *dst, uint8_t *src, int stride
     OPNAME ## mpeg4_qpel16_v_lowpass_ ## MMX(dst, halfH, stride, 16);\
 }
 
-#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b "	\n\t"
+#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b "        \n\t"
 #define AVG_3DNOW_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp "	\n\t"\
-"pavgusb " #temp ", " #a "	\n\t"\
-"mov" #size " " #a ", " #b "	\n\t"
+"mov" #size " " #b ", " #temp "   \n\t"\
+"pavgusb " #temp ", " #a "        \n\t"\
+"mov" #size " " #a ", " #b "      \n\t"
 #define AVG_MMX2_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp "	\n\t"\
-"pavgb " #temp ", " #a "	\n\t"\
-"mov" #size " " #a ", " #b "	\n\t"
+"mov" #size " " #b ", " #temp "   \n\t"\
+"pavgb " #temp ", " #a "          \n\t"\
+"mov" #size " " #a ", " #b "      \n\t"
 
 QPEL_BASE(put_       , ff_pw_16, _       , PUT_OP, PUT_OP)
 QPEL_BASE(avg_       , ff_pw_16, _       , AVG_MMX2_OP, AVG_3DNOW_OP)
@@ -2402,46 +2405,46 @@ static void just_return() { return; }
 
 static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
     long i=0;
-    
+
     assert(ABS(scale) < 256);
     scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
 
     asm volatile(
-        "pcmpeqw %%mm6, %%mm6		\n\t" // -1w
-        "psrlw $15, %%mm6		\n\t" //  1w
-        "pxor %%mm7, %%mm7		\n\t"
-        "movd  %4, %%mm5		\n\t" 
-        "punpcklwd %%mm5, %%mm5		\n\t" 
-        "punpcklwd %%mm5, %%mm5		\n\t" 
-        "1:				\n\t"
-        "movq  (%1, %0), %%mm0		\n\t" 
-        "movq  8(%1, %0), %%mm1		\n\t"
-        "pmulhw %%mm5, %%mm0		\n\t"
-        "pmulhw %%mm5, %%mm1		\n\t"
-        "paddw %%mm6, %%mm0		\n\t"
-        "paddw %%mm6, %%mm1		\n\t"
-        "psraw $1, %%mm0		\n\t"
-        "psraw $1, %%mm1		\n\t"
-        "paddw (%2, %0), %%mm0		\n\t"
-        "paddw 8(%2, %0), %%mm1		\n\t"
-        "psraw $6, %%mm0		\n\t"
-        "psraw $6, %%mm1		\n\t"
-        "pmullw (%3, %0), %%mm0		\n\t"
-        "pmullw 8(%3, %0), %%mm1	\n\t"
-        "pmaddwd %%mm0, %%mm0		\n\t"
-        "pmaddwd %%mm1, %%mm1		\n\t"
-        "paddd %%mm1, %%mm0		\n\t"
-        "psrld $4, %%mm0		\n\t"
-        "paddd %%mm0, %%mm7		\n\t"
-        "add $16, %0			\n\t"
-        "cmp $128, %0			\n\t" //FIXME optimize & bench
-        " jb 1b				\n\t"
-        "movq %%mm7, %%mm6		\n\t"
-        "psrlq $32, %%mm7		\n\t"
-        "paddd %%mm6, %%mm7		\n\t"
-        "psrld $2, %%mm7		\n\t"
-        "movd %%mm7, %0			\n\t"
-        
+        "pcmpeqw %%mm6, %%mm6           \n\t" // -1w
+        "psrlw $15, %%mm6               \n\t" //  1w
+        "pxor %%mm7, %%mm7              \n\t"
+        "movd  %4, %%mm5                \n\t"
+        "punpcklwd %%mm5, %%mm5         \n\t"
+        "punpcklwd %%mm5, %%mm5         \n\t"
+        "1:                             \n\t"
+        "movq  (%1, %0), %%mm0          \n\t"
+        "movq  8(%1, %0), %%mm1         \n\t"
+        "pmulhw %%mm5, %%mm0            \n\t"
+        "pmulhw %%mm5, %%mm1            \n\t"
+        "paddw %%mm6, %%mm0             \n\t"
+        "paddw %%mm6, %%mm1             \n\t"
+        "psraw $1, %%mm0                \n\t"
+        "psraw $1, %%mm1                \n\t"
+        "paddw (%2, %0), %%mm0          \n\t"
+        "paddw 8(%2, %0), %%mm1         \n\t"
+        "psraw $6, %%mm0                \n\t"
+        "psraw $6, %%mm1                \n\t"
+        "pmullw (%3, %0), %%mm0         \n\t"
+        "pmullw 8(%3, %0), %%mm1        \n\t"
+        "pmaddwd %%mm0, %%mm0           \n\t"
+        "pmaddwd %%mm1, %%mm1           \n\t"
+        "paddd %%mm1, %%mm0             \n\t"
+        "psrld $4, %%mm0                \n\t"
+        "paddd %%mm0, %%mm7             \n\t"
+        "add $16, %0                    \n\t"
+        "cmp $128, %0                   \n\t" //FIXME optimize & bench
+        " jb 1b                         \n\t"
+        "movq %%mm7, %%mm6              \n\t"
+        "psrlq $32, %%mm7               \n\t"
+        "paddd %%mm6, %%mm7             \n\t"
+        "psrld $2, %%mm7                \n\t"
+        "movd %%mm7, %0                 \n\t"
+
         : "+r" (i)
         : "r"(basis), "r"(rem), "r"(weight), "g"(scale)
     );
@@ -2450,44 +2453,44 @@ static int try_8x8basis_mmx(int16_t rem[64], int16_t weight[64], int16_t basis[6
 
 static void add_8x8basis_mmx(int16_t rem[64], int16_t basis[64], int scale){
     long i=0;
-    
+
     if(ABS(scale) < 256){
         scale<<= 16 + 1 - BASIS_SHIFT + RECON_SHIFT;
         asm volatile(
-                "pcmpeqw %%mm6, %%mm6		\n\t" // -1w
-                "psrlw $15, %%mm6		\n\t" //  1w
-                "movd  %3, %%mm5		\n\t" 
-                "punpcklwd %%mm5, %%mm5		\n\t" 
-                "punpcklwd %%mm5, %%mm5		\n\t" 
-                "1:				\n\t"
-                "movq  (%1, %0), %%mm0		\n\t" 
-                "movq  8(%1, %0), %%mm1		\n\t"
-                "pmulhw %%mm5, %%mm0		\n\t"
-                "pmulhw %%mm5, %%mm1		\n\t"
-                "paddw %%mm6, %%mm0		\n\t" 
-                "paddw %%mm6, %%mm1		\n\t"
-                "psraw $1, %%mm0		\n\t"
-                "psraw $1, %%mm1		\n\t"
-                "paddw (%2, %0), %%mm0		\n\t"
-                "paddw 8(%2, %0), %%mm1		\n\t"
-                "movq %%mm0, (%2, %0)		\n\t"
-                "movq %%mm1, 8(%2, %0)		\n\t"
-                "add $16, %0			\n\t"
-                "cmp $128, %0			\n\t" //FIXME optimize & bench
-                " jb 1b				\n\t"
-                
+                "pcmpeqw %%mm6, %%mm6   \n\t" // -1w
+                "psrlw $15, %%mm6       \n\t" //  1w
+                "movd  %3, %%mm5        \n\t"
+                "punpcklwd %%mm5, %%mm5 \n\t"
+                "punpcklwd %%mm5, %%mm5 \n\t"
+                "1:                     \n\t"
+                "movq  (%1, %0), %%mm0  \n\t"
+                "movq  8(%1, %0), %%mm1 \n\t"
+                "pmulhw %%mm5, %%mm0    \n\t"
+                "pmulhw %%mm5, %%mm1    \n\t"
+                "paddw %%mm6, %%mm0     \n\t"
+                "paddw %%mm6, %%mm1     \n\t"
+                "psraw $1, %%mm0        \n\t"
+                "psraw $1, %%mm1        \n\t"
+                "paddw (%2, %0), %%mm0  \n\t"
+                "paddw 8(%2, %0), %%mm1 \n\t"
+                "movq %%mm0, (%2, %0)   \n\t"
+                "movq %%mm1, 8(%2, %0)  \n\t"
+                "add $16, %0            \n\t"
+                "cmp $128, %0           \n\t" //FIXME optimize & bench
+                " jb 1b                 \n\t"
+
                 : "+r" (i)
                 : "r"(basis), "r"(rem), "g"(scale)
         );
     }else{
         for(i=0; i<8*8; i++){
             rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
-        }    
+        }
     }
 }
 
 #include "h264dsp_mmx.c"
-    
+
 /* external functions, from idct_mmx.c */
 void ff_mmx_idct(DCTELEM *block);
 void ff_mmxext_idct(DCTELEM *block);
@@ -2560,16 +2563,16 @@ static void ff_idct_xvid_mmx2_add(uint8_t *dest, int line_size, DCTELEM *block)
     add_pixels_clamped_mmx(block, dest, line_size);
 }
 #endif
-    
+
 void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 {
     mm_flags = mm_support();
 
     if (avctx->dsp_mask) {
-	if (avctx->dsp_mask & FF_MM_FORCE)
-	    mm_flags |= (avctx->dsp_mask & 0xffff);
-	else
-	    mm_flags &= ~(avctx->dsp_mask & 0xffff);
+        if (avctx->dsp_mask & FF_MM_FORCE)
+            mm_flags |= (avctx->dsp_mask & 0xffff);
+        else
+            mm_flags &= ~(avctx->dsp_mask & 0xffff);
     }
 
 #if 0
@@ -2595,7 +2598,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
             if(mm_flags & MM_SSE2){
                 c->fdct = ff_fdct_sse2;
-	    }else if(mm_flags & MM_MMXEXT){
+            }else if(mm_flags & MM_MMXEXT){
                 c->fdct = ff_fdct_mmx2;
             }else{
                 c->fdct = ff_fdct_mmx;
@@ -2698,36 +2701,37 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
         c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
         c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
         c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
-                
+
         c->add_bytes= add_bytes_mmx;
 #ifdef CONFIG_ENCODERS
         c->diff_bytes= diff_bytes_mmx;
-        
+
         c->hadamard8_diff[0]= hadamard8_diff16_mmx;
         c->hadamard8_diff[1]= hadamard8_diff_mmx;
-        
-	c->pix_norm1 = pix_norm1_mmx;
-	c->sse[0] = (mm_flags & MM_SSE2) ? sse16_sse2 : sse16_mmx;
-  	c->sse[1] = sse8_mmx;
+
+        c->pix_norm1 = pix_norm1_mmx;
+        c->sse[0] = (mm_flags & MM_SSE2) ? sse16_sse2 : sse16_mmx;
+          c->sse[1] = sse8_mmx;
         c->vsad[4]= vsad_intra16_mmx;
 
-	c->nsse[0] = nsse16_mmx;
-	c->nsse[1] = nsse8_mmx;
+        c->nsse[0] = nsse16_mmx;
+        c->nsse[1] = nsse8_mmx;
         if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
             c->vsad[0] = vsad16_mmx;
         }
-        
+
         if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
             c->try_8x8basis= try_8x8basis_mmx;
         }
         c->add_8x8basis= add_8x8basis_mmx;
-        
+
 #endif //CONFIG_ENCODERS
 
         c->h263_v_loop_filter= h263_v_loop_filter_mmx;
-        c->h263_h_loop_filter= h263_h_loop_filter_mmx;        
-	c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx;
-        
+        c->h263_h_loop_filter= h263_h_loop_filter_mmx;
+        c->put_h264_chroma_pixels_tab[0]= put_h264_chroma_mc8_mmx;
+        c->put_h264_chroma_pixels_tab[1]= put_h264_chroma_mc4_mmx;
+
         if (mm_flags & MM_MMXEXT) {
             c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
             c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
@@ -2825,7 +2829,8 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             dspfunc(avg_h264_qpel, 2, 4);
 #undef dspfunc
 
-	    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2;
+            c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2;
+            c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_mmx2;
             c->h264_v_loop_filter_luma= h264_v_loop_filter_luma_mmx2;
             c->h264_h_loop_filter_luma= h264_h_loop_filter_luma_mmx2;
             c->h264_v_loop_filter_chroma= h264_v_loop_filter_chroma_mmx2;
@@ -2936,10 +2941,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
             dspfunc(avg_h264_qpel, 1, 8);
             dspfunc(avg_h264_qpel, 2, 4);
 
-	    c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow;
+            c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow;
+            c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow;
         }
     }
-        
+
 #ifdef CONFIG_ENCODERS
     dsputil_init_pix_mmx(c, avctx);
 #endif //CONFIG_ENCODERS
diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h b/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h
index c70891304..440c5bb9c 100644
--- a/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h
+++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx_avg.h
@@ -15,603 +15,603 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
  * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
  * and improved by Zdenek Kabelac <kabi@users.sf.net>
  */
- 
+
 /* XXX: we use explicit registers to avoid a gcc 2.95.2 register asm
    clobber bug - now it will work with 2.95.2 and also with -fPIC
  */
 static void DEF(put_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm __volatile(
-	"lea (%3, %3), %%"REG_a"	\n\t"
-	"1:				\n\t"
-	"movq (%1), %%mm0		\n\t"
-	"movq (%1, %3), %%mm1		\n\t"
-	PAVGB" 1(%1), %%mm0		\n\t"
-	PAVGB" 1(%1, %3), %%mm1		\n\t"
-	"movq %%mm0, (%2)		\n\t"
-	"movq %%mm1, (%2, %3)		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"movq (%1), %%mm0		\n\t"
-	"movq (%1, %3), %%mm1		\n\t"
-	PAVGB" 1(%1), %%mm0		\n\t"
-	PAVGB" 1(%1, %3), %%mm1		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	"movq %%mm0, (%2)		\n\t"
-	"movq %%mm1, (%2, %3)		\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"subl $4, %0			\n\t"
-	"jnz 1b				\n\t"
-	:"+g"(h), "+S"(pixels), "+D"(block)
-	:"r" ((long)line_size)
-	:"%"REG_a, "memory");
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "1:                             \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        PAVGB" 1(%1, %3), %%mm1         \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        PAVGB" 1(%1, %3), %%mm1         \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
 }
 
 static void DEF(put_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 {
     __asm __volatile(
-	"testl $1, %0			\n\t"
-	    " jz 1f				\n\t"
-	"movd	(%1), %%mm0		\n\t"
-	"movd	(%2), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"add	$4, %2			\n\t"
-	PAVGB" %%mm1, %%mm0		\n\t"
-	"movd	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"decl	%0			\n\t"
-	"1:				\n\t"
-	"movd	(%1), %%mm0		\n\t"
-	"add	%4, %1			\n\t"
-	"movd	(%1), %%mm1		\n\t"
-	"movd	(%2), %%mm2		\n\t"
-	"movd	4(%2), %%mm3		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGB" %%mm2, %%mm0		\n\t"
-	PAVGB" %%mm3, %%mm1		\n\t"
-	"movd	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movd	%%mm1, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movd	(%1), %%mm0		\n\t"
-	"add	%4, %1			\n\t"
-	"movd	(%1), %%mm1		\n\t"
-	"movd	8(%2), %%mm2		\n\t"
-	"movd	12(%2), %%mm3		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGB" %%mm2, %%mm0		\n\t"
-	PAVGB" %%mm3, %%mm1		\n\t"
-	"movd	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movd	%%mm1, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"add	$16, %2			\n\t"
-	"subl	$4, %0			\n\t"
-	"jnz	1b			\n\t"
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movd   (%1), %%mm0             \n\t"
+        "movd   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $4, %2                  \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        "movd   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movd   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movd   (%1), %%mm1             \n\t"
+        "movd   (%2), %%mm2             \n\t"
+        "movd   4(%2), %%mm3            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "movd   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movd   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movd   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movd   (%1), %%mm1             \n\t"
+        "movd   8(%2), %%mm2            \n\t"
+        "movd   12(%2), %%mm3           \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "movd   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movd   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $16, %2                 \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
-	:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #else
-	:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #endif
-	:"S"((long)src1Stride), "D"((long)dstStride)
-	:"memory"); 
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
 }
 
 
 static void DEF(put_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 {
     __asm __volatile(
-	"testl $1, %0			\n\t"
-	    " jz 1f				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	(%2), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"add	$8, %2			\n\t"
-	PAVGB" %%mm1, %%mm0		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"decl	%0			\n\t"
-	"1:				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"add	%4, %1			\n\t"
-	"movq	(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGB" (%2), %%mm0		\n\t"
-	PAVGB" 8(%2), %%mm1		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	%%mm1, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"add	%4, %1			\n\t"
-	"movq	(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGB" 16(%2), %%mm0		\n\t"
-	PAVGB" 24(%2), %%mm1		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	%%mm1, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"add	$32, %2			\n\t"
-	"subl	$4, %0			\n\t"
-	"jnz	1b			\n\t"
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $8, %2                  \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 8(%2), %%mm1             \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" 16(%2), %%mm0            \n\t"
+        PAVGB" 24(%2), %%mm1            \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
-	:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #else
-	:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #endif
-	:"S"((long)src1Stride), "D"((long)dstStride)
-	:"memory"); 
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
 //the following should be used, though better not with gcc ...
-/*	:"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
-	:"r"(src1Stride), "r"(dstStride)
-	:"memory");*/
+/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+        :"r"(src1Stride), "r"(dstStride)
+        :"memory");*/
 }
 
 static void DEF(put_no_rnd_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 {
     __asm __volatile(
-	"pcmpeqb %%mm6, %%mm6	\n\t"
-	"testl $1, %0			\n\t"
-	    " jz 1f				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	(%2), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"add	$8, %2			\n\t"
-	"pxor %%mm6, %%mm0		\n\t"
-	"pxor %%mm6, %%mm1		\n\t"
-	PAVGB" %%mm1, %%mm0		\n\t"
-	"pxor %%mm6, %%mm0		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"decl	%0			\n\t"
-	"1:				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"add	%4, %1			\n\t"
-	"movq	(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"movq	(%2), %%mm2		\n\t"
-	"movq	8(%2), %%mm3		\n\t"
-	"pxor %%mm6, %%mm0		\n\t"
-	"pxor %%mm6, %%mm1		\n\t"
-	"pxor %%mm6, %%mm2		\n\t"
-	"pxor %%mm6, %%mm3		\n\t"
-	PAVGB" %%mm2, %%mm0		\n\t"
-	PAVGB" %%mm3, %%mm1		\n\t"
-	"pxor %%mm6, %%mm0		\n\t"
-	"pxor %%mm6, %%mm1		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	%%mm1, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"add	%4, %1			\n\t"
-	"movq	(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"movq	16(%2), %%mm2		\n\t"
-	"movq	24(%2), %%mm3		\n\t"
-	"pxor %%mm6, %%mm0		\n\t"
-	"pxor %%mm6, %%mm1		\n\t"
-	"pxor %%mm6, %%mm2		\n\t"
-	"pxor %%mm6, %%mm3		\n\t"
-	PAVGB" %%mm2, %%mm0		\n\t"
-	PAVGB" %%mm3, %%mm1		\n\t"
-	"pxor %%mm6, %%mm0		\n\t"
-	"pxor %%mm6, %%mm1		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	%%mm1, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"add	$32, %2			\n\t"
-	"subl	$4, %0			\n\t"
-	"jnz	1b			\n\t"
+        "pcmpeqb %%mm6, %%mm6           \n\t"
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $8, %2                  \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%2), %%mm2             \n\t"
+        "movq   8(%2), %%mm3            \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm3              \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   16(%2), %%mm2           \n\t"
+        "movq   24(%2), %%mm3           \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm3              \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
-	:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #else
-	:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #endif
-	:"S"((long)src1Stride), "D"((long)dstStride)
-	:"memory"); 
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
 //the following should be used, though better not with gcc ...
-/*	:"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
-	:"r"(src1Stride), "r"(dstStride)
-	:"memory");*/
+/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+        :"r"(src1Stride), "r"(dstStride)
+        :"memory");*/
 }
 
 static void DEF(avg_pixels4_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 {
     __asm __volatile(
-	"testl $1, %0			\n\t"
-	    " jz 1f				\n\t"
-	"movd	(%1), %%mm0		\n\t"
-	"movd	(%2), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"add	$4, %2			\n\t"
-	PAVGB" %%mm1, %%mm0		\n\t"
-	PAVGB" (%3), %%mm0		\n\t"
-	"movd	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"decl	%0			\n\t"
-	"1:				\n\t"
-	"movd	(%1), %%mm0		\n\t"
-	"add	%4, %1			\n\t"
-	"movd	(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGB" (%2), %%mm0		\n\t"
-	PAVGB" 4(%2), %%mm1		\n\t"
-	PAVGB" (%3), %%mm0	 	\n\t"
-	"movd	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	PAVGB" (%3), %%mm1	 	\n\t"
-	"movd	%%mm1, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movd	(%1), %%mm0		\n\t"
-	"add	%4, %1			\n\t"
-	"movd	(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGB" 8(%2), %%mm0		\n\t"
-	PAVGB" 12(%2), %%mm1		\n\t"
-	PAVGB" (%3), %%mm0	 	\n\t"
-	"movd	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	PAVGB" (%3), %%mm1	 	\n\t"
-	"movd	%%mm1, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"add	$16, %2			\n\t"
-	"subl	$4, %0			\n\t"
-	"jnz	1b			\n\t"
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movd   (%1), %%mm0             \n\t"
+        "movd   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $4, %2                  \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        "movd   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movd   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movd   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 4(%2), %%mm1             \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        "movd   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        PAVGB" (%3), %%mm1              \n\t"
+        "movd   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movd   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movd   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" 8(%2), %%mm0             \n\t"
+        PAVGB" 12(%2), %%mm1            \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        "movd   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        PAVGB" (%3), %%mm1              \n\t"
+        "movd   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $16, %2                 \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
-	:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #else
-	:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #endif
-	:"S"((long)src1Stride), "D"((long)dstStride)
-	:"memory"); 
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
 }
 
 
 static void DEF(avg_pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 {
     __asm __volatile(
-	"testl $1, %0			\n\t"
-	    " jz 1f				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	(%2), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"add	$8, %2			\n\t"
-	PAVGB" %%mm1, %%mm0		\n\t"
-	PAVGB" (%3), %%mm0		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"decl	%0			\n\t"
-	"1:				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"add	%4, %1			\n\t"
-	"movq	(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGB" (%2), %%mm0		\n\t"
-	PAVGB" 8(%2), %%mm1		\n\t"
-	PAVGB" (%3), %%mm0	 	\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	PAVGB" (%3), %%mm1	 	\n\t"
-	"movq	%%mm1, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"add	%4, %1			\n\t"
-	"movq	(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGB" 16(%2), %%mm0		\n\t"
-	PAVGB" 24(%2), %%mm1		\n\t"
-	PAVGB" (%3), %%mm0	 	\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	PAVGB" (%3), %%mm1	 	\n\t"
-	"movq	%%mm1, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"add	$32, %2			\n\t"
-	"subl	$4, %0			\n\t"
-	"jnz	1b			\n\t"
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $8, %2                  \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 8(%2), %%mm1             \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        PAVGB" (%3), %%mm1              \n\t"
+        "movq   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" 16(%2), %%mm0            \n\t"
+        PAVGB" 24(%2), %%mm1            \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        PAVGB" (%3), %%mm1              \n\t"
+        "movq   %%mm1, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
-	:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #else
-	:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #endif
-	:"S"((long)src1Stride), "D"((long)dstStride)
-	:"memory"); 
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
 //the following should be used, though better not with gcc ...
-/*	:"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
-	:"r"(src1Stride), "r"(dstStride)
-	:"memory");*/
+/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+        :"r"(src1Stride), "r"(dstStride)
+        :"memory");*/
 }
 
 static void DEF(put_pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm __volatile(
-	"lea (%3, %3), %%"REG_a"	\n\t"
-	"1:				\n\t"
-	"movq (%1), %%mm0		\n\t"
-	"movq (%1, %3), %%mm1		\n\t"
-	"movq 8(%1), %%mm2		\n\t"
-	"movq 8(%1, %3), %%mm3		\n\t"
-	PAVGB" 1(%1), %%mm0		\n\t"
-	PAVGB" 1(%1, %3), %%mm1		\n\t"
-	PAVGB" 9(%1), %%mm2		\n\t"
-	PAVGB" 9(%1, %3), %%mm3		\n\t"
-	"movq %%mm0, (%2)		\n\t"
-	"movq %%mm1, (%2, %3)		\n\t"
-	"movq %%mm2, 8(%2)		\n\t"
-	"movq %%mm3, 8(%2, %3)		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"movq (%1), %%mm0		\n\t"
-	"movq (%1, %3), %%mm1		\n\t"
-	"movq 8(%1), %%mm2		\n\t"
-	"movq 8(%1, %3), %%mm3		\n\t"
-	PAVGB" 1(%1), %%mm0		\n\t"
-	PAVGB" 1(%1, %3), %%mm1		\n\t"
-	PAVGB" 9(%1), %%mm2		\n\t"
-	PAVGB" 9(%1, %3), %%mm3		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	"movq %%mm0, (%2)		\n\t"
-	"movq %%mm1, (%2, %3)		\n\t"
-	"movq %%mm2, 8(%2)		\n\t"
-	"movq %%mm3, 8(%2, %3)		\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"subl $4, %0			\n\t"
-	"jnz 1b				\n\t"
-	:"+g"(h), "+S"(pixels), "+D"(block)
-	:"r" ((long)line_size)
-	:"%"REG_a, "memory");
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "1:                             \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq 8(%1), %%mm2              \n\t"
+        "movq 8(%1, %3), %%mm3          \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        PAVGB" 1(%1, %3), %%mm1         \n\t"
+        PAVGB" 9(%1), %%mm2             \n\t"
+        PAVGB" 9(%1, %3), %%mm3         \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "movq %%mm2, 8(%2)              \n\t"
+        "movq %%mm3, 8(%2, %3)          \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq 8(%1), %%mm2              \n\t"
+        "movq 8(%1, %3), %%mm3          \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        PAVGB" 1(%1, %3), %%mm1         \n\t"
+        PAVGB" 9(%1), %%mm2             \n\t"
+        PAVGB" 9(%1, %3), %%mm3         \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "movq %%mm2, 8(%2)              \n\t"
+        "movq %%mm3, 8(%2, %3)          \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
 }
 
 static void DEF(put_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 {
     __asm __volatile(
-	"testl $1, %0			\n\t"
-	    " jz 1f				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	8(%1), %%mm1		\n\t"
-	PAVGB" (%2), %%mm0		\n\t"
-	PAVGB" 8(%2), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"add	$16, %2			\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"movq	%%mm1, 8(%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"decl	%0			\n\t"
-	"1:				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	8(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGB" (%2), %%mm0		\n\t"
-	PAVGB" 8(%2), %%mm1		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"movq	%%mm1, 8(%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	8(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGB" 16(%2), %%mm0		\n\t"
-	PAVGB" 24(%2), %%mm1		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"movq	%%mm1, 8(%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"add	$32, %2			\n\t"
-	"subl	$2, %0			\n\t"
-	"jnz	1b			\n\t"
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 8(%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $16, %2                 \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 8(%2), %%mm1             \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" 16(%2), %%mm0            \n\t"
+        PAVGB" 24(%2), %%mm1            \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $2, %0                  \n\t"
+        "jnz    1b                      \n\t"
 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
-	:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #else
-	:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #endif
-	:"S"((long)src1Stride), "D"((long)dstStride)
-	:"memory"); 
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
 //the following should be used, though better not with gcc ...
-/*	:"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
-	:"r"(src1Stride), "r"(dstStride)
-	:"memory");*/
+/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+        :"r"(src1Stride), "r"(dstStride)
+        :"memory");*/
 }
 
 static void DEF(avg_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 {
     __asm __volatile(
-	"testl $1, %0			\n\t"
-	    " jz 1f				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	8(%1), %%mm1		\n\t"
-	PAVGB" (%2), %%mm0		\n\t"
-	PAVGB" 8(%2), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"add	$16, %2			\n\t"
-	PAVGB" (%3), %%mm0		\n\t"
-	PAVGB" 8(%3), %%mm1		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"movq	%%mm1, 8(%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"decl	%0			\n\t"
-	"1:				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	8(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGB" (%2), %%mm0		\n\t"
-	PAVGB" 8(%2), %%mm1		\n\t"
-	PAVGB" (%3), %%mm0		\n\t"
-	PAVGB" 8(%3), %%mm1		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"movq	%%mm1, 8(%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	8(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGB" 16(%2), %%mm0		\n\t"
-	PAVGB" 24(%2), %%mm1		\n\t"
-	PAVGB" (%3), %%mm0		\n\t"
-	PAVGB" 8(%3), %%mm1		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"movq	%%mm1, 8(%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"add	$32, %2			\n\t"
-	"subl	$2, %0			\n\t"
-	"jnz	1b			\n\t"
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 8(%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $16, %2                 \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        PAVGB" 8(%3), %%mm1             \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" 8(%2), %%mm1             \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        PAVGB" 8(%3), %%mm1             \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGB" 16(%2), %%mm0            \n\t"
+        PAVGB" 24(%2), %%mm1            \n\t"
+        PAVGB" (%3), %%mm0              \n\t"
+        PAVGB" 8(%3), %%mm1             \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $2, %0                  \n\t"
+        "jnz    1b                      \n\t"
 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
-	:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #else
-	:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #endif
-	:"S"((long)src1Stride), "D"((long)dstStride)
-	:"memory"); 
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
 //the following should be used, though better not with gcc ...
-/*	:"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
-	:"r"(src1Stride), "r"(dstStride)
-	:"memory");*/
+/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+        :"r"(src1Stride), "r"(dstStride)
+        :"memory");*/
 }
 
 static void DEF(put_no_rnd_pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 {
     __asm __volatile(
-	"pcmpeqb %%mm6, %%mm6\n\t"
-	"testl $1, %0			\n\t"
-	    " jz 1f				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	8(%1), %%mm1		\n\t"
-	"movq	(%2), %%mm2		\n\t"
-	"movq	8(%2), %%mm3		\n\t"
-	"pxor %%mm6, %%mm0		\n\t"
-	"pxor %%mm6, %%mm1		\n\t"
-	"pxor %%mm6, %%mm2		\n\t"
-	"pxor %%mm6, %%mm3		\n\t"
-	PAVGB" %%mm2, %%mm0		\n\t"
-	PAVGB" %%mm3, %%mm1		\n\t"
-	"pxor %%mm6, %%mm0		\n\t"
-	"pxor %%mm6, %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"add	$16, %2			\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"movq	%%mm1, 8(%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"decl	%0			\n\t"
-	"1:				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	8(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"movq	(%2), %%mm2		\n\t"
-	"movq	8(%2), %%mm3		\n\t"
-	"pxor %%mm6, %%mm0		\n\t"
-	"pxor %%mm6, %%mm1		\n\t"
-	"pxor %%mm6, %%mm2		\n\t"
-	"pxor %%mm6, %%mm3		\n\t"
-	PAVGB" %%mm2, %%mm0		\n\t"
-	PAVGB" %%mm3, %%mm1		\n\t"
-	"pxor %%mm6, %%mm0		\n\t"
-	"pxor %%mm6, %%mm1		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"movq	%%mm1, 8(%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	8(%1), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"movq	16(%2), %%mm2		\n\t"
-	"movq	24(%2), %%mm3		\n\t"
-	"pxor %%mm6, %%mm0		\n\t"
-	"pxor %%mm6, %%mm1		\n\t"
-	"pxor %%mm6, %%mm2		\n\t"
-	"pxor %%mm6, %%mm3		\n\t"
-	PAVGB" %%mm2, %%mm0		\n\t"
-	PAVGB" %%mm3, %%mm1		\n\t"
-	"pxor %%mm6, %%mm0		\n\t"
-	"pxor %%mm6, %%mm1		\n\t"
-	"movq	%%mm0, (%3)		\n\t"
-	"movq	%%mm1, 8(%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"add	$32, %2			\n\t"
-	"subl	$2, %0			\n\t"
-	"jnz	1b			\n\t"
+        "pcmpeqb %%mm6, %%mm6           \n\t"
+        "testl $1, %0                   \n\t"
+            " jz 1f                     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "movq   (%2), %%mm2             \n\t"
+        "movq   8(%2), %%mm3            \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm3              \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $16, %2                 \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%2), %%mm2             \n\t"
+        "movq   8(%2), %%mm3            \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm3              \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   8(%1), %%mm1            \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   16(%2), %%mm2           \n\t"
+        "movq   24(%2), %%mm3           \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm3              \n\t"
+        PAVGB" %%mm2, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm1             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "movq   %%mm0, (%3)             \n\t"
+        "movq   %%mm1, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $2, %0                  \n\t"
+        "jnz    1b                      \n\t"
 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
-	:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #else
-	:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #endif
-	:"S"((long)src1Stride), "D"((long)dstStride)
-	:"memory"); 
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
 //the following should be used, though better not with gcc ...
-/*	:"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
-	:"r"(src1Stride), "r"(dstStride)
-	:"memory");*/
+/*        :"+g"(h), "+r"(src1), "+r"(src2), "+r"(dst)
+        :"r"(src1Stride), "r"(dstStride)
+        :"memory");*/
 }
- 
+
 /* GL: this function does incorrect rounding if overflow */
 static void DEF(put_no_rnd_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     MOVQ_BONE(mm6);
     __asm __volatile(
-	"lea (%3, %3), %%"REG_a"	\n\t"
-	"1:				\n\t"
-	"movq (%1), %%mm0		\n\t"
-	"movq (%1, %3), %%mm2		\n\t"
-	"movq 1(%1), %%mm1		\n\t"
-	"movq 1(%1, %3), %%mm3		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	"psubusb %%mm6, %%mm0		\n\t"
-	"psubusb %%mm6, %%mm2		\n\t"
-	PAVGB" %%mm1, %%mm0		\n\t"
-	PAVGB" %%mm3, %%mm2		\n\t"
-	"movq %%mm0, (%2)		\n\t"
-	"movq %%mm2, (%2, %3)		\n\t"
-	"movq (%1), %%mm0		\n\t"
-	"movq 1(%1), %%mm1		\n\t"
-	"movq (%1, %3), %%mm2		\n\t"
-	"movq 1(%1, %3), %%mm3		\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	"psubusb %%mm6, %%mm0		\n\t"
-	"psubusb %%mm6, %%mm2		\n\t"
-	PAVGB" %%mm1, %%mm0		\n\t"
-	PAVGB" %%mm3, %%mm2		\n\t"
-	"movq %%mm0, (%2)		\n\t"
-	"movq %%mm2, (%2, %3)		\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"subl $4, %0			\n\t"
-	"jnz 1b				\n\t"
-	:"+g"(h), "+S"(pixels), "+D"(block)
-	:"r" ((long)line_size)
-	:"%"REG_a, "memory");
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "1:                             \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm2           \n\t"
+        "movq 1(%1), %%mm1              \n\t"
+        "movq 1(%1, %3), %%mm3          \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "psubusb %%mm6, %%mm0           \n\t"
+        "psubusb %%mm6, %%mm2           \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm2             \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq 1(%1), %%mm1              \n\t"
+        "movq (%1, %3), %%mm2           \n\t"
+        "movq 1(%1, %3), %%mm3          \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "psubusb %%mm6, %%mm0           \n\t"
+        "psubusb %%mm6, %%mm2           \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm2             \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
 }
 
 static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm __volatile(
-	"lea (%3, %3), %%"REG_a"	\n\t"
-	"movq (%1), %%mm0		\n\t"
-	"sub %3, %2			\n\t"
-	"1:				\n\t"
-	"movq (%1, %3), %%mm1		\n\t"
-	"movq (%1, %%"REG_a"), %%mm2	\n\t"
-	"add %%"REG_a", %1		\n\t"
-	PAVGB" %%mm1, %%mm0		\n\t"
-	PAVGB" %%mm2, %%mm1		\n\t"
-	"movq %%mm0, (%2, %3)		\n\t"
-	"movq %%mm1, (%2, %%"REG_a")	\n\t"
-	"movq (%1, %3), %%mm1		\n\t"
-	"movq (%1, %%"REG_a"), %%mm0	\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	PAVGB" %%mm1, %%mm2		\n\t"
-	PAVGB" %%mm0, %%mm1		\n\t"
-	"movq %%mm2, (%2, %3)		\n\t"
-	"movq %%mm1, (%2, %%"REG_a")	\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"subl $4, %0			\n\t"
-	"jnz 1b				\n\t"
-	:"+g"(h), "+S"(pixels), "+D" (block)
-	:"r" ((long)line_size)
-	:"%"REG_a, "memory");
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "sub %3, %2                     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm2    \n\t"
+        "add %%"REG_a", %1              \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm2, %%mm1             \n\t"
+        "movq %%mm0, (%2, %3)           \n\t"
+        "movq %%mm1, (%2, %%"REG_a")    \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "add %%"REG_a", %1              \n\t"
+        PAVGB" %%mm1, %%mm2             \n\t"
+        PAVGB" %%mm0, %%mm1             \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "movq %%mm1, (%2, %%"REG_a")    \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D" (block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
 }
 
 /* GL: this function does incorrect rounding if overflow */
@@ -619,173 +619,173 @@ static void DEF(put_no_rnd_pixels8_y2)(uint8_t *block, const uint8_t *pixels, in
 {
     MOVQ_BONE(mm6);
     __asm __volatile(
-	"lea (%3, %3), %%"REG_a"	\n\t"
-	"movq (%1), %%mm0		\n\t"
-	"sub %3, %2			\n\t"
-	"1:				\n\t"
-	"movq (%1, %3), %%mm1		\n\t"
-	"movq (%1, %%"REG_a"), %%mm2	\n\t"
-	"add %%"REG_a", %1		\n\t"
-	"psubusb %%mm6, %%mm1		\n\t"
-	PAVGB" %%mm1, %%mm0		\n\t"
-	PAVGB" %%mm2, %%mm1		\n\t"
-	"movq %%mm0, (%2, %3)		\n\t"
-	"movq %%mm1, (%2, %%"REG_a")	\n\t"
-	"movq (%1, %3), %%mm1		\n\t"
-	"movq (%1, %%"REG_a"), %%mm0	\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	"psubusb %%mm6, %%mm1		\n\t"
-	PAVGB" %%mm1, %%mm2		\n\t"
-	PAVGB" %%mm0, %%mm1		\n\t"
-	"movq %%mm2, (%2, %3)		\n\t"
-	"movq %%mm1, (%2, %%"REG_a")	\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"subl $4, %0			\n\t"
-	"jnz 1b				\n\t"
-	:"+g"(h), "+S"(pixels), "+D" (block)
-	:"r" ((long)line_size)
-	:"%"REG_a, "memory");
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "sub %3, %2                     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm2    \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "psubusb %%mm6, %%mm1           \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm2, %%mm1             \n\t"
+        "movq %%mm0, (%2, %3)           \n\t"
+        "movq %%mm1, (%2, %%"REG_a")    \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "psubusb %%mm6, %%mm1           \n\t"
+        PAVGB" %%mm1, %%mm2             \n\t"
+        PAVGB" %%mm0, %%mm1             \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "movq %%mm1, (%2, %%"REG_a")    \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D" (block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
 }
 
 static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm __volatile(
-	"lea (%3, %3), %%"REG_a"	\n\t"
-	"1:				\n\t"
-	"movq (%2), %%mm0		\n\t"
-	"movq (%2, %3), %%mm1		\n\t"
-	PAVGB" (%1), %%mm0		\n\t"
-	PAVGB" (%1, %3), %%mm1		\n\t"
-	"movq %%mm0, (%2)		\n\t"
-	"movq %%mm1, (%2, %3)		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"movq (%2), %%mm0		\n\t"
-	"movq (%2, %3), %%mm1		\n\t"
-	PAVGB" (%1), %%mm0		\n\t"
-	PAVGB" (%1, %3), %%mm1		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	"movq %%mm0, (%2)		\n\t"
-	"movq %%mm1, (%2, %3)		\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"subl $4, %0			\n\t"
-	"jnz 1b				\n\t"
-	:"+g"(h), "+S"(pixels), "+D"(block)
-	:"r" ((long)line_size)
-	:"%"REG_a, "memory");
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "1:                             \n\t"
+        "movq (%2), %%mm0               \n\t"
+        "movq (%2, %3), %%mm1           \n\t"
+        PAVGB" (%1), %%mm0              \n\t"
+        PAVGB" (%1, %3), %%mm1          \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "movq (%2), %%mm0               \n\t"
+        "movq (%2, %3), %%mm1           \n\t"
+        PAVGB" (%1), %%mm0              \n\t"
+        PAVGB" (%1, %3), %%mm1          \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
 }
 
 static void DEF(avg_pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm __volatile(
-	"lea (%3, %3), %%"REG_a"	\n\t"
-	"1:				\n\t"
-	"movq (%1), %%mm0		\n\t"
-	"movq (%1, %3), %%mm2		\n\t"
-	PAVGB" 1(%1), %%mm0		\n\t"
-	PAVGB" 1(%1, %3), %%mm2		\n\t"
-	PAVGB" (%2), %%mm0		\n\t"
-	PAVGB" (%2, %3), %%mm2		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	"movq %%mm0, (%2)		\n\t"
-	"movq %%mm2, (%2, %3)		\n\t"
-	"movq (%1), %%mm0		\n\t"
-	"movq (%1, %3), %%mm2		\n\t"
-	PAVGB" 1(%1), %%mm0		\n\t"
-	PAVGB" 1(%1, %3), %%mm2		\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	PAVGB" (%2), %%mm0		\n\t"
-	PAVGB" (%2, %3), %%mm2		\n\t"
-	"movq %%mm0, (%2)		\n\t"
-	"movq %%mm2, (%2, %3)		\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"subl $4, %0			\n\t"
-	"jnz 1b				\n\t"
-	:"+g"(h), "+S"(pixels), "+D"(block)
-	:"r" ((long)line_size)
-	:"%"REG_a, "memory");
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "1:                             \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm2           \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        PAVGB" 1(%1, %3), %%mm2         \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" (%2, %3), %%mm2          \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "movq (%1, %3), %%mm2           \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        PAVGB" 1(%1, %3), %%mm2         \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "add %%"REG_a", %1              \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" (%2, %3), %%mm2          \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
 }
 
 static void DEF(avg_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm __volatile(
-	"lea (%3, %3), %%"REG_a"	\n\t"
-	"movq (%1), %%mm0		\n\t"
-	"sub %3, %2			\n\t"
-	"1:				\n\t"
-	"movq (%1, %3), %%mm1		\n\t"
-	"movq (%1, %%"REG_a"), %%mm2	\n\t"
-	"add %%"REG_a", %1		\n\t"
-	PAVGB" %%mm1, %%mm0		\n\t"
-	PAVGB" %%mm2, %%mm1		\n\t"
-	"movq (%2, %3), %%mm3		\n\t"
-	"movq (%2, %%"REG_a"), %%mm4	\n\t"
-	PAVGB" %%mm3, %%mm0		\n\t"
-	PAVGB" %%mm4, %%mm1		\n\t"
-	"movq %%mm0, (%2, %3)		\n\t"
-	"movq %%mm1, (%2, %%"REG_a")	\n\t"
-	"movq (%1, %3), %%mm1		\n\t"
-	"movq (%1, %%"REG_a"), %%mm0	\n\t"
-	PAVGB" %%mm1, %%mm2		\n\t"
-	PAVGB" %%mm0, %%mm1		\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	"movq (%2, %3), %%mm3		\n\t"
-	"movq (%2, %%"REG_a"), %%mm4	\n\t"
-	PAVGB" %%mm3, %%mm2		\n\t"
-	PAVGB" %%mm4, %%mm1		\n\t"
-	"movq %%mm2, (%2, %3)		\n\t"
-	"movq %%mm1, (%2, %%"REG_a")	\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"subl $4, %0			\n\t"
-	"jnz 1b				\n\t"
-	:"+g"(h), "+S"(pixels), "+D"(block)
-	:"r" ((long)line_size)
-	:"%"REG_a, "memory");
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "movq (%1), %%mm0               \n\t"
+        "sub %3, %2                     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm2    \n\t"
+        "add %%"REG_a", %1              \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm2, %%mm1             \n\t"
+        "movq (%2, %3), %%mm3           \n\t"
+        "movq (%2, %%"REG_a"), %%mm4    \n\t"
+        PAVGB" %%mm3, %%mm0             \n\t"
+        PAVGB" %%mm4, %%mm1             \n\t"
+        "movq %%mm0, (%2, %3)           \n\t"
+        "movq %%mm1, (%2, %%"REG_a")    \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        PAVGB" %%mm1, %%mm2             \n\t"
+        PAVGB" %%mm0, %%mm1             \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "add %%"REG_a", %1              \n\t"
+        "movq (%2, %3), %%mm3           \n\t"
+        "movq (%2, %%"REG_a"), %%mm4    \n\t"
+        PAVGB" %%mm3, %%mm2             \n\t"
+        PAVGB" %%mm4, %%mm1             \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "movq %%mm1, (%2, %%"REG_a")    \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a, "memory");
 }
 
-// Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter 
+// Note this is not correctly rounded, but this function is only used for b frames so it doesnt matter
 static void DEF(avg_pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     MOVQ_BONE(mm6);
     __asm __volatile(
-	"lea (%3, %3), %%"REG_a"	\n\t"
-	"movq (%1), %%mm0		\n\t"
-	PAVGB" 1(%1), %%mm0		\n\t"
-	".balign 8			\n\t"
-	"1:				\n\t"
-	"movq (%1, %%"REG_a"), %%mm2	\n\t"
-	"movq (%1, %3), %%mm1		\n\t"
-	"psubusb %%mm6, %%mm2		\n\t"
-	PAVGB" 1(%1, %3), %%mm1		\n\t"
-	PAVGB" 1(%1, %%"REG_a"), %%mm2	\n\t"
-	"add %%"REG_a", %1		\n\t"
-	PAVGB" %%mm1, %%mm0		\n\t"
-	PAVGB" %%mm2, %%mm1		\n\t"
-	PAVGB" (%2), %%mm0		\n\t"
-	PAVGB" (%2, %3), %%mm1		\n\t"
-	"movq %%mm0, (%2)		\n\t"
-	"movq %%mm1, (%2, %3)		\n\t"
-	"movq (%1, %3), %%mm1		\n\t"
-	"movq (%1, %%"REG_a"), %%mm0	\n\t"
-	PAVGB" 1(%1, %3), %%mm1		\n\t"
-	PAVGB" 1(%1, %%"REG_a"), %%mm0	\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"add %%"REG_a", %1		\n\t"
-	PAVGB" %%mm1, %%mm2		\n\t"
-	PAVGB" %%mm0, %%mm1		\n\t"
-	PAVGB" (%2), %%mm2		\n\t"
-	PAVGB" (%2, %3), %%mm1		\n\t"
-	"movq %%mm2, (%2)		\n\t"
-	"movq %%mm1, (%2, %3)		\n\t"
-	"add %%"REG_a", %2		\n\t"
-	"subl $4, %0			\n\t"
-	"jnz 1b				\n\t"
-	:"+g"(h), "+S"(pixels), "+D"(block)
-	:"r" ((long)line_size)
-	:"%"REG_a,  "memory");
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "movq (%1), %%mm0               \n\t"
+        PAVGB" 1(%1), %%mm0             \n\t"
+        ".balign 8                      \n\t"
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm2    \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "psubusb %%mm6, %%mm2           \n\t"
+        PAVGB" 1(%1, %3), %%mm1         \n\t"
+        PAVGB" 1(%1, %%"REG_a"), %%mm2  \n\t"
+        "add %%"REG_a", %1              \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm2, %%mm1             \n\t"
+        PAVGB" (%2), %%mm0              \n\t"
+        PAVGB" (%2, %3), %%mm1          \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "movq (%1, %3), %%mm1           \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        PAVGB" 1(%1, %3), %%mm1         \n\t"
+        PAVGB" 1(%1, %%"REG_a"), %%mm0  \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "add %%"REG_a", %1              \n\t"
+        PAVGB" %%mm1, %%mm2             \n\t"
+        PAVGB" %%mm0, %%mm1             \n\t"
+        PAVGB" (%2), %%mm2              \n\t"
+        PAVGB" (%2, %3), %%mm1          \n\t"
+        "movq %%mm2, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "add %%"REG_a", %2              \n\t"
+        "subl $4, %0                    \n\t"
+        "jnz 1b                         \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r" ((long)line_size)
+        :"%"REG_a,  "memory");
 }
 
 //FIXME the following could be optimized too ...
diff --git a/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h b/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h
index a56374b63..3ecd776b8 100644
--- a/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h
+++ b/src/libffmpeg/libavcodec/i386/dsputil_mmx_rnd.h
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
  * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
@@ -27,206 +27,206 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
 {
     MOVQ_BFE(mm6);
     __asm __volatile(
-	"lea	(%3, %3), %%"REG_a"	\n\t"
-	".balign 8			\n\t"
-	"1:				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	1(%1), %%mm1		\n\t"
-	"movq	(%1, %3), %%mm2		\n\t"
-	"movq	1(%1, %3), %%mm3	\n\t"
-	PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
-	"movq	%%mm4, (%2)		\n\t"
-	"movq	%%mm5, (%2, %3)		\n\t"
-	"add	%%"REG_a", %1		\n\t"
-	"add	%%"REG_a", %2		\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	1(%1), %%mm1		\n\t"
-	"movq	(%1, %3), %%mm2		\n\t"
-	"movq	1(%1, %3), %%mm3	\n\t"
-	PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
-	"movq	%%mm4, (%2)		\n\t"
-	"movq	%%mm5, (%2, %3)		\n\t"
-	"add	%%"REG_a", %1		\n\t"
-	"add	%%"REG_a", %2		\n\t"
-	"subl	$4, %0			\n\t"
-	"jnz	1b			\n\t"
-	:"+g"(h), "+S"(pixels), "+D"(block)
-	:"r"((long)line_size)
-	:REG_a, "memory");
+        "lea    (%3, %3), %%"REG_a"     \n\t"
+        ".balign 8                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   1(%1), %%mm1            \n\t"
+        "movq   (%1, %3), %%mm2         \n\t"
+        "movq   1(%1, %3), %%mm3        \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%2)             \n\t"
+        "movq   %%mm5, (%2, %3)         \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   1(%1), %%mm1            \n\t"
+        "movq   (%1, %3), %%mm2         \n\t"
+        "movq   1(%1, %3), %%mm3        \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%2)             \n\t"
+        "movq   %%mm5, (%2, %3)         \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r"((long)line_size)
+        :REG_a, "memory");
 }
 
 static void attribute_unused DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 {
     MOVQ_BFE(mm6);
     __asm __volatile(
-	"testl $1, %0			\n\t"
-        " jz 1f				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	(%2), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"add	$8, %2			\n\t"
-	PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
-	"movq	%%mm4, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-        "decl	%0			\n\t"
-	".balign 8			\n\t"
-	"1:				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	(%2), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"movq	(%1), %%mm2		\n\t"
-	"movq	8(%2), %%mm3		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
-	"movq	%%mm4, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	%%mm5, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	16(%2), %%mm1		\n\t"
-	"add	%4, %1			\n\t"
-	"movq	(%1), %%mm2		\n\t"
-	"movq	24(%2), %%mm3		\n\t"
-	"add	%4, %1			\n\t"
-	"add	$32, %2			\n\t"
-	PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
-	"movq	%%mm4, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	%%mm5, (%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"subl	$4, %0			\n\t"
-	"jnz	1b			\n\t"
+        "testl $1, %0                   \n\t"
+        " jz 1f                         \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $8, %2                  \n\t"
+        PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
+        "movq   %%mm4, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        ".balign 8                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm2             \n\t"
+        "movq   8(%2), %%mm3            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   %%mm5, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   16(%2), %%mm1           \n\t"
+        "add    %4, %1                  \n\t"
+        "movq   (%1), %%mm2             \n\t"
+        "movq   24(%2), %%mm3           \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $32, %2                 \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   %%mm5, (%3)             \n\t"
+        "add    %5, %3                  \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
         :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #else
         :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #endif
-	:"S"((long)src1Stride), "D"((long)dstStride)
-	:"memory");
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
 }
 
 static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     MOVQ_BFE(mm6);
     __asm __volatile(
-	"lea	(%3, %3), %%"REG_a"	\n\t"
-	".balign 8			\n\t"
-	"1:				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	1(%1), %%mm1		\n\t"
-	"movq	(%1, %3), %%mm2		\n\t"
-	"movq	1(%1, %3), %%mm3	\n\t"
-	PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
-	"movq	%%mm4, (%2)		\n\t"
-	"movq	%%mm5, (%2, %3)		\n\t"
-	"movq	8(%1), %%mm0		\n\t"
-	"movq	9(%1), %%mm1		\n\t"
-	"movq	8(%1, %3), %%mm2	\n\t"
-	"movq	9(%1, %3), %%mm3	\n\t"
-	PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
-	"movq	%%mm4, 8(%2)		\n\t"
-	"movq	%%mm5, 8(%2, %3)	\n\t"
-	"add	%%"REG_a", %1		\n\t"
-	"add	%%"REG_a", %2		\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	1(%1), %%mm1		\n\t"
-	"movq	(%1, %3), %%mm2		\n\t"
-	"movq	1(%1, %3), %%mm3	\n\t"
-	PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
-	"movq	%%mm4, (%2)		\n\t"
-	"movq	%%mm5, (%2, %3)		\n\t"
-	"movq	8(%1), %%mm0		\n\t"
-	"movq	9(%1), %%mm1		\n\t"
-	"movq	8(%1, %3), %%mm2	\n\t"
-	"movq	9(%1, %3), %%mm3	\n\t"
-	PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
-	"movq	%%mm4, 8(%2)		\n\t"
-	"movq	%%mm5, 8(%2, %3)	\n\t"
-	"add	%%"REG_a", %1		\n\t"
-	"add	%%"REG_a", %2		\n\t"
-	"subl	$4, %0			\n\t"
-	"jnz	1b			\n\t"
-	:"+g"(h), "+S"(pixels), "+D"(block)
-	:"r"((long)line_size)
-	:REG_a, "memory");
+        "lea        (%3, %3), %%"REG_a" \n\t"
+        ".balign 8                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   1(%1), %%mm1            \n\t"
+        "movq   (%1, %3), %%mm2         \n\t"
+        "movq   1(%1, %3), %%mm3        \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%2)             \n\t"
+        "movq   %%mm5, (%2, %3)         \n\t"
+        "movq   8(%1), %%mm0            \n\t"
+        "movq   9(%1), %%mm1            \n\t"
+        "movq   8(%1, %3), %%mm2        \n\t"
+        "movq   9(%1, %3), %%mm3        \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, 8(%2)            \n\t"
+        "movq   %%mm5, 8(%2, %3)        \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   1(%1), %%mm1            \n\t"
+        "movq   (%1, %3), %%mm2         \n\t"
+        "movq   1(%1, %3), %%mm3        \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%2)             \n\t"
+        "movq   %%mm5, (%2, %3)         \n\t"
+        "movq   8(%1), %%mm0            \n\t"
+        "movq   9(%1), %%mm1            \n\t"
+        "movq   8(%1, %3), %%mm2        \n\t"
+        "movq   9(%1, %3), %%mm3        \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, 8(%2)            \n\t"
+        "movq   %%mm5, 8(%2, %3)        \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r"((long)line_size)
+        :REG_a, "memory");
 }
 
 static void attribute_unused DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
 {
     MOVQ_BFE(mm6);
     __asm __volatile(
-	"testl $1, %0			\n\t"
-        " jz 1f				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	(%2), %%mm1		\n\t"
-	"movq	8(%1), %%mm2		\n\t"
-	"movq	8(%2), %%mm3		\n\t"
-	"add	%4, %1			\n\t"
-	"add	$16, %2			\n\t"
-	PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
-	"movq	%%mm4, (%3)		\n\t"
-	"movq	%%mm5, 8(%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"decl	%0			\n\t"
-	".balign 8			\n\t"
-	"1:				\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	(%2), %%mm1		\n\t"
-	"movq	8(%1), %%mm2		\n\t"
-	"movq	8(%2), %%mm3		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
-	"movq	%%mm4, (%3)		\n\t"
-	"movq	%%mm5, 8(%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	"movq	16(%2), %%mm1		\n\t"
-	"movq	8(%1), %%mm2		\n\t"
-	"movq	24(%2), %%mm3		\n\t"
-	"add	%4, %1			\n\t"
-	PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
-	"movq	%%mm4, (%3)		\n\t"
-	"movq	%%mm5, 8(%3)		\n\t"
-	"add	%5, %3			\n\t"
-	"add	$32, %2			\n\t"
-	"subl	$2, %0			\n\t"
-	"jnz	1b			\n\t"
+        "testl $1, %0                   \n\t"
+        " jz 1f                         \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "movq   8(%1), %%mm2            \n\t"
+        "movq   8(%2), %%mm3            \n\t"
+        "add    %4, %1                  \n\t"
+        "add    $16, %2                 \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%3)             \n\t"
+        "movq   %%mm5, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "decl   %0                      \n\t"
+        ".balign 8                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   (%2), %%mm1             \n\t"
+        "movq   8(%1), %%mm2            \n\t"
+        "movq   8(%2), %%mm3            \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%3)             \n\t"
+        "movq   %%mm5, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   16(%2), %%mm1           \n\t"
+        "movq   8(%1), %%mm2            \n\t"
+        "movq   24(%2), %%mm3           \n\t"
+        "add    %4, %1                  \n\t"
+        PAVGBP(%%mm0, %%mm1, %%mm4,   %%mm2, %%mm3, %%mm5)
+        "movq   %%mm4, (%3)             \n\t"
+        "movq   %%mm5, 8(%3)            \n\t"
+        "add    %5, %3                  \n\t"
+        "add    $32, %2                 \n\t"
+        "subl   $2, %0                  \n\t"
+        "jnz    1b                      \n\t"
 #ifdef PIC //Note "+bm" and "+mb" are buggy too (with gcc 3.2.2 at least) and cant be used
-	:"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #else
-	:"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
+        :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
 #endif
-	:"S"((long)src1Stride), "D"((long)dstStride)
-	:"memory"); 
+        :"S"((long)src1Stride), "D"((long)dstStride)
+        :"memory");
 }
 
 static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     MOVQ_BFE(mm6);
     __asm __volatile(
-	"lea (%3, %3), %%"REG_a"	\n\t"
-	"movq (%1), %%mm0		\n\t"
-	".balign 8			\n\t"
-	"1:				\n\t"
-	"movq	(%1, %3), %%mm1		\n\t"
-	"movq	(%1, %%"REG_a"),%%mm2	\n\t"
-	PAVGBP(%%mm1, %%mm0, %%mm4,   %%mm2, %%mm1, %%mm5)
-	"movq	%%mm4, (%2)		\n\t"
-	"movq	%%mm5, (%2, %3)		\n\t"
-	"add	%%"REG_a", %1		\n\t"
-	"add	%%"REG_a", %2		\n\t"
-	"movq	(%1, %3), %%mm1		\n\t"
-	"movq	(%1, %%"REG_a"),%%mm0	\n\t"
-	PAVGBP(%%mm1, %%mm2, %%mm4,   %%mm0, %%mm1, %%mm5)
-	"movq	%%mm4, (%2)		\n\t"
-	"movq	%%mm5, (%2, %3)		\n\t"
-	"add	%%"REG_a", %1		\n\t"
-	"add	%%"REG_a", %2		\n\t"
-	"subl	$4, %0			\n\t"
-	"jnz	1b			\n\t"
-	:"+g"(h), "+S"(pixels), "+D"(block)
-	:"r"((long)line_size)
-	:REG_a, "memory");
+        "lea (%3, %3), %%"REG_a"        \n\t"
+        "movq (%1), %%mm0               \n\t"
+        ".balign 8                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1, %3), %%mm1         \n\t"
+        "movq   (%1, %%"REG_a"),%%mm2   \n\t"
+        PAVGBP(%%mm1, %%mm0, %%mm4,   %%mm2, %%mm1, %%mm5)
+        "movq   %%mm4, (%2)             \n\t"
+        "movq   %%mm5, (%2, %3)         \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+        "movq   (%1, %3), %%mm1         \n\t"
+        "movq   (%1, %%"REG_a"),%%mm0   \n\t"
+        PAVGBP(%%mm1, %%mm2, %%mm4,   %%mm0, %%mm1, %%mm5)
+        "movq   %%mm4, (%2)             \n\t"
+        "movq   %%mm5, (%2, %3)         \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r"((long)line_size)
+        :REG_a, "memory");
 }
 
 static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
@@ -234,65 +234,65 @@ static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
     MOVQ_ZERO(mm7);
     SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version
     __asm __volatile(
-	"movq	(%1), %%mm0		\n\t"
-	"movq	1(%1), %%mm4		\n\t"
-	"movq	%%mm0, %%mm1		\n\t"
-	"movq	%%mm4, %%mm5		\n\t"
-	"punpcklbw %%mm7, %%mm0		\n\t"
-	"punpcklbw %%mm7, %%mm4		\n\t"
-	"punpckhbw %%mm7, %%mm1		\n\t"
-	"punpckhbw %%mm7, %%mm5		\n\t"
-	"paddusw %%mm0, %%mm4		\n\t"
-	"paddusw %%mm1, %%mm5		\n\t"
-	"xor	%%"REG_a", %%"REG_a"	\n\t"
-	"add	%3, %1			\n\t"
-	".balign 8      		\n\t"
-	"1:				\n\t"
-	"movq	(%1, %%"REG_a"), %%mm0	\n\t"
-	"movq	1(%1, %%"REG_a"), %%mm2	\n\t"
-	"movq	%%mm0, %%mm1		\n\t"
-	"movq	%%mm2, %%mm3		\n\t"
-	"punpcklbw %%mm7, %%mm0		\n\t"
-	"punpcklbw %%mm7, %%mm2		\n\t"
-	"punpckhbw %%mm7, %%mm1		\n\t"
-	"punpckhbw %%mm7, %%mm3		\n\t"
-	"paddusw %%mm2, %%mm0	 	\n\t"
-	"paddusw %%mm3, %%mm1		\n\t"
-	"paddusw %%mm6, %%mm4		\n\t"
-	"paddusw %%mm6, %%mm5		\n\t"
-	"paddusw %%mm0, %%mm4		\n\t"
-	"paddusw %%mm1, %%mm5		\n\t"
-	"psrlw	$2, %%mm4		\n\t"
-	"psrlw	$2, %%mm5		\n\t"
-	"packuswb  %%mm5, %%mm4		\n\t"
-	"movq	%%mm4, (%2, %%"REG_a")	\n\t"
-	"add	%3, %%"REG_a"		\n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   1(%1), %%mm4            \n\t"
+        "movq   %%mm0, %%mm1            \n\t"
+        "movq   %%mm4, %%mm5            \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpcklbw %%mm7, %%mm4         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpckhbw %%mm7, %%mm5         \n\t"
+        "paddusw %%mm0, %%mm4           \n\t"
+        "paddusw %%mm1, %%mm5           \n\t"
+        "xor    %%"REG_a", %%"REG_a"    \n\t"
+        "add    %3, %1                  \n\t"
+        ".balign 8                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1, %%"REG_a"), %%mm0  \n\t"
+        "movq   1(%1, %%"REG_a"), %%mm2 \n\t"
+        "movq   %%mm0, %%mm1            \n\t"
+        "movq   %%mm2, %%mm3            \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "paddusw %%mm2, %%mm0           \n\t"
+        "paddusw %%mm3, %%mm1           \n\t"
+        "paddusw %%mm6, %%mm4           \n\t"
+        "paddusw %%mm6, %%mm5           \n\t"
+        "paddusw %%mm0, %%mm4           \n\t"
+        "paddusw %%mm1, %%mm5           \n\t"
+        "psrlw  $2, %%mm4               \n\t"
+        "psrlw  $2, %%mm5               \n\t"
+        "packuswb  %%mm5, %%mm4         \n\t"
+        "movq   %%mm4, (%2, %%"REG_a")  \n\t"
+        "add    %3, %%"REG_a"           \n\t"
 
-	"movq	(%1, %%"REG_a"), %%mm2	\n\t" // 0 <-> 2   1 <-> 3
-	"movq	1(%1, %%"REG_a"), %%mm4	\n\t"
-	"movq	%%mm2, %%mm3		\n\t"
-	"movq	%%mm4, %%mm5		\n\t"
-	"punpcklbw %%mm7, %%mm2		\n\t"
-	"punpcklbw %%mm7, %%mm4		\n\t"
-	"punpckhbw %%mm7, %%mm3		\n\t"
-	"punpckhbw %%mm7, %%mm5		\n\t"
-	"paddusw %%mm2, %%mm4	 	\n\t"
-	"paddusw %%mm3, %%mm5		\n\t"
-	"paddusw %%mm6, %%mm0		\n\t"
-	"paddusw %%mm6, %%mm1		\n\t"
-	"paddusw %%mm4, %%mm0		\n\t"
-	"paddusw %%mm5, %%mm1		\n\t"
-	"psrlw	$2, %%mm0		\n\t"
-	"psrlw	$2, %%mm1		\n\t"
-	"packuswb  %%mm1, %%mm0		\n\t"
-	"movq	%%mm0, (%2, %%"REG_a")	\n\t"
-	"add	%3, %%"REG_a"		\n\t"
+        "movq   (%1, %%"REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3
+        "movq   1(%1, %%"REG_a"), %%mm4 \n\t"
+        "movq   %%mm2, %%mm3            \n\t"
+        "movq   %%mm4, %%mm5            \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpcklbw %%mm7, %%mm4         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "punpckhbw %%mm7, %%mm5         \n\t"
+        "paddusw %%mm2, %%mm4           \n\t"
+        "paddusw %%mm3, %%mm5           \n\t"
+        "paddusw %%mm6, %%mm0           \n\t"
+        "paddusw %%mm6, %%mm1           \n\t"
+        "paddusw %%mm4, %%mm0           \n\t"
+        "paddusw %%mm5, %%mm1           \n\t"
+        "psrlw  $2, %%mm0               \n\t"
+        "psrlw  $2, %%mm1               \n\t"
+        "packuswb  %%mm1, %%mm0         \n\t"
+        "movq   %%mm0, (%2, %%"REG_a")  \n\t"
+        "add    %3, %%"REG_a"           \n\t"
 
-	"subl	$2, %0			\n\t"
-	"jnz	1b			\n\t"
-	:"+g"(h), "+S"(pixels)
-	:"D"(block), "r"((long)line_size)
-	:REG_a, "memory");
+        "subl   $2, %0                  \n\t"
+        "jnz    1b                      \n\t"
+        :"+g"(h), "+S"(pixels)
+        :"D"(block), "r"((long)line_size)
+        :REG_a, "memory");
 }
 
 // avg_pixels
@@ -301,16 +301,16 @@ static void attribute_unused DEF(avg, pixels4)(uint8_t *block, const uint8_t *pi
     MOVQ_BFE(mm6);
     JUMPALIGN();
     do {
-	__asm __volatile(
-	     "movd  %0, %%mm0		\n\t"
-	     "movd  %1, %%mm1		\n\t"
-	     PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
-	     "movd  %%mm2, %0		\n\t"
-	     :"+m"(*block)
-	     :"m"(*pixels)
-	     :"memory");
-	pixels += line_size;
-	block += line_size;
+        __asm __volatile(
+             "movd  %0, %%mm0           \n\t"
+             "movd  %1, %%mm1           \n\t"
+             PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+             "movd  %%mm2, %0           \n\t"
+             :"+m"(*block)
+             :"m"(*pixels)
+             :"memory");
+        pixels += line_size;
+        block += line_size;
     }
     while (--h);
 }
@@ -321,16 +321,16 @@ static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_si
     MOVQ_BFE(mm6);
     JUMPALIGN();
     do {
-	__asm __volatile(
-	     "movq  %0, %%mm0		\n\t"
-	     "movq  %1, %%mm1		\n\t"
-	     PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
-	     "movq  %%mm2, %0		\n\t"
-	     :"+m"(*block)
-	     :"m"(*pixels)
-	     :"memory");
-	pixels += line_size;
-	block += line_size;
+        __asm __volatile(
+             "movq  %0, %%mm0           \n\t"
+             "movq  %1, %%mm1           \n\t"
+             PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+             "movq  %%mm2, %0           \n\t"
+             :"+m"(*block)
+             :"m"(*pixels)
+             :"memory");
+        pixels += line_size;
+        block += line_size;
     }
     while (--h);
 }
@@ -340,20 +340,20 @@ static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_s
     MOVQ_BFE(mm6);
     JUMPALIGN();
     do {
-	__asm __volatile(
-	     "movq  %0, %%mm0		\n\t"
-	     "movq  %1, %%mm1		\n\t"
-	     PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
-	     "movq  %%mm2, %0		\n\t"
-	     "movq  8%0, %%mm0		\n\t"
-	     "movq  8%1, %%mm1		\n\t"
-	     PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
-	     "movq  %%mm2, 8%0		\n\t"
-	     :"+m"(*block)
-	     :"m"(*pixels)
-	     :"memory");
-	pixels += line_size;
-	block += line_size;
+        __asm __volatile(
+             "movq  %0, %%mm0           \n\t"
+             "movq  %1, %%mm1           \n\t"
+             PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+             "movq  %%mm2, %0           \n\t"
+             "movq  8%0, %%mm0          \n\t"
+             "movq  8%1, %%mm1          \n\t"
+             PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+             "movq  %%mm2, 8%0          \n\t"
+             :"+m"(*block)
+             :"m"(*pixels)
+             :"memory");
+        pixels += line_size;
+        block += line_size;
     }
     while (--h);
 }
@@ -363,18 +363,18 @@ static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line
     MOVQ_BFE(mm6);
     JUMPALIGN();
     do {
-	__asm __volatile(
-	    "movq  %1, %%mm0		\n\t"
-	    "movq  1%1, %%mm1		\n\t"
-	    "movq  %0, %%mm3		\n\t"
-	    PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
-	    PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
-	    "movq  %%mm0, %0		\n\t"
-	    :"+m"(*block)
-	    :"m"(*pixels)
-	    :"memory");
-	pixels += line_size;
-	block += line_size;
+        __asm __volatile(
+            "movq  %1, %%mm0            \n\t"
+            "movq  1%1, %%mm1           \n\t"
+            "movq  %0, %%mm3            \n\t"
+            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+            PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+            "movq  %%mm0, %0            \n\t"
+            :"+m"(*block)
+            :"m"(*pixels)
+            :"memory");
+        pixels += line_size;
+        block += line_size;
     } while (--h);
 }
 
@@ -383,17 +383,17 @@ static __attribute__((unused)) void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *
     MOVQ_BFE(mm6);
     JUMPALIGN();
     do {
-	__asm __volatile(
-	    "movq  %1, %%mm0		\n\t"
-	    "movq  %2, %%mm1		\n\t"
-	    "movq  %0, %%mm3		\n\t"
-	    PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
-	    PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
-	    "movq  %%mm0, %0		\n\t"
-	    :"+m"(*dst)
-	    :"m"(*src1), "m"(*src2)
-	    :"memory");
-	dst += dstStride;
+        __asm __volatile(
+            "movq  %1, %%mm0            \n\t"
+            "movq  %2, %%mm1            \n\t"
+            "movq  %0, %%mm3            \n\t"
+            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+            PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+            "movq  %%mm0, %0            \n\t"
+            :"+m"(*dst)
+            :"m"(*src1), "m"(*src2)
+            :"memory");
+        dst += dstStride;
         src1 += src1Stride;
         src2 += 8;
     } while (--h);
@@ -404,24 +404,24 @@ static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int lin
     MOVQ_BFE(mm6);
     JUMPALIGN();
     do {
-	__asm __volatile(
-	    "movq  %1, %%mm0		\n\t"
-	    "movq  1%1, %%mm1		\n\t"
-	    "movq  %0, %%mm3		\n\t"
-	    PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
-	    PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
-	    "movq  %%mm0, %0		\n\t"
-	    "movq  8%1, %%mm0		\n\t"
-	    "movq  9%1, %%mm1		\n\t"
-	    "movq  8%0, %%mm3		\n\t"
-	    PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
-	    PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
-	    "movq  %%mm0, 8%0		\n\t"
-	    :"+m"(*block)
-	    :"m"(*pixels)
-	    :"memory");
-	pixels += line_size;
-	block += line_size;
+        __asm __volatile(
+            "movq  %1, %%mm0            \n\t"
+            "movq  1%1, %%mm1           \n\t"
+            "movq  %0, %%mm3            \n\t"
+            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+            PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+            "movq  %%mm0, %0            \n\t"
+            "movq  8%1, %%mm0           \n\t"
+            "movq  9%1, %%mm1           \n\t"
+            "movq  8%0, %%mm3           \n\t"
+            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+            PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+            "movq  %%mm0, 8%0           \n\t"
+            :"+m"(*block)
+            :"m"(*pixels)
+            :"memory");
+        pixels += line_size;
+        block += line_size;
     } while (--h);
 }
 
@@ -430,23 +430,23 @@ static __attribute__((unused)) void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t
     MOVQ_BFE(mm6);
     JUMPALIGN();
     do {
-	__asm __volatile(
-	    "movq  %1, %%mm0		\n\t"
-	    "movq  %2, %%mm1		\n\t"
-	    "movq  %0, %%mm3		\n\t"
-	    PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
-	    PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
-	    "movq  %%mm0, %0		\n\t"
-	    "movq  8%1, %%mm0		\n\t"
-	    "movq  8%2, %%mm1		\n\t"
-	    "movq  8%0, %%mm3		\n\t"
-	    PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
-	    PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
-	    "movq  %%mm0, 8%0		\n\t"
-	    :"+m"(*dst)
-	    :"m"(*src1), "m"(*src2)
-	    :"memory");
-	dst += dstStride;
+        __asm __volatile(
+            "movq  %1, %%mm0            \n\t"
+            "movq  %2, %%mm1            \n\t"
+            "movq  %0, %%mm3            \n\t"
+            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+            PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+            "movq  %%mm0, %0            \n\t"
+            "movq  8%1, %%mm0           \n\t"
+            "movq  8%2, %%mm1           \n\t"
+            "movq  8%0, %%mm3           \n\t"
+            PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
+            PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
+            "movq  %%mm0, 8%0           \n\t"
+            :"+m"(*dst)
+            :"m"(*src1), "m"(*src2)
+            :"memory");
+        dst += dstStride;
         src1 += src1Stride;
         src2 += 16;
     } while (--h);
@@ -456,39 +456,39 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line
 {
     MOVQ_BFE(mm6);
     __asm __volatile(
-	"lea	(%3, %3), %%"REG_a"	\n\t"
-	"movq	(%1), %%mm0		\n\t"
-	".balign 8			\n\t"
-	"1:				\n\t"
-	"movq	(%1, %3), %%mm1		\n\t"
-	"movq	(%1, %%"REG_a"), %%mm2	\n\t"
-	PAVGBP(%%mm1, %%mm0, %%mm4,   %%mm2, %%mm1, %%mm5)
-	"movq	(%2), %%mm3		\n\t"
-	PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
-	"movq	(%2, %3), %%mm3		\n\t"
-	PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
-	"movq	%%mm0, (%2)		\n\t"
-	"movq	%%mm1, (%2, %3)		\n\t"
-	"add	%%"REG_a", %1		\n\t"
-	"add	%%"REG_a", %2		\n\t"
+        "lea    (%3, %3), %%"REG_a"     \n\t"
+        "movq   (%1), %%mm0             \n\t"
+        ".balign 8                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1, %3), %%mm1         \n\t"
+        "movq   (%1, %%"REG_a"), %%mm2  \n\t"
+        PAVGBP(%%mm1, %%mm0, %%mm4,   %%mm2, %%mm1, %%mm5)
+        "movq   (%2), %%mm3             \n\t"
+        PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
+        "movq   (%2, %3), %%mm3         \n\t"
+        PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
+        "movq   %%mm0, (%2)             \n\t"
+        "movq   %%mm1, (%2, %3)         \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
 
-	"movq	(%1, %3), %%mm1		\n\t"
-	"movq	(%1, %%"REG_a"), %%mm0	\n\t"
-	PAVGBP(%%mm1, %%mm2, %%mm4,   %%mm0, %%mm1, %%mm5)
-	"movq	(%2), %%mm3		\n\t"
-	PAVGB(%%mm3, %%mm4, %%mm2, %%mm6)
-	"movq	(%2, %3), %%mm3		\n\t"
-	PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
-	"movq	%%mm2, (%2)		\n\t"
-	"movq	%%mm1, (%2, %3)		\n\t"
-	"add	%%"REG_a", %1		\n\t"
-	"add	%%"REG_a", %2		\n\t"
+        "movq   (%1, %3), %%mm1         \n\t"
+        "movq   (%1, %%"REG_a"), %%mm0  \n\t"
+        PAVGBP(%%mm1, %%mm2, %%mm4,   %%mm0, %%mm1, %%mm5)
+        "movq   (%2), %%mm3             \n\t"
+        PAVGB(%%mm3, %%mm4, %%mm2, %%mm6)
+        "movq   (%2, %3), %%mm3         \n\t"
+        PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
+        "movq   %%mm2, (%2)             \n\t"
+        "movq   %%mm1, (%2, %3)         \n\t"
+        "add    %%"REG_a", %1           \n\t"
+        "add    %%"REG_a", %2           \n\t"
 
-	"subl	$4, %0			\n\t"
-	"jnz	1b			\n\t"
-	:"+g"(h), "+S"(pixels), "+D"(block)
-	:"r"((long)line_size)
-	:REG_a, "memory");
+        "subl   $4, %0                  \n\t"
+        "jnz    1b                      \n\t"
+        :"+g"(h), "+S"(pixels), "+D"(block)
+        :"r"((long)line_size)
+        :REG_a, "memory");
 }
 
 // this routine is 'slightly' suboptimal but mostly unused
@@ -497,73 +497,73 @@ static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int lin
     MOVQ_ZERO(mm7);
     SET_RND(mm6); // =2 for rnd  and  =1 for no_rnd version
     __asm __volatile(
-	"movq	(%1), %%mm0		\n\t"
-	"movq	1(%1), %%mm4		\n\t"
-	"movq	%%mm0, %%mm1		\n\t"
-	"movq	%%mm4, %%mm5		\n\t"
-	"punpcklbw %%mm7, %%mm0		\n\t"
-	"punpcklbw %%mm7, %%mm4		\n\t"
-	"punpckhbw %%mm7, %%mm1		\n\t"
-	"punpckhbw %%mm7, %%mm5		\n\t"
-	"paddusw %%mm0, %%mm4		\n\t"
-	"paddusw %%mm1, %%mm5		\n\t"
-	"xor	%%"REG_a", %%"REG_a"	\n\t"
-	"add	%3, %1			\n\t"
-	".balign 8			\n\t"
-	"1:				\n\t"
-	"movq	(%1, %%"REG_a"), %%mm0	\n\t"
-	"movq	1(%1, %%"REG_a"), %%mm2	\n\t"
-	"movq	%%mm0, %%mm1		\n\t"
-	"movq	%%mm2, %%mm3		\n\t"
-	"punpcklbw %%mm7, %%mm0		\n\t"
-	"punpcklbw %%mm7, %%mm2		\n\t"
-	"punpckhbw %%mm7, %%mm1		\n\t"
-	"punpckhbw %%mm7, %%mm3		\n\t"
-	"paddusw %%mm2, %%mm0	 	\n\t"
-	"paddusw %%mm3, %%mm1		\n\t"
-	"paddusw %%mm6, %%mm4		\n\t"
-	"paddusw %%mm6, %%mm5		\n\t"
-	"paddusw %%mm0, %%mm4		\n\t"
-	"paddusw %%mm1, %%mm5		\n\t"
-	"psrlw	$2, %%mm4		\n\t"
-	"psrlw	$2, %%mm5		\n\t"
-		"movq	(%2, %%"REG_a"), %%mm3	\n\t"
-	"packuswb  %%mm5, %%mm4		\n\t"
-		"pcmpeqd %%mm2, %%mm2	\n\t"
-		"paddb %%mm2, %%mm2	\n\t"
-		PAVGB(%%mm3, %%mm4, %%mm5, %%mm2)
-		"movq	%%mm5, (%2, %%"REG_a")	\n\t"
-	"add	%3, %%"REG_a"		\n\t"
+        "movq   (%1), %%mm0             \n\t"
+        "movq   1(%1), %%mm4            \n\t"
+        "movq   %%mm0, %%mm1            \n\t"
+        "movq   %%mm4, %%mm5            \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpcklbw %%mm7, %%mm4         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpckhbw %%mm7, %%mm5         \n\t"
+        "paddusw %%mm0, %%mm4           \n\t"
+        "paddusw %%mm1, %%mm5           \n\t"
+        "xor    %%"REG_a", %%"REG_a"    \n\t"
+        "add    %3, %1                  \n\t"
+        ".balign 8                      \n\t"
+        "1:                             \n\t"
+        "movq   (%1, %%"REG_a"), %%mm0  \n\t"
+        "movq   1(%1, %%"REG_a"), %%mm2 \n\t"
+        "movq   %%mm0, %%mm1            \n\t"
+        "movq   %%mm2, %%mm3            \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "paddusw %%mm2, %%mm0           \n\t"
+        "paddusw %%mm3, %%mm1           \n\t"
+        "paddusw %%mm6, %%mm4           \n\t"
+        "paddusw %%mm6, %%mm5           \n\t"
+        "paddusw %%mm0, %%mm4           \n\t"
+        "paddusw %%mm1, %%mm5           \n\t"
+        "psrlw  $2, %%mm4               \n\t"
+        "psrlw  $2, %%mm5               \n\t"
+                "movq   (%2, %%"REG_a"), %%mm3  \n\t"
+        "packuswb  %%mm5, %%mm4         \n\t"
+                "pcmpeqd %%mm2, %%mm2   \n\t"
+                "paddb %%mm2, %%mm2     \n\t"
+                PAVGB(%%mm3, %%mm4, %%mm5, %%mm2)
+                "movq   %%mm5, (%2, %%"REG_a")  \n\t"
+        "add    %3, %%"REG_a"                \n\t"
 
-	"movq	(%1, %%"REG_a"), %%mm2	\n\t" // 0 <-> 2   1 <-> 3
-	"movq	1(%1, %%"REG_a"), %%mm4	\n\t"
-	"movq	%%mm2, %%mm3		\n\t"
-	"movq	%%mm4, %%mm5		\n\t"
-	"punpcklbw %%mm7, %%mm2		\n\t"
-	"punpcklbw %%mm7, %%mm4		\n\t"
-	"punpckhbw %%mm7, %%mm3		\n\t"
-	"punpckhbw %%mm7, %%mm5		\n\t"
-	"paddusw %%mm2, %%mm4	 	\n\t"
-	"paddusw %%mm3, %%mm5		\n\t"
-	"paddusw %%mm6, %%mm0		\n\t"
-	"paddusw %%mm6, %%mm1		\n\t"
-	"paddusw %%mm4, %%mm0		\n\t"
-	"paddusw %%mm5, %%mm1		\n\t"
-	"psrlw	$2, %%mm0		\n\t"
-	"psrlw	$2, %%mm1		\n\t"
-		"movq	(%2, %%"REG_a"), %%mm3	\n\t"
-	"packuswb  %%mm1, %%mm0		\n\t"
-		"pcmpeqd %%mm2, %%mm2	\n\t"
-		"paddb %%mm2, %%mm2	\n\t"
-		PAVGB(%%mm3, %%mm0, %%mm1, %%mm2)
-		"movq	%%mm1, (%2, %%"REG_a")	\n\t"
-	"add	%3, %%"REG_a"		\n\t"
+        "movq   (%1, %%"REG_a"), %%mm2  \n\t" // 0 <-> 2   1 <-> 3
+        "movq   1(%1, %%"REG_a"), %%mm4 \n\t"
+        "movq   %%mm2, %%mm3            \n\t"
+        "movq   %%mm4, %%mm5            \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpcklbw %%mm7, %%mm4         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "punpckhbw %%mm7, %%mm5         \n\t"
+        "paddusw %%mm2, %%mm4           \n\t"
+        "paddusw %%mm3, %%mm5           \n\t"
+        "paddusw %%mm6, %%mm0           \n\t"
+        "paddusw %%mm6, %%mm1           \n\t"
+        "paddusw %%mm4, %%mm0           \n\t"
+        "paddusw %%mm5, %%mm1           \n\t"
+        "psrlw  $2, %%mm0               \n\t"
+        "psrlw  $2, %%mm1               \n\t"
+                "movq   (%2, %%"REG_a"), %%mm3  \n\t"
+        "packuswb  %%mm1, %%mm0         \n\t"
+                "pcmpeqd %%mm2, %%mm2   \n\t"
+                "paddb %%mm2, %%mm2     \n\t"
+                PAVGB(%%mm3, %%mm0, %%mm1, %%mm2)
+                "movq   %%mm1, (%2, %%"REG_a")  \n\t"
+        "add    %3, %%"REG_a"           \n\t"
 
-	"subl	$2, %0			\n\t"
-	"jnz	1b			\n\t"
-	:"+g"(h), "+S"(pixels)
-	:"D"(block), "r"((long)line_size)
-	:REG_a, "memory");
+        "subl   $2, %0                  \n\t"
+        "jnz    1b                      \n\t"
+        :"+g"(h), "+S"(pixels)
+        :"D"(block), "r"((long)line_size)
+        :REG_a, "memory");
 }
 
 //FIXME optimize
diff --git a/src/libffmpeg/libavcodec/i386/fdct_mmx.c b/src/libffmpeg/libavcodec/i386/fdct_mmx.c
index 6a13090a1..f6150c83c 100644
--- a/src/libffmpeg/libavcodec/i386/fdct_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/fdct_mmx.c
@@ -5,7 +5,7 @@
  * SSE2 optimization is Copyright (c) 2004 Denes Balatoni.
  *
  * from  fdctam32.c - AP922 MMX(3D-Now) forward-DCT
- * 
+ *
  *  Intel Application Note AP-922 - fast, precise implementation of DCT
  *        http://developer.intel.com/vtune/cbts/appnotes.htm
  *
@@ -30,28 +30,28 @@
 //
 //////////////////////////////////////////////////////////////////////
 
-#define BITS_FRW_ACC	3 //; 2 or 3 for accuracy
-#define SHIFT_FRW_COL	BITS_FRW_ACC
-#define SHIFT_FRW_ROW	(BITS_FRW_ACC + 17 - 3)
-#define RND_FRW_ROW		(1 << (SHIFT_FRW_ROW-1))
-//#define RND_FRW_COL		(1 << (SHIFT_FRW_COL-1))
+#define BITS_FRW_ACC   3 //; 2 or 3 for accuracy
+#define SHIFT_FRW_COL  BITS_FRW_ACC
+#define SHIFT_FRW_ROW  (BITS_FRW_ACC + 17 - 3)
+#define RND_FRW_ROW    (1 << (SHIFT_FRW_ROW-1))
+//#define RND_FRW_COL    (1 << (SHIFT_FRW_COL-1))
 
 //concatenated table, for forward DCT transformation
 static const int16_t fdct_tg_all_16[] ATTR_ALIGN(8) = {
-    13036, 13036, 13036, 13036,		// tg * (2<<16) + 0.5
-    27146, 27146, 27146, 27146,		// tg * (2<<16) + 0.5
-    -21746, -21746, -21746, -21746,	// tg * (2<<16) + 0.5
+    13036,  13036,  13036,  13036,        // tg * (2<<16) + 0.5
+    27146,  27146,  27146,  27146,        // tg * (2<<16) + 0.5
+   -21746, -21746, -21746, -21746,        // tg * (2<<16) + 0.5
 };
 
 static const int16_t ocos_4_16[4] ATTR_ALIGN(8) = {
-    23170, 23170, 23170, 23170,	//cos * (2<<15) + 0.5
+    23170, 23170, 23170, 23170,           //cos * (2<<15) + 0.5
 };
 
 static const int64_t fdct_one_corr ATTR_ALIGN(8) = 0x0001000100010001LL;
 
 static const int32_t fdct_r_row[2] ATTR_ALIGN(8) = {RND_FRW_ROW, RND_FRW_ROW };
 
-struct 
+struct
 {
  const int32_t fdct_r_row_sse2[4] ATTR_ALIGN(16);
 } fdct_r_row_sse2 ATTR_ALIGN(16)=
@@ -61,90 +61,90 @@ struct
 //static const long fdct_r_row_sse2[4] ATTR_ALIGN(16) = {RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW, RND_FRW_ROW};
 
 static const int16_t tab_frw_01234567[] ATTR_ALIGN(8) = {  // forward_dct coeff table
-  16384,   16384,   22725,   19266, 
-  16384,   16384,   12873,    4520, 
-  21407,    8867,   19266,   -4520, 
-  -8867,  -21407,  -22725,  -12873, 
-  16384,  -16384,   12873,  -22725, 
- -16384,   16384,    4520,   19266, 
-   8867,  -21407,    4520,  -12873, 
-  21407,   -8867,   19266,  -22725, 
-
-  22725,   22725,   31521,   26722, 
-  22725,   22725,   17855,    6270, 
-  29692,   12299,   26722,   -6270, 
- -12299,  -29692,  -31521,  -17855, 
-  22725,  -22725,   17855,  -31521, 
- -22725,   22725,    6270,   26722, 
-  12299,  -29692,    6270,  -17855, 
-  29692,  -12299,   26722,  -31521, 
-
-  21407,   21407,   29692,   25172, 
-  21407,   21407,   16819,    5906, 
-  27969,   11585,   25172,   -5906, 
- -11585,  -27969,  -29692,  -16819, 
-  21407,  -21407,   16819,  -29692, 
- -21407,   21407,    5906,   25172, 
-  11585,  -27969,    5906,  -16819, 
-  27969,  -11585,   25172,  -29692, 
-
-  19266,   19266,   26722,   22654, 
-  19266,   19266,   15137,    5315, 
-  25172,   10426,   22654,   -5315, 
- -10426,  -25172,  -26722,  -15137, 
-  19266,  -19266,   15137,  -26722, 
- -19266,   19266,    5315,   22654, 
-  10426,  -25172,    5315,  -15137, 
-  25172,  -10426,   22654,  -26722, 
-
-  16384,   16384,   22725,   19266, 
-  16384,   16384,   12873,    4520, 
-  21407,    8867,   19266,   -4520, 
-  -8867,  -21407,  -22725,  -12873, 
-  16384,  -16384,   12873,  -22725, 
- -16384,   16384,    4520,   19266, 
-   8867,  -21407,    4520,  -12873, 
-  21407,   -8867,   19266,  -22725, 
-
-  19266,   19266,   26722,   22654, 
-  19266,   19266,   15137,    5315, 
-  25172,   10426,   22654,   -5315, 
- -10426,  -25172,  -26722,  -15137, 
-  19266,  -19266,   15137,  -26722, 
- -19266,   19266,    5315,   22654, 
-  10426,  -25172,    5315,  -15137, 
-  25172,  -10426,   22654,  -26722, 
-
-  21407,   21407,   29692,   25172, 
-  21407,   21407,   16819,    5906, 
-  27969,   11585,   25172,   -5906, 
- -11585,  -27969,  -29692,  -16819, 
-  21407,  -21407,   16819,  -29692, 
- -21407,   21407,    5906,   25172, 
-  11585,  -27969,    5906,  -16819, 
-  27969,  -11585,   25172,  -29692, 
-
-  22725,   22725,   31521,   26722, 
-  22725,   22725,   17855,    6270, 
-  29692,   12299,   26722,   -6270, 
- -12299,  -29692,  -31521,  -17855, 
-  22725,  -22725,   17855,  -31521, 
- -22725,   22725,    6270,   26722, 
-  12299,  -29692,    6270,  -17855, 
-  29692,  -12299,   26722,  -31521, 
+  16384,   16384,   22725,   19266,
+  16384,   16384,   12873,    4520,
+  21407,    8867,   19266,   -4520,
+  -8867,  -21407,  -22725,  -12873,
+  16384,  -16384,   12873,  -22725,
+ -16384,   16384,    4520,   19266,
+   8867,  -21407,    4520,  -12873,
+  21407,   -8867,   19266,  -22725,
+
+  22725,   22725,   31521,   26722,
+  22725,   22725,   17855,    6270,
+  29692,   12299,   26722,   -6270,
+ -12299,  -29692,  -31521,  -17855,
+  22725,  -22725,   17855,  -31521,
+ -22725,   22725,    6270,   26722,
+  12299,  -29692,    6270,  -17855,
+  29692,  -12299,   26722,  -31521,
+
+  21407,   21407,   29692,   25172,
+  21407,   21407,   16819,    5906,
+  27969,   11585,   25172,   -5906,
+ -11585,  -27969,  -29692,  -16819,
+  21407,  -21407,   16819,  -29692,
+ -21407,   21407,    5906,   25172,
+  11585,  -27969,    5906,  -16819,
+  27969,  -11585,   25172,  -29692,
+
+  19266,   19266,   26722,   22654,
+  19266,   19266,   15137,    5315,
+  25172,   10426,   22654,   -5315,
+ -10426,  -25172,  -26722,  -15137,
+  19266,  -19266,   15137,  -26722,
+ -19266,   19266,    5315,   22654,
+  10426,  -25172,    5315,  -15137,
+  25172,  -10426,   22654,  -26722,
+
+  16384,   16384,   22725,   19266,
+  16384,   16384,   12873,    4520,
+  21407,    8867,   19266,   -4520,
+  -8867,  -21407,  -22725,  -12873,
+  16384,  -16384,   12873,  -22725,
+ -16384,   16384,    4520,   19266,
+   8867,  -21407,    4520,  -12873,
+  21407,   -8867,   19266,  -22725,
+
+  19266,   19266,   26722,   22654,
+  19266,   19266,   15137,    5315,
+  25172,   10426,   22654,   -5315,
+ -10426,  -25172,  -26722,  -15137,
+  19266,  -19266,   15137,  -26722,
+ -19266,   19266,    5315,   22654,
+  10426,  -25172,    5315,  -15137,
+  25172,  -10426,   22654,  -26722,
+
+  21407,   21407,   29692,   25172,
+  21407,   21407,   16819,    5906,
+  27969,   11585,   25172,   -5906,
+ -11585,  -27969,  -29692,  -16819,
+  21407,  -21407,   16819,  -29692,
+ -21407,   21407,    5906,   25172,
+  11585,  -27969,    5906,  -16819,
+  27969,  -11585,   25172,  -29692,
+
+  22725,   22725,   31521,   26722,
+  22725,   22725,   17855,    6270,
+  29692,   12299,   26722,   -6270,
+ -12299,  -29692,  -31521,  -17855,
+  22725,  -22725,   17855,  -31521,
+ -22725,   22725,    6270,   26722,
+  12299,  -29692,    6270,  -17855,
+  29692,  -12299,   26722,  -31521,
 };
 
-struct 
+struct
 {
  const int16_t tab_frw_01234567_sse2[256] ATTR_ALIGN(16);
 } tab_frw_01234567_sse2 ATTR_ALIGN(16) =
 {{
-//static const int16_t tab_frw_01234567_sse2[] ATTR_ALIGN(16) = {  // forward_dct coeff table  
+//static const int16_t tab_frw_01234567_sse2[] ATTR_ALIGN(16) = {  // forward_dct coeff table
 #define TABLE_SSE2 C4,  C4,  C1,  C3, -C6, -C2, -C1, -C5, \
                    C4,  C4,  C5,  C7,  C2,  C6,  C3, -C7, \
                   -C4,  C4,  C7,  C3,  C6, -C2,  C7, -C5, \
-                   C4, -C4,  C5, -C1,  C2, -C6,  C3, -C1, 
-// c1..c7 * cos(pi/4) * 2^15 
+                   C4, -C4,  C5, -C1,  C2, -C6,  C3, -C1,
+// c1..c7 * cos(pi/4) * 2^15
 #define C1 22725
 #define C2 21407
 #define C3 19266
@@ -351,67 +351,67 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
 static always_inline void fdct_row_sse2(const int16_t *in, int16_t *out)
 {
     asm volatile(
-        ".macro FDCT_ROW_SSE2_H1 i t   \n\t"
-	"movq      \\i(%0), %%xmm2     \n\t"
-	"movq      \\i+8(%0), %%xmm0   \n\t"
-	"movdqa    \\t+32(%1), %%xmm3  \n\t"
-	"movdqa    \\t+48(%1), %%xmm7  \n\t"	
-	"movdqa    \\t(%1), %%xmm4     \n\t"
-	"movdqa    \\t+16(%1), %%xmm5  \n\t"	
-	".endm                         \n\t"
-        ".macro FDCT_ROW_SSE2_H2 i t   \n\t"
-	"movq      \\i(%0), %%xmm2     \n\t"
-	"movq      \\i+8(%0), %%xmm0   \n\t"
-	"movdqa    \\t+32(%1), %%xmm3  \n\t"
-	"movdqa    \\t+48(%1), %%xmm7  \n\t"	
-	".endm                         \n\t"
-	".macro FDCT_ROW_SSE2 i        \n\t"	
-	"movq      %%xmm2, %%xmm1      \n\t"
-	"pshuflw   $27, %%xmm0, %%xmm0 \n\t"
-	"paddsw    %%xmm0, %%xmm1      \n\t"
-	"psubsw    %%xmm0, %%xmm2      \n\t"
-	"punpckldq %%xmm2, %%xmm1      \n\t"
-	"pshufd    $78, %%xmm1, %%xmm2 \n\t"
-	"pmaddwd   %%xmm2, %%xmm3      \n\t"
-	"pmaddwd   %%xmm1, %%xmm7      \n\t"
-	"pmaddwd   %%xmm5, %%xmm2      \n\t"
-	"pmaddwd   %%xmm4, %%xmm1      \n\t"
-	"paddd     %%xmm7, %%xmm3      \n\t"	
-	"paddd     %%xmm2, %%xmm1      \n\t"
-	"paddd     %%xmm6, %%xmm3      \n\t"
-	"paddd     %%xmm6, %%xmm1      \n\t"
-	"psrad     %3, %%xmm3          \n\t"
-	"psrad     %3, %%xmm1          \n\t"
-	"packssdw  %%xmm3, %%xmm1      \n\t"
-	"movdqa    %%xmm1, \\i(%4)     \n\t"
-	".endm                         \n\t"	
-	"movdqa    (%2), %%xmm6        \n\t"		
-	"FDCT_ROW_SSE2_H1 0 0 \n\t"
-	"FDCT_ROW_SSE2 0 \n\t"
-	"FDCT_ROW_SSE2_H2 64 0 \n\t"
-	"FDCT_ROW_SSE2 64 \n\t"
-
-	"FDCT_ROW_SSE2_H1 16 64 \n\t"
-	"FDCT_ROW_SSE2 16 \n\t"
-	"FDCT_ROW_SSE2_H2 112 64 \n\t"
-	"FDCT_ROW_SSE2 112 \n\t"
-
-	"FDCT_ROW_SSE2_H1 32 128 \n\t"
-	"FDCT_ROW_SSE2 32 \n\t"
-	"FDCT_ROW_SSE2_H2 96 128 \n\t"
-	"FDCT_ROW_SSE2 96 \n\t"
-
-	"FDCT_ROW_SSE2_H1 48 192 \n\t"
-	"FDCT_ROW_SSE2 48 \n\t"
-	"FDCT_ROW_SSE2_H2 80 192 \n\t"
-	"FDCT_ROW_SSE2 80 \n\t"
-	:
-	: "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
+        ".macro FDCT_ROW_SSE2_H1 i t    \n\t"
+        "movq      \\i(%0), %%xmm2      \n\t"
+        "movq      \\i+8(%0), %%xmm0    \n\t"
+        "movdqa    \\t+32(%1), %%xmm3   \n\t"
+        "movdqa    \\t+48(%1), %%xmm7   \n\t"
+        "movdqa    \\t(%1), %%xmm4      \n\t"
+        "movdqa    \\t+16(%1), %%xmm5   \n\t"
+        ".endm                          \n\t"
+        ".macro FDCT_ROW_SSE2_H2 i t    \n\t"
+        "movq      \\i(%0), %%xmm2      \n\t"
+        "movq      \\i+8(%0), %%xmm0    \n\t"
+        "movdqa    \\t+32(%1), %%xmm3   \n\t"
+        "movdqa    \\t+48(%1), %%xmm7   \n\t"
+        ".endm                          \n\t"
+        ".macro FDCT_ROW_SSE2 i         \n\t"
+        "movq      %%xmm2, %%xmm1       \n\t"
+        "pshuflw   $27, %%xmm0, %%xmm0  \n\t"
+        "paddsw    %%xmm0, %%xmm1       \n\t"
+        "psubsw    %%xmm0, %%xmm2       \n\t"
+        "punpckldq %%xmm2, %%xmm1       \n\t"
+        "pshufd    $78, %%xmm1, %%xmm2  \n\t"
+        "pmaddwd   %%xmm2, %%xmm3       \n\t"
+        "pmaddwd   %%xmm1, %%xmm7       \n\t"
+        "pmaddwd   %%xmm5, %%xmm2       \n\t"
+        "pmaddwd   %%xmm4, %%xmm1       \n\t"
+        "paddd     %%xmm7, %%xmm3       \n\t"
+        "paddd     %%xmm2, %%xmm1       \n\t"
+        "paddd     %%xmm6, %%xmm3       \n\t"
+        "paddd     %%xmm6, %%xmm1       \n\t"
+        "psrad     %3, %%xmm3           \n\t"
+        "psrad     %3, %%xmm1           \n\t"
+        "packssdw  %%xmm3, %%xmm1       \n\t"
+        "movdqa    %%xmm1, \\i(%4)      \n\t"
+        ".endm                          \n\t"
+        "movdqa    (%2), %%xmm6         \n\t"
+        "FDCT_ROW_SSE2_H1 0 0           \n\t"
+        "FDCT_ROW_SSE2 0                \n\t"
+        "FDCT_ROW_SSE2_H2 64 0          \n\t"
+        "FDCT_ROW_SSE2 64               \n\t"
+
+        "FDCT_ROW_SSE2_H1 16 64         \n\t"
+        "FDCT_ROW_SSE2 16               \n\t"
+        "FDCT_ROW_SSE2_H2 112 64        \n\t"
+        "FDCT_ROW_SSE2 112              \n\t"
+
+        "FDCT_ROW_SSE2_H1 32 128        \n\t"
+        "FDCT_ROW_SSE2 32               \n\t"
+        "FDCT_ROW_SSE2_H2 96 128        \n\t"
+        "FDCT_ROW_SSE2 96               \n\t"
+
+        "FDCT_ROW_SSE2_H1 48 192        \n\t"
+        "FDCT_ROW_SSE2 48               \n\t"
+        "FDCT_ROW_SSE2_H2 80 192        \n\t"
+        "FDCT_ROW_SSE2 80               \n\t"
+        :
+        : "r" (in), "r" (tab_frw_01234567_sse2.tab_frw_01234567_sse2), "r" (fdct_r_row_sse2.fdct_r_row_sse2), "i" (SHIFT_FRW_ROW), "r" (out)
     );
 }
 
 static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const int16_t *table)
-{ 
+{
     pshufw_m2r(*(in + 4), mm5, 0x1B);
     movq_m2r(*(in + 0), mm0);
     movq_r2r(mm0, mm1);
@@ -454,7 +454,7 @@ static always_inline void fdct_row_mmx2(const int16_t *in, int16_t *out, const i
 }
 
 static always_inline void fdct_row_mmx(const int16_t *in, int16_t *out, const int16_t *table)
-{ 
+{
 //FIXME reorder (i dont have a old mmx only cpu here to benchmark ...)
     movd_m2r(*(in + 6), mm1);
     punpcklwd_m2r(*(in + 4), mm1);
@@ -547,7 +547,7 @@ void ff_fdct_mmx2(int16_t *block)
     }
 }
 
-void ff_fdct_sse2(int16_t *block) 
+void ff_fdct_sse2(int16_t *block)
 {
     int64_t align_tmp[16] ATTR_ALIGN(8);
     int16_t * const block_tmp= (int16_t*)align_tmp;
diff --git a/src/libffmpeg/libavcodec/i386/fft_sse.c b/src/libffmpeg/libavcodec/i386/fft_sse.c
index d07c943e9..54851fb94 100644
--- a/src/libffmpeg/libavcodec/i386/fft_sse.c
+++ b/src/libffmpeg/libavcodec/i386/fft_sse.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "../dsputil.h"
 #include <math.h>
@@ -23,13 +23,13 @@
 
 #include <xmmintrin.h>
 
-static const float p1p1p1m1[4] __attribute__((aligned(16))) = 
+static const float p1p1p1m1[4] __attribute__((aligned(16))) =
     { 1.0, 1.0, 1.0, -1.0 };
 
-static const float p1p1m1p1[4] __attribute__((aligned(16))) = 
+static const float p1p1m1p1[4] __attribute__((aligned(16))) =
     { 1.0, 1.0, -1.0, 1.0 };
 
-static const float p1p1m1m1[4] __attribute__((aligned(16))) = 
+static const float p1p1m1m1[4] __attribute__((aligned(16))) =
     { 1.0, 1.0, -1.0, -1.0 };
 
 #if 0
@@ -45,8 +45,8 @@ static void print_v4sf(const char *str, __m128 a)
 void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
 {
     int ln = s->nbits;
-    int	j, np, np2;
-    int	nblocks, nloops;
+    int         j, np, np2;
+    int         nblocks, nloops;
     register FFTComplex *p, *q;
     FFTComplex *cptr, *cptr1;
     int k;
@@ -107,27 +107,27 @@ void ff_fft_calc_sse(FFTContext *s, FFTComplex *z)
 
                 a = *(__m128 *)p;
                 b = *(__m128 *)q;
-                
+
                 /* complex mul */
                 c = *(__m128 *)cptr;
                 /*  cre*re cim*re */
-                t1 = _mm_mul_ps(c, 
-                                _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 0, 0))); 
+                t1 = _mm_mul_ps(c,
+                                _mm_shuffle_ps(b, b, _MM_SHUFFLE(2, 2, 0, 0)));
                 c = *(__m128 *)(cptr + 2);
                 /*  -cim*im cre*im */
                 t2 = _mm_mul_ps(c,
-                                _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 1, 1))); 
+                                _mm_shuffle_ps(b, b, _MM_SHUFFLE(3, 3, 1, 1)));
                 b = _mm_add_ps(t1, t2);
-                
+
                 /* butterfly */
                 *(__m128 *)p = _mm_add_ps(a, b);
                 *(__m128 *)q = _mm_sub_ps(a, b);
-                
+
                 p += 2;
                 q += 2;
                 cptr += 4;
             } while (--k);
-        
+
             p += nloops;
             q += nloops;
         } while (--j);
diff --git a/src/libffmpeg/libavcodec/i386/h264dsp_mmx.c b/src/libffmpeg/libavcodec/i386/h264dsp_mmx.c
index c278affc8..8ab58f389 100644
--- a/src/libffmpeg/libavcodec/i386/h264dsp_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/h264dsp_mmx.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 
@@ -47,9 +47,9 @@
     SUMSUB_BADC( d13, s02, s13, d02 )
 
 #define SBUTTERFLY(a,b,t,n)\
-    "movq " #a ", " #t "		\n\t" /* abcd */\
-    "punpckl" #n " " #b ", " #a "	\n\t" /* aebf */\
-    "punpckh" #n " " #b ", " #t "	\n\t" /* cgdh */\
+    "movq " #a ", " #t "                \n\t" /* abcd */\
+    "punpckl" #n " " #b ", " #a "       \n\t" /* aebf */\
+    "punpckh" #n " " #b ", " #t "       \n\t" /* cgdh */\
 
 #define TRANSPOSE4(a,b,c,d,t)\
     SBUTTERFLY(a,b,t,wd) /* a=aebf t=cgdh */\
@@ -369,73 +369,73 @@ static void h264_h_loop_filter_chroma_intra_mmx2(uint8_t *pix, int stride, int a
 /* motion compensation */
 
 #define QPEL_H264V(A,B,C,D,E,F,OP)\
-        "movd (%0), "#F"		\n\t"\
-        "movq "#C", %%mm6		\n\t"\
-        "paddw "#D", %%mm6		\n\t"\
-        "psllw $2, %%mm6		\n\t"\
-        "psubw "#B", %%mm6		\n\t"\
-        "psubw "#E", %%mm6		\n\t"\
-        "pmullw %4, %%mm6		\n\t"\
-        "add %2, %0			\n\t"\
-        "punpcklbw %%mm7, "#F"		\n\t"\
-        "paddw %5, "#A"			\n\t"\
-        "paddw "#F", "#A"		\n\t"\
-        "paddw "#A", %%mm6		\n\t"\
-        "psraw $5, %%mm6		\n\t"\
-        "packuswb %%mm6, %%mm6		\n\t"\
+        "movd (%0), "#F"            \n\t"\
+        "movq "#C", %%mm6           \n\t"\
+        "paddw "#D", %%mm6          \n\t"\
+        "psllw $2, %%mm6            \n\t"\
+        "psubw "#B", %%mm6          \n\t"\
+        "psubw "#E", %%mm6          \n\t"\
+        "pmullw %4, %%mm6           \n\t"\
+        "add %2, %0                 \n\t"\
+        "punpcklbw %%mm7, "#F"      \n\t"\
+        "paddw %5, "#A"             \n\t"\
+        "paddw "#F", "#A"           \n\t"\
+        "paddw "#A", %%mm6          \n\t"\
+        "psraw $5, %%mm6            \n\t"\
+        "packuswb %%mm6, %%mm6      \n\t"\
         OP(%%mm6, (%1), A, d)\
-        "add %3, %1			\n\t"     
+        "add %3, %1                 \n\t"
 
 #define QPEL_H264HV(A,B,C,D,E,F,OF)\
-        "movd (%0), "#F"		\n\t"\
-        "movq "#C", %%mm6		\n\t"\
-        "paddw "#D", %%mm6		\n\t"\
-        "psllw $2, %%mm6		\n\t"\
-        "psubw "#B", %%mm6		\n\t"\
-        "psubw "#E", %%mm6		\n\t"\
-        "pmullw %3, %%mm6		\n\t"\
-        "add %2, %0			\n\t"\
-        "punpcklbw %%mm7, "#F"		\n\t"\
-        "paddw "#F", "#A"		\n\t"\
-        "paddw "#A", %%mm6		\n\t"\
-        "movq %%mm6, "#OF"(%1)		\n\t"
-        
+        "movd (%0), "#F"            \n\t"\
+        "movq "#C", %%mm6           \n\t"\
+        "paddw "#D", %%mm6          \n\t"\
+        "psllw $2, %%mm6            \n\t"\
+        "psubw "#B", %%mm6          \n\t"\
+        "psubw "#E", %%mm6          \n\t"\
+        "pmullw %3, %%mm6           \n\t"\
+        "add %2, %0                 \n\t"\
+        "punpcklbw %%mm7, "#F"      \n\t"\
+        "paddw "#F", "#A"           \n\t"\
+        "paddw "#A", %%mm6          \n\t"\
+        "movq %%mm6, "#OF"(%1)      \n\t"
+
 #define QPEL_H264(OPNAME, OP, MMX)\
 static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     int h=4;\
 \
     asm volatile(\
-        "pxor %%mm7, %%mm7		\n\t"\
-        "movq %5, %%mm4			\n\t"\
-        "movq %6, %%mm5			\n\t"\
-        "1:				\n\t"\
-        "movd  -1(%0), %%mm1		\n\t"\
-        "movd    (%0), %%mm2		\n\t"\
-        "movd   1(%0), %%mm3		\n\t"\
-        "movd   2(%0), %%mm0		\n\t"\
-        "punpcklbw %%mm7, %%mm1		\n\t"\
-        "punpcklbw %%mm7, %%mm2		\n\t"\
-        "punpcklbw %%mm7, %%mm3		\n\t"\
-        "punpcklbw %%mm7, %%mm0		\n\t"\
-        "paddw %%mm0, %%mm1		\n\t"\
-        "paddw %%mm3, %%mm2		\n\t"\
-        "movd  -2(%0), %%mm0		\n\t"\
-        "movd   3(%0), %%mm3		\n\t"\
-        "punpcklbw %%mm7, %%mm0		\n\t"\
-        "punpcklbw %%mm7, %%mm3		\n\t"\
-        "paddw %%mm3, %%mm0		\n\t"\
-        "psllw $2, %%mm2		\n\t"\
-        "psubw %%mm1, %%mm2		\n\t"\
-        "pmullw %%mm4, %%mm2		\n\t"\
-        "paddw %%mm5, %%mm0		\n\t"\
-        "paddw %%mm2, %%mm0		\n\t"\
-        "psraw $5, %%mm0		\n\t"\
-        "packuswb %%mm0, %%mm0		\n\t"\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "movq %5, %%mm4             \n\t"\
+        "movq %6, %%mm5             \n\t"\
+        "1:                         \n\t"\
+        "movd  -1(%0), %%mm1        \n\t"\
+        "movd    (%0), %%mm2        \n\t"\
+        "movd   1(%0), %%mm3        \n\t"\
+        "movd   2(%0), %%mm0        \n\t"\
+        "punpcklbw %%mm7, %%mm1     \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpcklbw %%mm7, %%mm3     \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "paddw %%mm0, %%mm1         \n\t"\
+        "paddw %%mm3, %%mm2         \n\t"\
+        "movd  -2(%0), %%mm0        \n\t"\
+        "movd   3(%0), %%mm3        \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "punpcklbw %%mm7, %%mm3     \n\t"\
+        "paddw %%mm3, %%mm0         \n\t"\
+        "psllw $2, %%mm2            \n\t"\
+        "psubw %%mm1, %%mm2         \n\t"\
+        "pmullw %%mm4, %%mm2        \n\t"\
+        "paddw %%mm5, %%mm0         \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"\
+        "psraw $5, %%mm0            \n\t"\
+        "packuswb %%mm0, %%mm0      \n\t"\
         OP(%%mm0, (%1),%%mm6, d)\
-        "add %3, %0			\n\t"\
-        "add %4, %1			\n\t"\
-        "decl %2			\n\t"\
-        " jnz 1b			\n\t"\
+        "add %3, %0                 \n\t"\
+        "add %4, %1                 \n\t"\
+        "decl %2                    \n\t"\
+        " jnz 1b                    \n\t"\
         : "+a"(src), "+c"(dst), "+m"(h)\
         : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
         : "memory"\
@@ -444,22 +444,22 @@ static void OPNAME ## h264_qpel4_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
 static void OPNAME ## h264_qpel4_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     src -= 2*srcStride;\
     asm volatile(\
-        "pxor %%mm7, %%mm7		\n\t"\
-        "movd (%0), %%mm0		\n\t"\
-        "add %2, %0			\n\t"\
-        "movd (%0), %%mm1		\n\t"\
-        "add %2, %0			\n\t"\
-        "movd (%0), %%mm2		\n\t"\
-        "add %2, %0			\n\t"\
-        "movd (%0), %%mm3		\n\t"\
-        "add %2, %0			\n\t"\
-        "movd (%0), %%mm4		\n\t"\
-        "add %2, %0			\n\t"\
-        "punpcklbw %%mm7, %%mm0		\n\t"\
-        "punpcklbw %%mm7, %%mm1		\n\t"\
-        "punpcklbw %%mm7, %%mm2		\n\t"\
-        "punpcklbw %%mm7, %%mm3		\n\t"\
-        "punpcklbw %%mm7, %%mm4		\n\t"\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "movd (%0), %%mm0           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm1           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm2           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm3           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm4           \n\t"\
+        "add %2, %0                 \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "punpcklbw %%mm7, %%mm1     \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpcklbw %%mm7, %%mm3     \n\t"\
+        "punpcklbw %%mm7, %%mm4     \n\t"\
         QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
         QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
         QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
@@ -476,22 +476,22 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
     src -= 2*srcStride+2;\
     while(w--){\
         asm volatile(\
-            "pxor %%mm7, %%mm7			\n\t"\
-            "movd (%0), %%mm0			\n\t"\
-            "add %2, %0				\n\t"\
-            "movd (%0), %%mm1			\n\t"\
-            "add %2, %0				\n\t"\
-            "movd (%0), %%mm2			\n\t"\
-            "add %2, %0				\n\t"\
-            "movd (%0), %%mm3			\n\t"\
-            "add %2, %0				\n\t"\
-            "movd (%0), %%mm4			\n\t"\
-            "add %2, %0				\n\t"\
-            "punpcklbw %%mm7, %%mm0		\n\t"\
-            "punpcklbw %%mm7, %%mm1		\n\t"\
-            "punpcklbw %%mm7, %%mm2		\n\t"\
-            "punpcklbw %%mm7, %%mm3		\n\t"\
-            "punpcklbw %%mm7, %%mm4		\n\t"\
+            "pxor %%mm7, %%mm7      \n\t"\
+            "movd (%0), %%mm0       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm1       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm2       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm3       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm4       \n\t"\
+            "add %2, %0             \n\t"\
+            "punpcklbw %%mm7, %%mm0 \n\t"\
+            "punpcklbw %%mm7, %%mm1 \n\t"\
+            "punpcklbw %%mm7, %%mm2 \n\t"\
+            "punpcklbw %%mm7, %%mm3 \n\t"\
+            "punpcklbw %%mm7, %%mm4 \n\t"\
             QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*3)\
             QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*3)\
             QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*3)\
@@ -506,28 +506,28 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
     }\
     tmp -= 3*4;\
     asm volatile(\
-        "movq %4, %%mm6			\n\t"\
-        "1:				\n\t"\
-        "movq     (%0), %%mm0		\n\t"\
-        "paddw  10(%0), %%mm0		\n\t"\
-        "movq    2(%0), %%mm1		\n\t"\
-        "paddw   8(%0), %%mm1		\n\t"\
-        "movq    4(%0), %%mm2		\n\t"\
-        "paddw   6(%0), %%mm2		\n\t"\
-        "psubw %%mm1, %%mm0		\n\t"/*a-b   (abccba)*/\
-        "psraw $2, %%mm0		\n\t"/*(a-b)/4 */\
-        "psubw %%mm1, %%mm0		\n\t"/*(a-b)/4-b */\
-        "paddsw %%mm2, %%mm0		\n\t"\
-        "psraw $2, %%mm0		\n\t"/*((a-b)/4-b)/4 */\
-        "paddw %%mm6, %%mm2		\n\t"\
-        "paddw %%mm2, %%mm0		\n\t"\
-        "psraw $6, %%mm0		\n\t"\
-        "packuswb %%mm0, %%mm0		\n\t"\
+        "movq %4, %%mm6             \n\t"\
+        "1:                         \n\t"\
+        "movq     (%0), %%mm0       \n\t"\
+        "paddw  10(%0), %%mm0       \n\t"\
+        "movq    2(%0), %%mm1       \n\t"\
+        "paddw   8(%0), %%mm1       \n\t"\
+        "movq    4(%0), %%mm2       \n\t"\
+        "paddw   6(%0), %%mm2       \n\t"\
+        "psubw %%mm1, %%mm0         \n\t"/*a-b   (abccba)*/\
+        "psraw $2, %%mm0            \n\t"/*(a-b)/4 */\
+        "psubw %%mm1, %%mm0         \n\t"/*(a-b)/4-b */\
+        "paddsw %%mm2, %%mm0        \n\t"\
+        "psraw $2, %%mm0            \n\t"/*((a-b)/4-b+c)/4 */\
+        "paddw %%mm6, %%mm2         \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"/*(a-5*b+20*c)/16 +32 */\
+        "psraw $6, %%mm0            \n\t"\
+        "packuswb %%mm0, %%mm0      \n\t"\
         OP(%%mm0, (%1),%%mm7, d)\
-        "add $24, %0			\n\t"\
-        "add %3, %1			\n\t"\
-        "decl %2			\n\t"\
-        " jnz 1b			\n\t"\
+        "add $24, %0                \n\t"\
+        "add %3, %1                 \n\t"\
+        "decl %2                    \n\t"\
+        " jnz 1b                    \n\t"\
         : "+a"(tmp), "+c"(dst), "+m"(h)\
         : "S"((long)dstStride), "m"(ff_pw_32)\
         : "memory"\
@@ -537,54 +537,54 @@ static void OPNAME ## h264_qpel4_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
 static void OPNAME ## h264_qpel8_h_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
     int h=8;\
     asm volatile(\
-        "pxor %%mm7, %%mm7		\n\t"\
-        "movq %5, %%mm6			\n\t"\
-        "1:				\n\t"\
-        "movq    (%0), %%mm0		\n\t"\
-        "movq   1(%0), %%mm2		\n\t"\
-        "movq %%mm0, %%mm1		\n\t"\
-        "movq %%mm2, %%mm3		\n\t"\
-        "punpcklbw %%mm7, %%mm0		\n\t"\
-        "punpckhbw %%mm7, %%mm1		\n\t"\
-        "punpcklbw %%mm7, %%mm2		\n\t"\
-        "punpckhbw %%mm7, %%mm3		\n\t"\
-        "paddw %%mm2, %%mm0		\n\t"\
-        "paddw %%mm3, %%mm1		\n\t"\
-        "psllw $2, %%mm0		\n\t"\
-        "psllw $2, %%mm1		\n\t"\
-        "movq   -1(%0), %%mm2		\n\t"\
-        "movq    2(%0), %%mm4		\n\t"\
-        "movq %%mm2, %%mm3		\n\t"\
-        "movq %%mm4, %%mm5		\n\t"\
-        "punpcklbw %%mm7, %%mm2		\n\t"\
-        "punpckhbw %%mm7, %%mm3		\n\t"\
-        "punpcklbw %%mm7, %%mm4		\n\t"\
-        "punpckhbw %%mm7, %%mm5		\n\t"\
-        "paddw %%mm4, %%mm2		\n\t"\
-        "paddw %%mm3, %%mm5		\n\t"\
-        "psubw %%mm2, %%mm0		\n\t"\
-        "psubw %%mm5, %%mm1		\n\t"\
-        "pmullw %%mm6, %%mm0		\n\t"\
-        "pmullw %%mm6, %%mm1		\n\t"\
-        "movd   -2(%0), %%mm2		\n\t"\
-        "movd    7(%0), %%mm5		\n\t"\
-        "punpcklbw %%mm7, %%mm2		\n\t"\
-        "punpcklbw %%mm7, %%mm5		\n\t"\
-        "paddw %%mm3, %%mm2		\n\t"\
-        "paddw %%mm5, %%mm4		\n\t"\
-        "movq %6, %%mm5			\n\t"\
-        "paddw %%mm5, %%mm2		\n\t"\
-        "paddw %%mm5, %%mm4		\n\t"\
-        "paddw %%mm2, %%mm0		\n\t"\
-        "paddw %%mm4, %%mm1		\n\t"\
-        "psraw $5, %%mm0		\n\t"\
-        "psraw $5, %%mm1		\n\t"\
-        "packuswb %%mm1, %%mm0		\n\t"\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "movq %5, %%mm6             \n\t"\
+        "1:                         \n\t"\
+        "movq    (%0), %%mm0        \n\t"\
+        "movq   1(%0), %%mm2        \n\t"\
+        "movq %%mm0, %%mm1          \n\t"\
+        "movq %%mm2, %%mm3          \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "punpckhbw %%mm7, %%mm1     \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpckhbw %%mm7, %%mm3     \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"\
+        "paddw %%mm3, %%mm1         \n\t"\
+        "psllw $2, %%mm0            \n\t"\
+        "psllw $2, %%mm1            \n\t"\
+        "movq   -1(%0), %%mm2       \n\t"\
+        "movq    2(%0), %%mm4       \n\t"\
+        "movq %%mm2, %%mm3          \n\t"\
+        "movq %%mm4, %%mm5          \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpckhbw %%mm7, %%mm3     \n\t"\
+        "punpcklbw %%mm7, %%mm4     \n\t"\
+        "punpckhbw %%mm7, %%mm5     \n\t"\
+        "paddw %%mm4, %%mm2         \n\t"\
+        "paddw %%mm3, %%mm5         \n\t"\
+        "psubw %%mm2, %%mm0         \n\t"\
+        "psubw %%mm5, %%mm1         \n\t"\
+        "pmullw %%mm6, %%mm0        \n\t"\
+        "pmullw %%mm6, %%mm1        \n\t"\
+        "movd   -2(%0), %%mm2       \n\t"\
+        "movd    7(%0), %%mm5       \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpcklbw %%mm7, %%mm5     \n\t"\
+        "paddw %%mm3, %%mm2         \n\t"\
+        "paddw %%mm5, %%mm4         \n\t"\
+        "movq %6, %%mm5             \n\t"\
+        "paddw %%mm5, %%mm2         \n\t"\
+        "paddw %%mm5, %%mm4         \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"\
+        "paddw %%mm4, %%mm1         \n\t"\
+        "psraw $5, %%mm0            \n\t"\
+        "psraw $5, %%mm1            \n\t"\
+        "packuswb %%mm1, %%mm0      \n\t"\
         OP(%%mm0, (%1),%%mm5, q)\
-        "add %3, %0			\n\t"\
-        "add %4, %1			\n\t"\
-        "decl %2			\n\t"\
-        " jnz 1b			\n\t"\
+        "add %3, %0                 \n\t"\
+        "add %4, %1                 \n\t"\
+        "decl %2                    \n\t"\
+        " jnz 1b                    \n\t"\
         : "+a"(src), "+c"(dst), "+m"(h)\
         : "d"((long)srcStride), "S"((long)dstStride), "m"(ff_pw_5), "m"(ff_pw_16)\
         : "memory"\
@@ -597,22 +597,22 @@ static void OPNAME ## h264_qpel8_v_lowpass_ ## MMX(uint8_t *dst, uint8_t *src, i
     \
     while(h--){\
       asm volatile(\
-        "pxor %%mm7, %%mm7		\n\t"\
-        "movd (%0), %%mm0		\n\t"\
-        "add %2, %0			\n\t"\
-        "movd (%0), %%mm1		\n\t"\
-        "add %2, %0			\n\t"\
-        "movd (%0), %%mm2		\n\t"\
-        "add %2, %0			\n\t"\
-        "movd (%0), %%mm3		\n\t"\
-        "add %2, %0			\n\t"\
-        "movd (%0), %%mm4		\n\t"\
-        "add %2, %0			\n\t"\
-        "punpcklbw %%mm7, %%mm0		\n\t"\
-        "punpcklbw %%mm7, %%mm1		\n\t"\
-        "punpcklbw %%mm7, %%mm2		\n\t"\
-        "punpcklbw %%mm7, %%mm3		\n\t"\
-        "punpcklbw %%mm7, %%mm4		\n\t"\
+        "pxor %%mm7, %%mm7          \n\t"\
+        "movd (%0), %%mm0           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm1           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm2           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm3           \n\t"\
+        "add %2, %0                 \n\t"\
+        "movd (%0), %%mm4           \n\t"\
+        "add %2, %0                 \n\t"\
+        "punpcklbw %%mm7, %%mm0     \n\t"\
+        "punpcklbw %%mm7, %%mm1     \n\t"\
+        "punpcklbw %%mm7, %%mm2     \n\t"\
+        "punpcklbw %%mm7, %%mm3     \n\t"\
+        "punpcklbw %%mm7, %%mm4     \n\t"\
         QPEL_H264V(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, OP)\
         QPEL_H264V(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, OP)\
         QPEL_H264V(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, OP)\
@@ -636,22 +636,22 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
     src -= 2*srcStride+2;\
     while(w--){\
         asm volatile(\
-            "pxor %%mm7, %%mm7			\n\t"\
-            "movd (%0), %%mm0			\n\t"\
-            "add %2, %0				\n\t"\
-            "movd (%0), %%mm1			\n\t"\
-            "add %2, %0				\n\t"\
-            "movd (%0), %%mm2			\n\t"\
-            "add %2, %0				\n\t"\
-            "movd (%0), %%mm3			\n\t"\
-            "add %2, %0				\n\t"\
-            "movd (%0), %%mm4			\n\t"\
-            "add %2, %0				\n\t"\
-            "punpcklbw %%mm7, %%mm0		\n\t"\
-            "punpcklbw %%mm7, %%mm1		\n\t"\
-            "punpcklbw %%mm7, %%mm2		\n\t"\
-            "punpcklbw %%mm7, %%mm3		\n\t"\
-            "punpcklbw %%mm7, %%mm4		\n\t"\
+            "pxor %%mm7, %%mm7      \n\t"\
+            "movd (%0), %%mm0       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm1       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm2       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm3       \n\t"\
+            "add %2, %0             \n\t"\
+            "movd (%0), %%mm4       \n\t"\
+            "add %2, %0             \n\t"\
+            "punpcklbw %%mm7, %%mm0 \n\t"\
+            "punpcklbw %%mm7, %%mm1 \n\t"\
+            "punpcklbw %%mm7, %%mm2 \n\t"\
+            "punpcklbw %%mm7, %%mm3 \n\t"\
+            "punpcklbw %%mm7, %%mm4 \n\t"\
             QPEL_H264HV(%%mm0, %%mm1, %%mm2, %%mm3, %%mm4, %%mm5, 0*8*4)\
             QPEL_H264HV(%%mm1, %%mm2, %%mm3, %%mm4, %%mm5, %%mm0, 1*8*4)\
             QPEL_H264HV(%%mm2, %%mm3, %%mm4, %%mm5, %%mm0, %%mm1, 2*8*4)\
@@ -670,42 +670,42 @@ static void OPNAME ## h264_qpel8_hv_lowpass_ ## MMX(uint8_t *dst, int16_t *tmp,
     }\
     tmp -= 4*4;\
     asm volatile(\
-        "movq %4, %%mm6			\n\t"\
-        "1:				\n\t"\
-        "movq     (%0), %%mm0		\n\t"\
-        "movq    8(%0), %%mm3		\n\t"\
-        "movq    2(%0), %%mm1		\n\t"\
-        "movq   10(%0), %%mm4		\n\t"\
-        "paddw   %%mm4, %%mm0		\n\t"\
-        "paddw   %%mm3, %%mm1		\n\t"\
-        "paddw  18(%0), %%mm3		\n\t"\
-        "paddw  16(%0), %%mm4		\n\t"\
-        "movq    4(%0), %%mm2		\n\t"\
-        "movq   12(%0), %%mm5		\n\t"\
-        "paddw   6(%0), %%mm2		\n\t"\
-        "paddw  14(%0), %%mm5		\n\t"\
-        "psubw %%mm1, %%mm0		\n\t"\
-        "psubw %%mm4, %%mm3		\n\t"\
-        "psraw $2, %%mm0		\n\t"\
-        "psraw $2, %%mm3		\n\t"\
-        "psubw %%mm1, %%mm0		\n\t"\
-        "psubw %%mm4, %%mm3		\n\t"\
-        "paddsw %%mm2, %%mm0		\n\t"\
-        "paddsw %%mm5, %%mm3		\n\t"\
-        "psraw $2, %%mm0		\n\t"\
-        "psraw $2, %%mm3		\n\t"\
-        "paddw %%mm6, %%mm2		\n\t"\
-        "paddw %%mm6, %%mm5		\n\t"\
-        "paddw %%mm2, %%mm0		\n\t"\
-        "paddw %%mm5, %%mm3		\n\t"\
-        "psraw $6, %%mm0		\n\t"\
-        "psraw $6, %%mm3		\n\t"\
-        "packuswb %%mm3, %%mm0		\n\t"\
+        "movq %4, %%mm6             \n\t"\
+        "1:                         \n\t"\
+        "movq     (%0), %%mm0       \n\t"\
+        "movq    8(%0), %%mm3       \n\t"\
+        "movq    2(%0), %%mm1       \n\t"\
+        "movq   10(%0), %%mm4       \n\t"\
+        "paddw   %%mm4, %%mm0       \n\t"\
+        "paddw   %%mm3, %%mm1       \n\t"\
+        "paddw  18(%0), %%mm3       \n\t"\
+        "paddw  16(%0), %%mm4       \n\t"\
+        "movq    4(%0), %%mm2       \n\t"\
+        "movq   12(%0), %%mm5       \n\t"\
+        "paddw   6(%0), %%mm2       \n\t"\
+        "paddw  14(%0), %%mm5       \n\t"\
+        "psubw %%mm1, %%mm0         \n\t"\
+        "psubw %%mm4, %%mm3         \n\t"\
+        "psraw $2, %%mm0            \n\t"\
+        "psraw $2, %%mm3            \n\t"\
+        "psubw %%mm1, %%mm0         \n\t"\
+        "psubw %%mm4, %%mm3         \n\t"\
+        "paddsw %%mm2, %%mm0        \n\t"\
+        "paddsw %%mm5, %%mm3        \n\t"\
+        "psraw $2, %%mm0            \n\t"\
+        "psraw $2, %%mm3            \n\t"\
+        "paddw %%mm6, %%mm2         \n\t"\
+        "paddw %%mm6, %%mm5         \n\t"\
+        "paddw %%mm2, %%mm0         \n\t"\
+        "paddw %%mm5, %%mm3         \n\t"\
+        "psraw $6, %%mm0            \n\t"\
+        "psraw $6, %%mm3            \n\t"\
+        "packuswb %%mm3, %%mm0      \n\t"\
         OP(%%mm0, (%1),%%mm7, q)\
-        "add $32, %0			\n\t"\
-        "add %3, %1			\n\t"\
-        "decl %2			\n\t"\
-        " jnz 1b			\n\t"\
+        "add $32, %0                \n\t"\
+        "add %3, %1                 \n\t"\
+        "decl %2                    \n\t"\
+        " jnz 1b                    \n\t"\
         : "+a"(tmp), "+c"(dst), "+m"(h)\
         : "S"((long)dstStride), "m"(ff_pw_32)\
         : "memory"\
@@ -862,15 +862,15 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *
 }\
 
 
-#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b "	\n\t"
+#define PUT_OP(a,b,temp, size) "mov" #size " " #a ", " #b "    \n\t"
 #define AVG_3DNOW_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp "	\n\t"\
-"pavgusb " #temp ", " #a "	\n\t"\
-"mov" #size " " #a ", " #b "	\n\t"
+"mov" #size " " #b ", " #temp "   \n\t"\
+"pavgusb " #temp ", " #a "        \n\t"\
+"mov" #size " " #a ", " #b "      \n\t"
 #define AVG_MMX2_OP(a,b,temp, size) \
-"mov" #size " " #b ", " #temp "	\n\t"\
-"pavgb " #temp ", " #a "	\n\t"\
-"mov" #size " " #a ", " #b "	\n\t"
+"mov" #size " " #b ", " #temp "   \n\t"\
+"pavgb " #temp ", " #a "          \n\t"\
+"mov" #size " " #a ", " #b "      \n\t"
 
 QPEL_H264(put_,       PUT_OP, 3dnow)
 QPEL_H264(avg_, AVG_3DNOW_OP, 3dnow)
@@ -892,22 +892,42 @@ H264_MC(avg_, 16,mmx2)
 
 
 #define H264_CHROMA_OP(S,D)
+#define H264_CHROMA_OP4(S,D,T)
 #define H264_CHROMA_MC8_TMPL put_h264_chroma_mc8_mmx
+#define H264_CHROMA_MC4_TMPL put_h264_chroma_mc4_mmx
+#define H264_CHROMA_MC8_MV0 put_pixels8_mmx
 #include "dsputil_h264_template_mmx.c"
 #undef H264_CHROMA_OP
+#undef H264_CHROMA_OP4
 #undef H264_CHROMA_MC8_TMPL
+#undef H264_CHROMA_MC4_TMPL
+#undef H264_CHROMA_MC8_MV0
 
 #define H264_CHROMA_OP(S,D) "pavgb " #S ", " #D " \n\t"
+#define H264_CHROMA_OP4(S,D,T) "movd  " #S ", " #T " \n\t"\
+                               "pavgb " #T ", " #D " \n\t"
 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_mmx2
+#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_mmx2
+#define H264_CHROMA_MC8_MV0 avg_pixels8_mmx2
 #include "dsputil_h264_template_mmx.c"
 #undef H264_CHROMA_OP
+#undef H264_CHROMA_OP4
 #undef H264_CHROMA_MC8_TMPL
+#undef H264_CHROMA_MC4_TMPL
+#undef H264_CHROMA_MC8_MV0
 
 #define H264_CHROMA_OP(S,D) "pavgusb " #S ", " #D " \n\t"
+#define H264_CHROMA_OP4(S,D,T) "movd " #S ", " #T " \n\t"\
+                               "pavgusb " #T ", " #D " \n\t"
 #define H264_CHROMA_MC8_TMPL avg_h264_chroma_mc8_3dnow
+#define H264_CHROMA_MC4_TMPL avg_h264_chroma_mc4_3dnow
+#define H264_CHROMA_MC8_MV0 avg_pixels8_3dnow
 #include "dsputil_h264_template_mmx.c"
 #undef H264_CHROMA_OP
+#undef H264_CHROMA_OP4
 #undef H264_CHROMA_MC8_TMPL
+#undef H264_CHROMA_MC4_TMPL
+#undef H264_CHROMA_MC8_MV0
 
 /***********************************/
 /* weighted prediction */
@@ -935,8 +955,8 @@ static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_de
                 "punpcklbw %%mm7, %%mm1 \n\t"
                 "pmullw    %%mm4, %%mm0 \n\t"
                 "pmullw    %%mm4, %%mm1 \n\t"
-                "paddw     %%mm5, %%mm0 \n\t"
-                "paddw     %%mm5, %%mm1 \n\t"
+                "paddsw    %%mm5, %%mm0 \n\t"
+                "paddsw    %%mm5, %%mm1 \n\t"
                 "psraw     %%mm6, %%mm0 \n\t"
                 "psraw     %%mm6, %%mm1 \n\t"
                 "packuswb  %%mm7, %%mm0 \n\t"
@@ -951,10 +971,10 @@ static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_de
     }
 }
 
-static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets, int w, int h)
+static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset, int w, int h)
 {
     int x, y;
-    int offset = ((offsets + offsetd + 1) | 1) << log2_denom;
+    offset = ((offset + 1) | 1) << log2_denom;
     asm volatile(
         "movd    %0, %%mm3        \n\t"
         "movd    %1, %%mm4        \n\t"
@@ -975,8 +995,8 @@ static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int str
                 "punpcklbw %%mm7, %%mm1 \n\t"
                 "pmullw    %%mm3, %%mm0 \n\t"
                 "pmullw    %%mm4, %%mm1 \n\t"
-                "paddw     %%mm5, %%mm0 \n\t"
-                "paddw     %%mm1, %%mm0 \n\t"
+                "paddsw    %%mm1, %%mm0 \n\t"
+                "paddsw    %%mm5, %%mm0 \n\t"
                 "psraw     %%mm6, %%mm0 \n\t"
                 "packuswb  %%mm0, %%mm0 \n\t"
                 "movd      %%mm0, %0    \n\t"
@@ -990,8 +1010,8 @@ static inline void ff_h264_biweight_WxH_mmx2(uint8_t *dst, uint8_t *src, int str
 }
 
 #define H264_WEIGHT(W,H) \
-static void ff_h264_biweight_ ## W ## x ## H ## _mmx2(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offsetd, int offsets){ \
-    ff_h264_biweight_WxH_mmx2(dst, src, stride, log2_denom, weightd, weights, offsetd, offsets, W, H); \
+static void ff_h264_biweight_ ## W ## x ## H ## _mmx2(uint8_t *dst, uint8_t *src, int stride, int log2_denom, int weightd, int weights, int offset){ \
+    ff_h264_biweight_WxH_mmx2(dst, src, stride, log2_denom, weightd, weights, offset, W, H); \
 } \
 static void ff_h264_weight_ ## W ## x ## H ## _mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset){ \
     ff_h264_weight_WxH_mmx2(dst, stride, log2_denom, weight, offset, W, H); \
diff --git a/src/libffmpeg/libavcodec/i386/idct_mmx.c b/src/libffmpeg/libavcodec/i386/idct_mmx.c
index d1a84549d..1c8632fb7 100644
--- a/src/libffmpeg/libavcodec/i386/idct_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/idct_mmx.c
@@ -19,7 +19,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "common.h"
@@ -38,7 +38,7 @@
 #if 0
 /* C row IDCT - its just here to document the MMXEXT and MMX versions */
 static inline void idct_row (int16_t * row, int offset,
-			     int16_t * table, int32_t * rounder)
+                             int16_t * table, int32_t * rounder)
 {
     int C1, C2, C3, C4, C5, C6, C7;
     int a0, a1, a2, a3, b0, b1, b2, b3;
@@ -77,241 +77,241 @@ static inline void idct_row (int16_t * row, int offset,
 
 /* MMXEXT row IDCT */
 
-#define mmxext_table(c1,c2,c3,c4,c5,c6,c7)	{  c4,  c2, -c4, -c2,	\
-						   c4,  c6,  c4,  c6,	\
-						   c1,  c3, -c1, -c5,	\
-						   c5,  c7,  c3, -c7,	\
-						   c4, -c6,  c4, -c6,	\
-						  -c4,  c2,  c4, -c2,	\
-						   c5, -c1,  c3, -c1,	\
-						   c7,  c3,  c7, -c5 }
+#define mmxext_table(c1,c2,c3,c4,c5,c6,c7)      {  c4,  c2, -c4, -c2,   \
+                                                   c4,  c6,  c4,  c6,   \
+                                                   c1,  c3, -c1, -c5,   \
+                                                   c5,  c7,  c3, -c7,   \
+                                                   c4, -c6,  c4, -c6,   \
+                                                  -c4,  c2,  c4, -c2,   \
+                                                   c5, -c1,  c3, -c1,   \
+                                                   c7,  c3,  c7, -c5 }
 
 static inline void mmxext_row_head (int16_t * row, int offset, const int16_t * table)
 {
-    movq_m2r (*(row+offset), mm2);	// mm2 = x6 x4 x2 x0
+    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
 
-    movq_m2r (*(row+offset+4), mm5);	// mm5 = x7 x5 x3 x1
-    movq_r2r (mm2, mm0);		// mm0 = x6 x4 x2 x0
+    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
+    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0
 
-    movq_m2r (*table, mm3);		// mm3 = -C2 -C4 C2 C4
-    movq_r2r (mm5, mm6);		// mm6 = x7 x5 x3 x1
+    movq_m2r (*table, mm3);             // mm3 = -C2 -C4 C2 C4
+    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1
 
-    movq_m2r (*(table+4), mm4);		// mm4 = C6 C4 C6 C4
-    pmaddwd_r2r (mm0, mm3);		// mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
+    movq_m2r (*(table+4), mm4);         // mm4 = C6 C4 C6 C4
+    pmaddwd_r2r (mm0, mm3);             // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
 
-    pshufw_r2r (mm2, mm2, 0x4e);	// mm2 = x2 x0 x6 x4
+    pshufw_r2r (mm2, mm2, 0x4e);        // mm2 = x2 x0 x6 x4
 }
 
 static inline void mmxext_row (const int16_t * table, const int32_t * rounder)
 {
-    movq_m2r (*(table+8), mm1);		// mm1 = -C5 -C1 C3 C1
-    pmaddwd_r2r (mm2, mm4);		// mm4 = C4*x0+C6*x2 C4*x4+C6*x6
+    movq_m2r (*(table+8), mm1);         // mm1 = -C5 -C1 C3 C1
+    pmaddwd_r2r (mm2, mm4);             // mm4 = C4*x0+C6*x2 C4*x4+C6*x6
 
-    pmaddwd_m2r (*(table+16), mm0);	// mm0 = C4*x4-C6*x6 C4*x0-C6*x2
-    pshufw_r2r (mm6, mm6, 0x4e);	// mm6 = x3 x1 x7 x5
+    pmaddwd_m2r (*(table+16), mm0);     // mm0 = C4*x4-C6*x6 C4*x0-C6*x2
+    pshufw_r2r (mm6, mm6, 0x4e);        // mm6 = x3 x1 x7 x5
 
-    movq_m2r (*(table+12), mm7);	// mm7 = -C7 C3 C7 C5
-    pmaddwd_r2r (mm5, mm1);		// mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
+    movq_m2r (*(table+12), mm7);        // mm7 = -C7 C3 C7 C5
+    pmaddwd_r2r (mm5, mm1);             // mm1 = -C1*x5-C5*x7 C1*x1+C3*x3
 
-    paddd_m2r (*rounder, mm3);		// mm3 += rounder
-    pmaddwd_r2r (mm6, mm7);		// mm7 = C3*x1-C7*x3 C5*x5+C7*x7
+    paddd_m2r (*rounder, mm3);          // mm3 += rounder
+    pmaddwd_r2r (mm6, mm7);             // mm7 = C3*x1-C7*x3 C5*x5+C7*x7
 
-    pmaddwd_m2r (*(table+20), mm2);	// mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
-    paddd_r2r (mm4, mm3);		// mm3 = a1 a0 + rounder
+    pmaddwd_m2r (*(table+20), mm2);     // mm2 = C4*x0-C2*x2 -C4*x4+C2*x6
+    paddd_r2r (mm4, mm3);               // mm3 = a1 a0 + rounder
 
-    pmaddwd_m2r (*(table+24), mm5);	// mm5 = C3*x5-C1*x7 C5*x1-C1*x3
-    movq_r2r (mm3, mm4);		// mm4 = a1 a0 + rounder
+    pmaddwd_m2r (*(table+24), mm5);     // mm5 = C3*x5-C1*x7 C5*x1-C1*x3
+    movq_r2r (mm3, mm4);                // mm4 = a1 a0 + rounder
 
-    pmaddwd_m2r (*(table+28), mm6);	// mm6 = C7*x1-C5*x3 C7*x5+C3*x7
-    paddd_r2r (mm7, mm1);		// mm1 = b1 b0
+    pmaddwd_m2r (*(table+28), mm6);     // mm6 = C7*x1-C5*x3 C7*x5+C3*x7
+    paddd_r2r (mm7, mm1);               // mm1 = b1 b0
 
-    paddd_m2r (*rounder, mm0);		// mm0 += rounder
-    psubd_r2r (mm1, mm3);		// mm3 = a1-b1 a0-b0 + rounder
+    paddd_m2r (*rounder, mm0);          // mm0 += rounder
+    psubd_r2r (mm1, mm3);               // mm3 = a1-b1 a0-b0 + rounder
 
-    psrad_i2r (ROW_SHIFT, mm3);		// mm3 = y6 y7
-    paddd_r2r (mm4, mm1);		// mm1 = a1+b1 a0+b0 + rounder
+    psrad_i2r (ROW_SHIFT, mm3);         // mm3 = y6 y7
+    paddd_r2r (mm4, mm1);               // mm1 = a1+b1 a0+b0 + rounder
 
-    paddd_r2r (mm2, mm0);		// mm0 = a3 a2 + rounder
-    psrad_i2r (ROW_SHIFT, mm1);		// mm1 = y1 y0
+    paddd_r2r (mm2, mm0);               // mm0 = a3 a2 + rounder
+    psrad_i2r (ROW_SHIFT, mm1);         // mm1 = y1 y0
 
-    paddd_r2r (mm6, mm5);		// mm5 = b3 b2
-    movq_r2r (mm0, mm4);		// mm4 = a3 a2 + rounder
+    paddd_r2r (mm6, mm5);               // mm5 = b3 b2
+    movq_r2r (mm0, mm4);                // mm4 = a3 a2 + rounder
 
-    paddd_r2r (mm5, mm0);		// mm0 = a3+b3 a2+b2 + rounder
-    psubd_r2r (mm5, mm4);		// mm4 = a3-b3 a2-b2 + rounder
+    paddd_r2r (mm5, mm0);               // mm0 = a3+b3 a2+b2 + rounder
+    psubd_r2r (mm5, mm4);               // mm4 = a3-b3 a2-b2 + rounder
 }
 
 static inline void mmxext_row_tail (int16_t * row, int store)
 {
-    psrad_i2r (ROW_SHIFT, mm0);		// mm0 = y3 y2
+    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2
 
-    psrad_i2r (ROW_SHIFT, mm4);		// mm4 = y4 y5
+    psrad_i2r (ROW_SHIFT, mm4);         // mm4 = y4 y5
 
-    packssdw_r2r (mm0, mm1);		// mm1 = y3 y2 y1 y0
+    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
 
-    packssdw_r2r (mm3, mm4);		// mm4 = y6 y7 y4 y5
+    packssdw_r2r (mm3, mm4);            // mm4 = y6 y7 y4 y5
 
-    movq_r2m (mm1, *(row+store));	// save y3 y2 y1 y0
-    pshufw_r2r (mm4, mm4, 0xb1);	// mm4 = y7 y6 y5 y4
+    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
+    pshufw_r2r (mm4, mm4, 0xb1);        // mm4 = y7 y6 y5 y4
 
     /* slot */
 
-    movq_r2m (mm4, *(row+store+4));	// save y7 y6 y5 y4
+    movq_r2m (mm4, *(row+store+4));     // save y7 y6 y5 y4
 }
 
 static inline void mmxext_row_mid (int16_t * row, int store,
-				   int offset, const int16_t * table)
+                                   int offset, const int16_t * table)
 {
-    movq_m2r (*(row+offset), mm2);	// mm2 = x6 x4 x2 x0
-    psrad_i2r (ROW_SHIFT, mm0);		// mm0 = y3 y2
+    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
+    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2
 
-    movq_m2r (*(row+offset+4), mm5);	// mm5 = x7 x5 x3 x1
-    psrad_i2r (ROW_SHIFT, mm4);		// mm4 = y4 y5
+    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
+    psrad_i2r (ROW_SHIFT, mm4);         // mm4 = y4 y5
 
-    packssdw_r2r (mm0, mm1);		// mm1 = y3 y2 y1 y0
-    movq_r2r (mm5, mm6);		// mm6 = x7 x5 x3 x1
+    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
+    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1
 
-    packssdw_r2r (mm3, mm4);		// mm4 = y6 y7 y4 y5
-    movq_r2r (mm2, mm0);		// mm0 = x6 x4 x2 x0
+    packssdw_r2r (mm3, mm4);            // mm4 = y6 y7 y4 y5
+    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0
 
-    movq_r2m (mm1, *(row+store));	// save y3 y2 y1 y0
-    pshufw_r2r (mm4, mm4, 0xb1);	// mm4 = y7 y6 y5 y4
+    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
+    pshufw_r2r (mm4, mm4, 0xb1);        // mm4 = y7 y6 y5 y4
 
-    movq_m2r (*table, mm3);		// mm3 = -C2 -C4 C2 C4
-    movq_r2m (mm4, *(row+store+4));	// save y7 y6 y5 y4
+    movq_m2r (*table, mm3);             // mm3 = -C2 -C4 C2 C4
+    movq_r2m (mm4, *(row+store+4));     // save y7 y6 y5 y4
 
-    pmaddwd_r2r (mm0, mm3);		// mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
+    pmaddwd_r2r (mm0, mm3);             // mm3 = -C4*x4-C2*x6 C4*x0+C2*x2
 
-    movq_m2r (*(table+4), mm4);		// mm4 = C6 C4 C6 C4
-    pshufw_r2r (mm2, mm2, 0x4e);	// mm2 = x2 x0 x6 x4
+    movq_m2r (*(table+4), mm4);         // mm4 = C6 C4 C6 C4
+    pshufw_r2r (mm2, mm2, 0x4e);        // mm2 = x2 x0 x6 x4
 }
 
 
 /* MMX row IDCT */
 
-#define mmx_table(c1,c2,c3,c4,c5,c6,c7)	{  c4,  c2,  c4,  c6,	\
-					   c4,  c6, -c4, -c2,	\
-					   c1,  c3,  c3, -c7,	\
-					   c5,  c7, -c1, -c5,	\
-					   c4, -c6,  c4, -c2,	\
-					  -c4,  c2,  c4, -c6,	\
-					   c5, -c1,  c7, -c5,	\
-					   c7,  c3,  c3, -c1 }
+#define mmx_table(c1,c2,c3,c4,c5,c6,c7) {  c4,  c2,  c4,  c6,   \
+                                           c4,  c6, -c4, -c2,   \
+                                           c1,  c3,  c3, -c7,   \
+                                           c5,  c7, -c1, -c5,   \
+                                           c4, -c6,  c4, -c2,   \
+                                          -c4,  c2,  c4, -c6,   \
+                                           c5, -c1,  c7, -c5,   \
+                                           c7,  c3,  c3, -c1 }
 
 static inline void mmx_row_head (int16_t * row, int offset, const int16_t * table)
 {
-    movq_m2r (*(row+offset), mm2);	// mm2 = x6 x4 x2 x0
+    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
 
-    movq_m2r (*(row+offset+4), mm5);	// mm5 = x7 x5 x3 x1
-    movq_r2r (mm2, mm0);		// mm0 = x6 x4 x2 x0
+    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
+    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0
 
-    movq_m2r (*table, mm3);		// mm3 = C6 C4 C2 C4
-    movq_r2r (mm5, mm6);		// mm6 = x7 x5 x3 x1
+    movq_m2r (*table, mm3);             // mm3 = C6 C4 C2 C4
+    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1
 
-    punpckldq_r2r (mm0, mm0);		// mm0 = x2 x0 x2 x0
+    punpckldq_r2r (mm0, mm0);           // mm0 = x2 x0 x2 x0
 
-    movq_m2r (*(table+4), mm4);		// mm4 = -C2 -C4 C6 C4
-    pmaddwd_r2r (mm0, mm3);		// mm3 = C4*x0+C6*x2 C4*x0+C2*x2
+    movq_m2r (*(table+4), mm4);         // mm4 = -C2 -C4 C6 C4
+    pmaddwd_r2r (mm0, mm3);             // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
 
-    movq_m2r (*(table+8), mm1);		// mm1 = -C7 C3 C3 C1
-    punpckhdq_r2r (mm2, mm2);		// mm2 = x6 x4 x6 x4
+    movq_m2r (*(table+8), mm1);         // mm1 = -C7 C3 C3 C1
+    punpckhdq_r2r (mm2, mm2);           // mm2 = x6 x4 x6 x4
 }
 
 static inline void mmx_row (const int16_t * table, const int32_t * rounder)
 {
-    pmaddwd_r2r (mm2, mm4);		// mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
-    punpckldq_r2r (mm5, mm5);		// mm5 = x3 x1 x3 x1
+    pmaddwd_r2r (mm2, mm4);             // mm4 = -C4*x4-C2*x6 C4*x4+C6*x6
+    punpckldq_r2r (mm5, mm5);           // mm5 = x3 x1 x3 x1
 
-    pmaddwd_m2r (*(table+16), mm0);	// mm0 = C4*x0-C2*x2 C4*x0-C6*x2
-    punpckhdq_r2r (mm6, mm6);		// mm6 = x7 x5 x7 x5
+    pmaddwd_m2r (*(table+16), mm0);     // mm0 = C4*x0-C2*x2 C4*x0-C6*x2
+    punpckhdq_r2r (mm6, mm6);           // mm6 = x7 x5 x7 x5
 
-    movq_m2r (*(table+12), mm7);	// mm7 = -C5 -C1 C7 C5
-    pmaddwd_r2r (mm5, mm1);		// mm1 = C3*x1-C7*x3 C1*x1+C3*x3
+    movq_m2r (*(table+12), mm7);        // mm7 = -C5 -C1 C7 C5
+    pmaddwd_r2r (mm5, mm1);             // mm1 = C3*x1-C7*x3 C1*x1+C3*x3
 
-    paddd_m2r (*rounder, mm3);		// mm3 += rounder
-    pmaddwd_r2r (mm6, mm7);		// mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
+    paddd_m2r (*rounder, mm3);          // mm3 += rounder
+    pmaddwd_r2r (mm6, mm7);             // mm7 = -C1*x5-C5*x7 C5*x5+C7*x7
 
-    pmaddwd_m2r (*(table+20), mm2);	// mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
-    paddd_r2r (mm4, mm3);		// mm3 = a1 a0 + rounder
+    pmaddwd_m2r (*(table+20), mm2);     // mm2 = C4*x4-C6*x6 -C4*x4+C2*x6
+    paddd_r2r (mm4, mm3);               // mm3 = a1 a0 + rounder
 
-    pmaddwd_m2r (*(table+24), mm5);	// mm5 = C7*x1-C5*x3 C5*x1-C1*x3
-    movq_r2r (mm3, mm4);		// mm4 = a1 a0 + rounder
+    pmaddwd_m2r (*(table+24), mm5);     // mm5 = C7*x1-C5*x3 C5*x1-C1*x3
+    movq_r2r (mm3, mm4);                // mm4 = a1 a0 + rounder
 
-    pmaddwd_m2r (*(table+28), mm6);	// mm6 = C3*x5-C1*x7 C7*x5+C3*x7
-    paddd_r2r (mm7, mm1);		// mm1 = b1 b0
+    pmaddwd_m2r (*(table+28), mm6);     // mm6 = C3*x5-C1*x7 C7*x5+C3*x7
+    paddd_r2r (mm7, mm1);               // mm1 = b1 b0
 
-    paddd_m2r (*rounder, mm0);		// mm0 += rounder
-    psubd_r2r (mm1, mm3);		// mm3 = a1-b1 a0-b0 + rounder
+    paddd_m2r (*rounder, mm0);          // mm0 += rounder
+    psubd_r2r (mm1, mm3);               // mm3 = a1-b1 a0-b0 + rounder
 
-    psrad_i2r (ROW_SHIFT, mm3);		// mm3 = y6 y7
-    paddd_r2r (mm4, mm1);		// mm1 = a1+b1 a0+b0 + rounder
+    psrad_i2r (ROW_SHIFT, mm3);         // mm3 = y6 y7
+    paddd_r2r (mm4, mm1);               // mm1 = a1+b1 a0+b0 + rounder
 
-    paddd_r2r (mm2, mm0);		// mm0 = a3 a2 + rounder
-    psrad_i2r (ROW_SHIFT, mm1);		// mm1 = y1 y0
+    paddd_r2r (mm2, mm0);               // mm0 = a3 a2 + rounder
+    psrad_i2r (ROW_SHIFT, mm1);         // mm1 = y1 y0
 
-    paddd_r2r (mm6, mm5);		// mm5 = b3 b2
-    movq_r2r (mm0, mm7);		// mm7 = a3 a2 + rounder
+    paddd_r2r (mm6, mm5);               // mm5 = b3 b2
+    movq_r2r (mm0, mm7);                // mm7 = a3 a2 + rounder
 
-    paddd_r2r (mm5, mm0);		// mm0 = a3+b3 a2+b2 + rounder
-    psubd_r2r (mm5, mm7);		// mm7 = a3-b3 a2-b2 + rounder
+    paddd_r2r (mm5, mm0);               // mm0 = a3+b3 a2+b2 + rounder
+    psubd_r2r (mm5, mm7);               // mm7 = a3-b3 a2-b2 + rounder
 }
 
 static inline void mmx_row_tail (int16_t * row, int store)
 {
-    psrad_i2r (ROW_SHIFT, mm0);		// mm0 = y3 y2
+    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2
 
-    psrad_i2r (ROW_SHIFT, mm7);		// mm7 = y4 y5
+    psrad_i2r (ROW_SHIFT, mm7);         // mm7 = y4 y5
 
-    packssdw_r2r (mm0, mm1);		// mm1 = y3 y2 y1 y0
+    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
 
-    packssdw_r2r (mm3, mm7);		// mm7 = y6 y7 y4 y5
+    packssdw_r2r (mm3, mm7);            // mm7 = y6 y7 y4 y5
 
-    movq_r2m (mm1, *(row+store));	// save y3 y2 y1 y0
-    movq_r2r (mm7, mm4);		// mm4 = y6 y7 y4 y5
+    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
+    movq_r2r (mm7, mm4);                // mm4 = y6 y7 y4 y5
 
-    pslld_i2r (16, mm7);		// mm7 = y7 0 y5 0
+    pslld_i2r (16, mm7);                // mm7 = y7 0 y5 0
 
-    psrld_i2r (16, mm4);		// mm4 = 0 y6 0 y4
+    psrld_i2r (16, mm4);                // mm4 = 0 y6 0 y4
 
-    por_r2r (mm4, mm7);			// mm7 = y7 y6 y5 y4
+    por_r2r (mm4, mm7);                 // mm7 = y7 y6 y5 y4
 
     /* slot */
 
-    movq_r2m (mm7, *(row+store+4));	// save y7 y6 y5 y4
+    movq_r2m (mm7, *(row+store+4));     // save y7 y6 y5 y4
 }
 
 static inline void mmx_row_mid (int16_t * row, int store,
-				int offset, const int16_t * table)
+                                int offset, const int16_t * table)
 {
-    movq_m2r (*(row+offset), mm2);	// mm2 = x6 x4 x2 x0
-    psrad_i2r (ROW_SHIFT, mm0);		// mm0 = y3 y2
+    movq_m2r (*(row+offset), mm2);      // mm2 = x6 x4 x2 x0
+    psrad_i2r (ROW_SHIFT, mm0);         // mm0 = y3 y2
 
-    movq_m2r (*(row+offset+4), mm5);	// mm5 = x7 x5 x3 x1
-    psrad_i2r (ROW_SHIFT, mm7);		// mm7 = y4 y5
+    movq_m2r (*(row+offset+4), mm5);    // mm5 = x7 x5 x3 x1
+    psrad_i2r (ROW_SHIFT, mm7);         // mm7 = y4 y5
 
-    packssdw_r2r (mm0, mm1);		// mm1 = y3 y2 y1 y0
-    movq_r2r (mm5, mm6);		// mm6 = x7 x5 x3 x1
+    packssdw_r2r (mm0, mm1);            // mm1 = y3 y2 y1 y0
+    movq_r2r (mm5, mm6);                // mm6 = x7 x5 x3 x1
 
-    packssdw_r2r (mm3, mm7);		// mm7 = y6 y7 y4 y5
-    movq_r2r (mm2, mm0);		// mm0 = x6 x4 x2 x0
+    packssdw_r2r (mm3, mm7);            // mm7 = y6 y7 y4 y5
+    movq_r2r (mm2, mm0);                // mm0 = x6 x4 x2 x0
 
-    movq_r2m (mm1, *(row+store));	// save y3 y2 y1 y0
-    movq_r2r (mm7, mm1);		// mm1 = y6 y7 y4 y5
+    movq_r2m (mm1, *(row+store));       // save y3 y2 y1 y0
+    movq_r2r (mm7, mm1);                // mm1 = y6 y7 y4 y5
 
-    punpckldq_r2r (mm0, mm0);		// mm0 = x2 x0 x2 x0
-    psrld_i2r (16, mm7);		// mm7 = 0 y6 0 y4
+    punpckldq_r2r (mm0, mm0);           // mm0 = x2 x0 x2 x0
+    psrld_i2r (16, mm7);                // mm7 = 0 y6 0 y4
 
-    movq_m2r (*table, mm3);		// mm3 = C6 C4 C2 C4
-    pslld_i2r (16, mm1);		// mm1 = y7 0 y5 0
+    movq_m2r (*table, mm3);             // mm3 = C6 C4 C2 C4
+    pslld_i2r (16, mm1);                // mm1 = y7 0 y5 0
 
-    movq_m2r (*(table+4), mm4);		// mm4 = -C2 -C4 C6 C4
-    por_r2r (mm1, mm7);			// mm7 = y7 y6 y5 y4
+    movq_m2r (*(table+4), mm4);         // mm4 = -C2 -C4 C6 C4
+    por_r2r (mm1, mm7);                 // mm7 = y7 y6 y5 y4
 
-    movq_m2r (*(table+8), mm1);		// mm1 = -C7 C3 C3 C1
-    punpckhdq_r2r (mm2, mm2);		// mm2 = x6 x4 x6 x4
+    movq_m2r (*(table+8), mm1);         // mm1 = -C7 C3 C3 C1
+    punpckhdq_r2r (mm2, mm2);           // mm2 = x6 x4 x6 x4
 
-    movq_r2m (mm7, *(row+store+4));	// save y7 y6 y5 y4
-    pmaddwd_r2r (mm0, mm3);		// mm3 = C4*x0+C6*x2 C4*x0+C2*x2
+    movq_r2m (mm7, *(row+store+4));     // save y7 y6 y5 y4
+    pmaddwd_r2r (mm0, mm3);             // mm3 = C4*x0+C6*x2 C4*x0+C2*x2
 }
 
 
@@ -403,132 +403,132 @@ static inline void idct_col (int16_t * col, int offset)
     /* column code adapted from peter gubanov */
     /* http://www.elecard.com/peter/idct.shtml */
 
-    movq_m2r (*_T1, mm0);		// mm0 = T1
+    movq_m2r (*_T1, mm0);               // mm0 = T1
 
-    movq_m2r (*(col+offset+1*8), mm1);	// mm1 = x1
-    movq_r2r (mm0, mm2);		// mm2 = T1
+    movq_m2r (*(col+offset+1*8), mm1);  // mm1 = x1
+    movq_r2r (mm0, mm2);                // mm2 = T1
 
-    movq_m2r (*(col+offset+7*8), mm4);	// mm4 = x7
-    pmulhw_r2r (mm1, mm0);		// mm0 = T1*x1
+    movq_m2r (*(col+offset+7*8), mm4);  // mm4 = x7
+    pmulhw_r2r (mm1, mm0);              // mm0 = T1*x1
 
-    movq_m2r (*_T3, mm5);		// mm5 = T3
-    pmulhw_r2r (mm4, mm2);		// mm2 = T1*x7
+    movq_m2r (*_T3, mm5);               // mm5 = T3-1
+    pmulhw_r2r (mm4, mm2);              // mm2 = T1*x7
 
-    movq_m2r (*(col+offset+5*8), mm6);	// mm6 = x5
-    movq_r2r (mm5, mm7);		// mm7 = T3-1
+    movq_m2r (*(col+offset+5*8), mm6);  // mm6 = x5
+    movq_r2r (mm5, mm7);                // mm7 = T3-1
 
-    movq_m2r (*(col+offset+3*8), mm3);	// mm3 = x3
-    psubsw_r2r (mm4, mm0);		// mm0 = v17
+    movq_m2r (*(col+offset+3*8), mm3);  // mm3 = x3
+    psubsw_r2r (mm4, mm0);              // mm0 = v17
 
-    movq_m2r (*_T2, mm4);		// mm4 = T2
-    pmulhw_r2r (mm3, mm5);		// mm5 = (T3-1)*x3
+    movq_m2r (*_T2, mm4);               // mm4 = T2
+    pmulhw_r2r (mm3, mm5);              // mm5 = (T3-1)*x3
 
-    paddsw_r2r (mm2, mm1);		// mm1 = u17
-    pmulhw_r2r (mm6, mm7);		// mm7 = (T3-1)*x5
+    paddsw_r2r (mm2, mm1);              // mm1 = u17
+    pmulhw_r2r (mm6, mm7);              // mm7 = (T3-1)*x5
 
     /* slot */
 
-    movq_r2r (mm4, mm2);		// mm2 = T2
-    paddsw_r2r (mm3, mm5);		// mm5 = T3*x3
+    movq_r2r (mm4, mm2);                // mm2 = T2
+    paddsw_r2r (mm3, mm5);              // mm5 = T3*x3
 
     pmulhw_m2r (*(col+offset+2*8), mm4);// mm4 = T2*x2
-    paddsw_r2r (mm6, mm7);		// mm7 = T3*x5
+    paddsw_r2r (mm6, mm7);              // mm7 = T3*x5
 
-    psubsw_r2r (mm6, mm5);		// mm5 = v35
-    paddsw_r2r (mm3, mm7);		// mm7 = u35
+    psubsw_r2r (mm6, mm5);              // mm5 = v35
+    paddsw_r2r (mm3, mm7);              // mm7 = u35
 
-    movq_m2r (*(col+offset+6*8), mm3);	// mm3 = x6
-    movq_r2r (mm0, mm6);		// mm6 = v17
+    movq_m2r (*(col+offset+6*8), mm3);  // mm3 = x6
+    movq_r2r (mm0, mm6);                // mm6 = v17
 
-    pmulhw_r2r (mm3, mm2);		// mm2 = T2*x6
-    psubsw_r2r (mm5, mm0);		// mm0 = b3
+    pmulhw_r2r (mm3, mm2);              // mm2 = T2*x6
+    psubsw_r2r (mm5, mm0);              // mm0 = b3
 
-    psubsw_r2r (mm3, mm4);		// mm4 = v26
-    paddsw_r2r (mm6, mm5);		// mm5 = v12
+    psubsw_r2r (mm3, mm4);              // mm4 = v26
+    paddsw_r2r (mm6, mm5);              // mm5 = v12
 
-    movq_r2m (mm0, *(col+offset+3*8));	// save b3 in scratch0
-    movq_r2r (mm1, mm6);		// mm6 = u17
+    movq_r2m (mm0, *(col+offset+3*8));  // save b3 in scratch0
+    movq_r2r (mm1, mm6);                // mm6 = u17
 
     paddsw_m2r (*(col+offset+2*8), mm2);// mm2 = u26
-    paddsw_r2r (mm7, mm6);		// mm6 = b0
+    paddsw_r2r (mm7, mm6);              // mm6 = b0
 
-    psubsw_r2r (mm7, mm1);		// mm1 = u12
-    movq_r2r (mm1, mm7);		// mm7 = u12
+    psubsw_r2r (mm7, mm1);              // mm1 = u12
+    movq_r2r (mm1, mm7);                // mm7 = u12
 
-    movq_m2r (*(col+offset+0*8), mm3);	// mm3 = x0
-    paddsw_r2r (mm5, mm1);		// mm1 = u12+v12
+    movq_m2r (*(col+offset+0*8), mm3);  // mm3 = x0
+    paddsw_r2r (mm5, mm1);              // mm1 = u12+v12
 
-    movq_m2r (*_C4, mm0);		// mm0 = C4/2
-    psubsw_r2r (mm5, mm7);		// mm7 = u12-v12
+    movq_m2r (*_C4, mm0);               // mm0 = C4/2
+    psubsw_r2r (mm5, mm7);              // mm7 = u12-v12
 
-    movq_r2m (mm6, *(col+offset+5*8));	// save b0 in scratch1
-    pmulhw_r2r (mm0, mm1);		// mm1 = b1/2
+    movq_r2m (mm6, *(col+offset+5*8));  // save b0 in scratch1
+    pmulhw_r2r (mm0, mm1);              // mm1 = b1/2
 
-    movq_r2r (mm4, mm6);		// mm6 = v26
-    pmulhw_r2r (mm0, mm7);		// mm7 = b2/2
+    movq_r2r (mm4, mm6);                // mm6 = v26
+    pmulhw_r2r (mm0, mm7);              // mm7 = b2/2
 
-    movq_m2r (*(col+offset+4*8), mm5);	// mm5 = x4
-    movq_r2r (mm3, mm0);		// mm0 = x0
+    movq_m2r (*(col+offset+4*8), mm5);  // mm5 = x4
+    movq_r2r (mm3, mm0);                // mm0 = x0
 
-    psubsw_r2r (mm5, mm3);		// mm3 = v04
-    paddsw_r2r (mm5, mm0);		// mm0 = u04
+    psubsw_r2r (mm5, mm3);              // mm3 = v04
+    paddsw_r2r (mm5, mm0);              // mm0 = u04
 
-    paddsw_r2r (mm3, mm4);		// mm4 = a1
-    movq_r2r (mm0, mm5);		// mm5 = u04
+    paddsw_r2r (mm3, mm4);              // mm4 = a1
+    movq_r2r (mm0, mm5);                // mm5 = u04
 
-    psubsw_r2r (mm6, mm3);		// mm3 = a2
-    paddsw_r2r (mm2, mm5);		// mm5 = a0
+    psubsw_r2r (mm6, mm3);              // mm3 = a2
+    paddsw_r2r (mm2, mm5);              // mm5 = a0
 
-    paddsw_r2r (mm1, mm1);		// mm1 = b1
-    psubsw_r2r (mm2, mm0);		// mm0 = a3
+    paddsw_r2r (mm1, mm1);              // mm1 = b1
+    psubsw_r2r (mm2, mm0);              // mm0 = a3
 
-    paddsw_r2r (mm7, mm7);		// mm7 = b2
-    movq_r2r (mm3, mm2);		// mm2 = a2
+    paddsw_r2r (mm7, mm7);              // mm7 = b2
+    movq_r2r (mm3, mm2);                // mm2 = a2
 
-    movq_r2r (mm4, mm6);		// mm6 = a1
-    paddsw_r2r (mm7, mm3);		// mm3 = a2+b2
+    movq_r2r (mm4, mm6);                // mm6 = a1
+    paddsw_r2r (mm7, mm3);              // mm3 = a2+b2
 
-    psraw_i2r (COL_SHIFT, mm3);		// mm3 = y2
-    paddsw_r2r (mm1, mm4);		// mm4 = a1+b1
+    psraw_i2r (COL_SHIFT, mm3);         // mm3 = y2
+    paddsw_r2r (mm1, mm4);              // mm4 = a1+b1
 
-    psraw_i2r (COL_SHIFT, mm4);		// mm4 = y1
-    psubsw_r2r (mm1, mm6);		// mm6 = a1-b1
+    psraw_i2r (COL_SHIFT, mm4);         // mm4 = y1
+    psubsw_r2r (mm1, mm6);              // mm6 = a1-b1
 
-    movq_m2r (*(col+offset+5*8), mm1);	// mm1 = b0
-    psubsw_r2r (mm7, mm2);		// mm2 = a2-b2
+    movq_m2r (*(col+offset+5*8), mm1);  // mm1 = b0
+    psubsw_r2r (mm7, mm2);              // mm2 = a2-b2
 
-    psraw_i2r (COL_SHIFT, mm6);		// mm6 = y6
-    movq_r2r (mm5, mm7);		// mm7 = a0
+    psraw_i2r (COL_SHIFT, mm6);         // mm6 = y6
+    movq_r2r (mm5, mm7);                // mm7 = a0
 
-    movq_r2m (mm4, *(col+offset+1*8));	// save y1
-    psraw_i2r (COL_SHIFT, mm2);		// mm2 = y5
+    movq_r2m (mm4, *(col+offset+1*8));  // save y1
+    psraw_i2r (COL_SHIFT, mm2);         // mm2 = y5
 
-    movq_r2m (mm3, *(col+offset+2*8));	// save y2
-    paddsw_r2r (mm1, mm5);		// mm5 = a0+b0
+    movq_r2m (mm3, *(col+offset+2*8));  // save y2
+    paddsw_r2r (mm1, mm5);              // mm5 = a0+b0
 
-    movq_m2r (*(col+offset+3*8), mm4);	// mm4 = b3
-    psubsw_r2r (mm1, mm7);		// mm7 = a0-b0
+    movq_m2r (*(col+offset+3*8), mm4);  // mm4 = b3
+    psubsw_r2r (mm1, mm7);              // mm7 = a0-b0
 
-    psraw_i2r (COL_SHIFT, mm5);		// mm5 = y0
-    movq_r2r (mm0, mm3);		// mm3 = a3
+    psraw_i2r (COL_SHIFT, mm5);         // mm5 = y0
+    movq_r2r (mm0, mm3);                // mm3 = a3
 
-    movq_r2m (mm2, *(col+offset+5*8));	// save y5
-    psubsw_r2r (mm4, mm3);		// mm3 = a3-b3
+    movq_r2m (mm2, *(col+offset+5*8));  // save y5
+    psubsw_r2r (mm4, mm3);              // mm3 = a3-b3
 
-    psraw_i2r (COL_SHIFT, mm7);		// mm7 = y7
-    paddsw_r2r (mm0, mm4);		// mm4 = a3+b3
+    psraw_i2r (COL_SHIFT, mm7);         // mm7 = y7
+    paddsw_r2r (mm0, mm4);              // mm4 = a3+b3
 
-    movq_r2m (mm5, *(col+offset+0*8));	// save y0
-    psraw_i2r (COL_SHIFT, mm3);		// mm3 = y4
+    movq_r2m (mm5, *(col+offset+0*8));  // save y0
+    psraw_i2r (COL_SHIFT, mm3);         // mm3 = y4
 
-    movq_r2m (mm6, *(col+offset+6*8));	// save y6
-    psraw_i2r (COL_SHIFT, mm4);		// mm4 = y3
+    movq_r2m (mm6, *(col+offset+6*8));  // save y6
+    psraw_i2r (COL_SHIFT, mm4);         // mm4 = y3
 
-    movq_r2m (mm7, *(col+offset+7*8));	// save y7
+    movq_r2m (mm7, *(col+offset+7*8));  // save y7
 
-    movq_r2m (mm3, *(col+offset+4*8));	// save y4
+    movq_r2m (mm3, *(col+offset+4*8));  // save y4
 
-    movq_r2m (mm4, *(col+offset+3*8));	// save y3
+    movq_r2m (mm4, *(col+offset+3*8));  // save y3
 
 #undef T1
 #undef T2
@@ -540,61 +540,61 @@ static const int32_t rounder0[] ATTR_ALIGN(8) =
     rounder ((1 << (COL_SHIFT - 1)) - 0.5);
 static const int32_t rounder4[] ATTR_ALIGN(8) = rounder (0);
 static const int32_t rounder1[] ATTR_ALIGN(8) =
-    rounder (1.25683487303);	/* C1*(C1/C4+C1+C7)/2 */
+    rounder (1.25683487303);        /* C1*(C1/C4+C1+C7)/2 */
 static const int32_t rounder7[] ATTR_ALIGN(8) =
-    rounder (-0.25);		/* C1*(C7/C4+C7-C1)/2 */
+    rounder (-0.25);                /* C1*(C7/C4+C7-C1)/2 */
 static const int32_t rounder2[] ATTR_ALIGN(8) =
-    rounder (0.60355339059);	/* C2 * (C6+C2)/2 */
+    rounder (0.60355339059);        /* C2 * (C6+C2)/2 */
 static const int32_t rounder6[] ATTR_ALIGN(8) =
-    rounder (-0.25);		/* C2 * (C6-C2)/2 */
+    rounder (-0.25);                /* C2 * (C6-C2)/2 */
 static const int32_t rounder3[] ATTR_ALIGN(8) =
-    rounder (0.087788325588);	/* C3*(-C3/C4+C3+C5)/2 */
+    rounder (0.087788325588);       /* C3*(-C3/C4+C3+C5)/2 */
 static const int32_t rounder5[] ATTR_ALIGN(8) =
-    rounder (-0.441341716183);	/* C3*(-C5/C4+C5-C3)/2 */
+    rounder (-0.441341716183);      /* C3*(-C5/C4+C5-C3)/2 */
 
 #undef COL_SHIFT
 #undef ROW_SHIFT
 
-#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid)	\
-void idct (int16_t * block)					\
-{									\
-    static const int16_t table04[] ATTR_ALIGN(16) =				\
-	table (22725, 21407, 19266, 16384, 12873,  8867, 4520);		\
-    static const int16_t table17[] ATTR_ALIGN(16) =				\
-	table (31521, 29692, 26722, 22725, 17855, 12299, 6270);		\
-    static const int16_t table26[] ATTR_ALIGN(16) =				\
-	table (29692, 27969, 25172, 21407, 16819, 11585, 5906);		\
-    static const int16_t table35[] ATTR_ALIGN(16) =				\
-	table (26722, 25172, 22654, 19266, 15137, 10426, 5315);		\
-									\
-    idct_row_head (block, 0*8, table04);				\
-    idct_row (table04, rounder0);					\
-    idct_row_mid (block, 0*8, 4*8, table04);				\
-    idct_row (table04, rounder4);					\
-    idct_row_mid (block, 4*8, 1*8, table17);				\
-    idct_row (table17, rounder1);					\
-    idct_row_mid (block, 1*8, 7*8, table17);				\
-    idct_row (table17, rounder7);					\
-    idct_row_mid (block, 7*8, 2*8, table26);				\
-    idct_row (table26, rounder2);					\
-    idct_row_mid (block, 2*8, 6*8, table26);				\
-    idct_row (table26, rounder6);					\
-    idct_row_mid (block, 6*8, 3*8, table35);				\
-    idct_row (table35, rounder3);					\
-    idct_row_mid (block, 3*8, 5*8, table35);				\
-    idct_row (table35, rounder5);					\
-    idct_row_tail (block, 5*8);						\
-									\
-    idct_col (block, 0);						\
-    idct_col (block, 4);						\
+#define declare_idct(idct,table,idct_row_head,idct_row,idct_row_tail,idct_row_mid) \
+void idct (int16_t * block)                                             \
+{                                                                       \
+    static const int16_t table04[] ATTR_ALIGN(16) =                     \
+        table (22725, 21407, 19266, 16384, 12873,  8867, 4520);         \
+    static const int16_t table17[] ATTR_ALIGN(16) =                     \
+        table (31521, 29692, 26722, 22725, 17855, 12299, 6270);         \
+    static const int16_t table26[] ATTR_ALIGN(16) =                     \
+        table (29692, 27969, 25172, 21407, 16819, 11585, 5906);         \
+    static const int16_t table35[] ATTR_ALIGN(16) =                     \
+        table (26722, 25172, 22654, 19266, 15137, 10426, 5315);         \
+                                                                        \
+    idct_row_head (block, 0*8, table04);                                \
+    idct_row (table04, rounder0);                                       \
+    idct_row_mid (block, 0*8, 4*8, table04);                            \
+    idct_row (table04, rounder4);                                       \
+    idct_row_mid (block, 4*8, 1*8, table17);                            \
+    idct_row (table17, rounder1);                                       \
+    idct_row_mid (block, 1*8, 7*8, table17);                            \
+    idct_row (table17, rounder7);                                       \
+    idct_row_mid (block, 7*8, 2*8, table26);                            \
+    idct_row (table26, rounder2);                                       \
+    idct_row_mid (block, 2*8, 6*8, table26);                            \
+    idct_row (table26, rounder6);                                       \
+    idct_row_mid (block, 6*8, 3*8, table35);                            \
+    idct_row (table35, rounder3);                                       \
+    idct_row_mid (block, 3*8, 5*8, table35);                            \
+    idct_row (table35, rounder5);                                       \
+    idct_row_tail (block, 5*8);                                         \
+                                                                        \
+    idct_col (block, 0);                                                \
+    idct_col (block, 4);                                                \
 }
 
 void ff_mmx_idct(DCTELEM *block);
 void ff_mmxext_idct(DCTELEM *block);
 
 declare_idct (ff_mmxext_idct, mmxext_table,
-	      mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
+              mmxext_row_head, mmxext_row, mmxext_row_tail, mmxext_row_mid)
 
 declare_idct (ff_mmx_idct, mmx_table,
-	      mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
+              mmx_row_head, mmx_row, mmx_row_tail, mmx_row_mid)
 
diff --git a/src/libffmpeg/libavcodec/i386/idct_mmx_xvid.c b/src/libffmpeg/libavcodec/i386/idct_mmx_xvid.c
index aff57e3fa..7bc6f5f78 100644
--- a/src/libffmpeg/libavcodec/i386/idct_mmx_xvid.c
+++ b/src/libffmpeg/libavcodec/i386/idct_mmx_xvid.c
@@ -16,10 +16,11 @@
 // *  GNU General Public License for more details.
 // *
 // *  You should have received a copy of the GNU General Public License
-// *  along with this program; if not, write to the Free Software
-// *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
+// *  along with this program; if not, write to the Free Software Foundation,
+// *  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+//
 // *
-// * $Id: idct_mmx_xvid.c,v 1.1 2005/10/23 02:11:44 miguelfreitas Exp $
+// * $Id: idct_mmx_xvid.c,v 1.2 2006/02/05 14:11:36 miguelfreitas Exp $
 // *
 // ***************************************************************************/
 
@@ -72,13 +73,13 @@
 //-----------------------------------------------------------------------------
 
 
-static const int16_t tg_1_16[4*4] attribute_used __attribute__ ((aligned(8))) = { 
+static const int16_t tg_1_16[4*4] attribute_used __attribute__ ((aligned(8))) = {
   13036,13036,13036,13036,        // tg * (2<<16) + 0.5
   27146,27146,27146,27146,        // tg * (2<<16) + 0.5
   -21746,-21746,-21746,-21746,    // tg * (2<<16) + 0.5
   23170,23170,23170,23170};       // cos * (2<<15) + 0.5
 
-static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8))) = { 
+static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8))) = {
   65536,65536,
   3597,3597,
   2260,2260,
@@ -148,7 +149,7 @@ static const int32_t rounder_0[2*8] attribute_used __attribute__ ((aligned(8)))
 //-----------------------------------------------------------------------------
 
 // Table for rows 0,4 - constants are multiplied by cos_4_16
-static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8))) = { 
+static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8))) = {
   16384,16384,16384,-16384,       // movq-> w06 w04 w02 w00
   21407,8867,8867,-21407,         // w07 w05 w03 w01
   16384,-16384,16384,16384,       // w14 w12 w10 w08
@@ -190,7 +191,7 @@ static const int16_t tab_i_04_mmx[32*4] attribute_used __attribute__ ((aligned(8
 //-----------------------------------------------------------------------------
 
 // %3 for rows 0,4 - constants are multiplied by cos_4_16
-static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8))) = { 
+static const int16_t tab_i_04_xmm[32*4] attribute_used __attribute__ ((aligned(8))) = {
   16384,21407,16384,8867,      // movq-> w05 w04 w01 w00
   16384,8867,-16384,-21407,    // w07 w06 w03 w02
   16384,-8867,16384,-21407,    // w13 w12 w09 w08
@@ -501,7 +502,7 @@ asm volatile(
     DCT_8_INV_ROW_MMX(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1))
     DCT_8_INV_ROW_MMX(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1))
     DCT_8_INV_ROW_MMX(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1))
-    
+
             //# Process the columns (4 at a time)
     DCT_8_INV_COL(0(%0), 0(%0))
     DCT_8_INV_COL(8(%0), 8(%0))
@@ -524,7 +525,7 @@ asm volatile(
     DCT_8_INV_ROW_XMM(5*16(%0), 5*16(%0), 64*3(%2), 8*5(%1))
     DCT_8_INV_ROW_XMM(6*16(%0), 6*16(%0), 64*2(%2), 8*6(%1))
     DCT_8_INV_ROW_XMM(7*16(%0), 7*16(%0), 64*1(%2), 8*7(%1))
-    
+
             //# Process the columns (4 at a time)
     DCT_8_INV_COL(0(%0), 0(%0))
     DCT_8_INV_COL(8(%0), 8(%0))
diff --git a/src/libffmpeg/libavcodec/i386/mmx.h b/src/libffmpeg/libavcodec/i386/mmx.h
index f0ef1b79e..df4620e0a 100644
--- a/src/libffmpeg/libavcodec/i386/mmx.h
+++ b/src/libffmpeg/libavcodec/i386/mmx.h
@@ -7,8 +7,18 @@
 
 #ifdef ARCH_X86_64
 #  define REG_a "rax"
+#  define REG_b "rbx"
+#  define REG_c "rcx"
+#  define REG_d "rdx"
+#  define REG_D "rdi"
+#  define REG_S "rsi"
 #else
 #  define REG_a "eax"
+#  define REG_b "ebx"
+#  define REG_c "ecx"
+#  define REG_d "edx"
+#  define REG_D "edi"
+#  define REG_S "esi"
 #endif
 
 /*
@@ -17,257 +27,257 @@
  * values by ULL, lest they be truncated by the compiler)
  */
 
-typedef	union {
-	long long		q;	/* Quadword (64-bit) value */
-	unsigned long long	uq;	/* Unsigned Quadword */
-	int			d[2];	/* 2 Doubleword (32-bit) values */
-	unsigned int		ud[2];	/* 2 Unsigned Doubleword */
-	short			w[4];	/* 4 Word (16-bit) values */
-	unsigned short		uw[4];	/* 4 Unsigned Word */
-	char			b[8];	/* 8 Byte (8-bit) values */
-	unsigned char		ub[8];	/* 8 Unsigned Byte */
-	float			s[2];	/* Single-precision (32-bit) value */
-} mmx_t;	/* On an 8-byte (64-bit) boundary */
-
-
-#define	mmx_i2r(op,imm,reg) \
-	__asm__ __volatile__ (#op " %0, %%" #reg \
-			      : /* nothing */ \
-			      : "i" (imm) )
-
-#define	mmx_m2r(op,mem,reg) \
-	__asm__ __volatile__ (#op " %0, %%" #reg \
-			      : /* nothing */ \
-			      : "m" (mem))
-
-#define	mmx_r2m(op,reg,mem) \
-	__asm__ __volatile__ (#op " %%" #reg ", %0" \
-			      : "=m" (mem) \
-			      : /* nothing */ )
-
-#define	mmx_r2r(op,regs,regd) \
-	__asm__ __volatile__ (#op " %" #regs ", %" #regd)
-
-
-#define	emms() __asm__ __volatile__ ("emms")
-
-#define	movd_m2r(var,reg)	mmx_m2r (movd, var, reg)
-#define	movd_r2m(reg,var)	mmx_r2m (movd, reg, var)
-#define	movd_r2r(regs,regd)	mmx_r2r (movd, regs, regd)
-
-#define	movq_m2r(var,reg)	mmx_m2r (movq, var, reg)
-#define	movq_r2m(reg,var)	mmx_r2m (movq, reg, var)
-#define	movq_r2r(regs,regd)	mmx_r2r (movq, regs, regd)
-
-#define	packssdw_m2r(var,reg)	mmx_m2r (packssdw, var, reg)
-#define	packssdw_r2r(regs,regd) mmx_r2r (packssdw, regs, regd)
-#define	packsswb_m2r(var,reg)	mmx_m2r (packsswb, var, reg)
-#define	packsswb_r2r(regs,regd) mmx_r2r (packsswb, regs, regd)
-
-#define	packuswb_m2r(var,reg)	mmx_m2r (packuswb, var, reg)
-#define	packuswb_r2r(regs,regd) mmx_r2r (packuswb, regs, regd)
-
-#define	paddb_m2r(var,reg)	mmx_m2r (paddb, var, reg)
-#define	paddb_r2r(regs,regd)	mmx_r2r (paddb, regs, regd)
-#define	paddd_m2r(var,reg)	mmx_m2r (paddd, var, reg)
-#define	paddd_r2r(regs,regd)	mmx_r2r (paddd, regs, regd)
-#define	paddw_m2r(var,reg)	mmx_m2r (paddw, var, reg)
-#define	paddw_r2r(regs,regd)	mmx_r2r (paddw, regs, regd)
-
-#define	paddsb_m2r(var,reg)	mmx_m2r (paddsb, var, reg)
-#define	paddsb_r2r(regs,regd)	mmx_r2r (paddsb, regs, regd)
-#define	paddsw_m2r(var,reg)	mmx_m2r (paddsw, var, reg)
-#define	paddsw_r2r(regs,regd)	mmx_r2r (paddsw, regs, regd)
-
-#define	paddusb_m2r(var,reg)	mmx_m2r (paddusb, var, reg)
-#define	paddusb_r2r(regs,regd)	mmx_r2r (paddusb, regs, regd)
-#define	paddusw_m2r(var,reg)	mmx_m2r (paddusw, var, reg)
-#define	paddusw_r2r(regs,regd)	mmx_r2r (paddusw, regs, regd)
-
-#define	pand_m2r(var,reg)	mmx_m2r (pand, var, reg)
-#define	pand_r2r(regs,regd)	mmx_r2r (pand, regs, regd)
-
-#define	pandn_m2r(var,reg)	mmx_m2r (pandn, var, reg)
-#define	pandn_r2r(regs,regd)	mmx_r2r (pandn, regs, regd)
-
-#define	pcmpeqb_m2r(var,reg)	mmx_m2r (pcmpeqb, var, reg)
-#define	pcmpeqb_r2r(regs,regd)	mmx_r2r (pcmpeqb, regs, regd)
-#define	pcmpeqd_m2r(var,reg)	mmx_m2r (pcmpeqd, var, reg)
-#define	pcmpeqd_r2r(regs,regd)	mmx_r2r (pcmpeqd, regs, regd)
-#define	pcmpeqw_m2r(var,reg)	mmx_m2r (pcmpeqw, var, reg)
-#define	pcmpeqw_r2r(regs,regd)	mmx_r2r (pcmpeqw, regs, regd)
-
-#define	pcmpgtb_m2r(var,reg)	mmx_m2r (pcmpgtb, var, reg)
-#define	pcmpgtb_r2r(regs,regd)	mmx_r2r (pcmpgtb, regs, regd)
-#define	pcmpgtd_m2r(var,reg)	mmx_m2r (pcmpgtd, var, reg)
-#define	pcmpgtd_r2r(regs,regd)	mmx_r2r (pcmpgtd, regs, regd)
-#define	pcmpgtw_m2r(var,reg)	mmx_m2r (pcmpgtw, var, reg)
-#define	pcmpgtw_r2r(regs,regd)	mmx_r2r (pcmpgtw, regs, regd)
-
-#define	pmaddwd_m2r(var,reg)	mmx_m2r (pmaddwd, var, reg)
-#define	pmaddwd_r2r(regs,regd)	mmx_r2r (pmaddwd, regs, regd)
-
-#define	pmulhw_m2r(var,reg)	mmx_m2r (pmulhw, var, reg)
-#define	pmulhw_r2r(regs,regd)	mmx_r2r (pmulhw, regs, regd)
-
-#define	pmullw_m2r(var,reg)	mmx_m2r (pmullw, var, reg)
-#define	pmullw_r2r(regs,regd)	mmx_r2r (pmullw, regs, regd)
-
-#define	por_m2r(var,reg)	mmx_m2r (por, var, reg)
-#define	por_r2r(regs,regd)	mmx_r2r (por, regs, regd)
-
-#define	pslld_i2r(imm,reg)	mmx_i2r (pslld, imm, reg)
-#define	pslld_m2r(var,reg)	mmx_m2r (pslld, var, reg)
-#define	pslld_r2r(regs,regd)	mmx_r2r (pslld, regs, regd)
-#define	psllq_i2r(imm,reg)	mmx_i2r (psllq, imm, reg)
-#define	psllq_m2r(var,reg)	mmx_m2r (psllq, var, reg)
-#define	psllq_r2r(regs,regd)	mmx_r2r (psllq, regs, regd)
-#define	psllw_i2r(imm,reg)	mmx_i2r (psllw, imm, reg)
-#define	psllw_m2r(var,reg)	mmx_m2r (psllw, var, reg)
-#define	psllw_r2r(regs,regd)	mmx_r2r (psllw, regs, regd)
-
-#define	psrad_i2r(imm,reg)	mmx_i2r (psrad, imm, reg)
-#define	psrad_m2r(var,reg)	mmx_m2r (psrad, var, reg)
-#define	psrad_r2r(regs,regd)	mmx_r2r (psrad, regs, regd)
-#define	psraw_i2r(imm,reg)	mmx_i2r (psraw, imm, reg)
-#define	psraw_m2r(var,reg)	mmx_m2r (psraw, var, reg)
-#define	psraw_r2r(regs,regd)	mmx_r2r (psraw, regs, regd)
-
-#define	psrld_i2r(imm,reg)	mmx_i2r (psrld, imm, reg)
-#define	psrld_m2r(var,reg)	mmx_m2r (psrld, var, reg)
-#define	psrld_r2r(regs,regd)	mmx_r2r (psrld, regs, regd)
-#define	psrlq_i2r(imm,reg)	mmx_i2r (psrlq, imm, reg)
-#define	psrlq_m2r(var,reg)	mmx_m2r (psrlq, var, reg)
-#define	psrlq_r2r(regs,regd)	mmx_r2r (psrlq, regs, regd)
-#define	psrlw_i2r(imm,reg)	mmx_i2r (psrlw, imm, reg)
-#define	psrlw_m2r(var,reg)	mmx_m2r (psrlw, var, reg)
-#define	psrlw_r2r(regs,regd)	mmx_r2r (psrlw, regs, regd)
-
-#define	psubb_m2r(var,reg)	mmx_m2r (psubb, var, reg)
-#define	psubb_r2r(regs,regd)	mmx_r2r (psubb, regs, regd)
-#define	psubd_m2r(var,reg)	mmx_m2r (psubd, var, reg)
-#define	psubd_r2r(regs,regd)	mmx_r2r (psubd, regs, regd)
-#define	psubw_m2r(var,reg)	mmx_m2r (psubw, var, reg)
-#define	psubw_r2r(regs,regd)	mmx_r2r (psubw, regs, regd)
-
-#define	psubsb_m2r(var,reg)	mmx_m2r (psubsb, var, reg)
-#define	psubsb_r2r(regs,regd)	mmx_r2r (psubsb, regs, regd)
-#define	psubsw_m2r(var,reg)	mmx_m2r (psubsw, var, reg)
-#define	psubsw_r2r(regs,regd)	mmx_r2r (psubsw, regs, regd)
-
-#define	psubusb_m2r(var,reg)	mmx_m2r (psubusb, var, reg)
-#define	psubusb_r2r(regs,regd)	mmx_r2r (psubusb, regs, regd)
-#define	psubusw_m2r(var,reg)	mmx_m2r (psubusw, var, reg)
-#define	psubusw_r2r(regs,regd)	mmx_r2r (psubusw, regs, regd)
-
-#define	punpckhbw_m2r(var,reg)		mmx_m2r (punpckhbw, var, reg)
-#define	punpckhbw_r2r(regs,regd)	mmx_r2r (punpckhbw, regs, regd)
-#define	punpckhdq_m2r(var,reg)		mmx_m2r (punpckhdq, var, reg)
-#define	punpckhdq_r2r(regs,regd)	mmx_r2r (punpckhdq, regs, regd)
-#define	punpckhwd_m2r(var,reg)		mmx_m2r (punpckhwd, var, reg)
-#define	punpckhwd_r2r(regs,regd)	mmx_r2r (punpckhwd, regs, regd)
-
-#define	punpcklbw_m2r(var,reg) 		mmx_m2r (punpcklbw, var, reg)
-#define	punpcklbw_r2r(regs,regd)	mmx_r2r (punpcklbw, regs, regd)
-#define	punpckldq_m2r(var,reg)		mmx_m2r (punpckldq, var, reg)
-#define	punpckldq_r2r(regs,regd)	mmx_r2r (punpckldq, regs, regd)
-#define	punpcklwd_m2r(var,reg)		mmx_m2r (punpcklwd, var, reg)
-#define	punpcklwd_r2r(regs,regd)	mmx_r2r (punpcklwd, regs, regd)
-
-#define	pxor_m2r(var,reg)	mmx_m2r (pxor, var, reg)
-#define	pxor_r2r(regs,regd)	mmx_r2r (pxor, regs, regd)
+typedef        union {
+        long long               q;      /* Quadword (64-bit) value */
+        unsigned long long      uq;     /* Unsigned Quadword */
+        int                     d[2];   /* 2 Doubleword (32-bit) values */
+        unsigned int            ud[2];  /* 2 Unsigned Doubleword */
+        short                   w[4];   /* 4 Word (16-bit) values */
+        unsigned short          uw[4];  /* 4 Unsigned Word */
+        char                    b[8];   /* 8 Byte (8-bit) values */
+        unsigned char           ub[8];  /* 8 Unsigned Byte */
+        float                   s[2];   /* Single-precision (32-bit) value */
+} mmx_t;        /* On an 8-byte (64-bit) boundary */
+
+
+#define         mmx_i2r(op,imm,reg) \
+        __asm__ __volatile__ (#op " %0, %%" #reg \
+                              : /* nothing */ \
+                              : "i" (imm) )
+
+#define         mmx_m2r(op,mem,reg) \
+        __asm__ __volatile__ (#op " %0, %%" #reg \
+                              : /* nothing */ \
+                              : "m" (mem))
+
+#define         mmx_r2m(op,reg,mem) \
+        __asm__ __volatile__ (#op " %%" #reg ", %0" \
+                              : "=m" (mem) \
+                              : /* nothing */ )
+
+#define         mmx_r2r(op,regs,regd) \
+        __asm__ __volatile__ (#op " %" #regs ", %" #regd)
+
+
+#define         emms() __asm__ __volatile__ ("emms")
+
+#define         movd_m2r(var,reg)           mmx_m2r (movd, var, reg)
+#define         movd_r2m(reg,var)           mmx_r2m (movd, reg, var)
+#define         movd_r2r(regs,regd)         mmx_r2r (movd, regs, regd)
+
+#define         movq_m2r(var,reg)           mmx_m2r (movq, var, reg)
+#define         movq_r2m(reg,var)           mmx_r2m (movq, reg, var)
+#define         movq_r2r(regs,regd)         mmx_r2r (movq, regs, regd)
+
+#define         packssdw_m2r(var,reg)       mmx_m2r (packssdw, var, reg)
+#define         packssdw_r2r(regs,regd)     mmx_r2r (packssdw, regs, regd)
+#define         packsswb_m2r(var,reg)       mmx_m2r (packsswb, var, reg)
+#define         packsswb_r2r(regs,regd)     mmx_r2r (packsswb, regs, regd)
+
+#define         packuswb_m2r(var,reg)       mmx_m2r (packuswb, var, reg)
+#define         packuswb_r2r(regs,regd)     mmx_r2r (packuswb, regs, regd)
+
+#define         paddb_m2r(var,reg)          mmx_m2r (paddb, var, reg)
+#define         paddb_r2r(regs,regd)        mmx_r2r (paddb, regs, regd)
+#define         paddd_m2r(var,reg)          mmx_m2r (paddd, var, reg)
+#define         paddd_r2r(regs,regd)        mmx_r2r (paddd, regs, regd)
+#define         paddw_m2r(var,reg)          mmx_m2r (paddw, var, reg)
+#define         paddw_r2r(regs,regd)        mmx_r2r (paddw, regs, regd)
+
+#define         paddsb_m2r(var,reg)         mmx_m2r (paddsb, var, reg)
+#define         paddsb_r2r(regs,regd)       mmx_r2r (paddsb, regs, regd)
+#define         paddsw_m2r(var,reg)         mmx_m2r (paddsw, var, reg)
+#define         paddsw_r2r(regs,regd)       mmx_r2r (paddsw, regs, regd)
+
+#define         paddusb_m2r(var,reg)        mmx_m2r (paddusb, var, reg)
+#define         paddusb_r2r(regs,regd)      mmx_r2r (paddusb, regs, regd)
+#define         paddusw_m2r(var,reg)        mmx_m2r (paddusw, var, reg)
+#define         paddusw_r2r(regs,regd)      mmx_r2r (paddusw, regs, regd)
+
+#define         pand_m2r(var,reg)           mmx_m2r (pand, var, reg)
+#define         pand_r2r(regs,regd)         mmx_r2r (pand, regs, regd)
+
+#define         pandn_m2r(var,reg)          mmx_m2r (pandn, var, reg)
+#define         pandn_r2r(regs,regd)        mmx_r2r (pandn, regs, regd)
+
+#define         pcmpeqb_m2r(var,reg)        mmx_m2r (pcmpeqb, var, reg)
+#define         pcmpeqb_r2r(regs,regd)      mmx_r2r (pcmpeqb, regs, regd)
+#define         pcmpeqd_m2r(var,reg)        mmx_m2r (pcmpeqd, var, reg)
+#define         pcmpeqd_r2r(regs,regd)      mmx_r2r (pcmpeqd, regs, regd)
+#define         pcmpeqw_m2r(var,reg)        mmx_m2r (pcmpeqw, var, reg)
+#define         pcmpeqw_r2r(regs,regd)      mmx_r2r (pcmpeqw, regs, regd)
+
+#define         pcmpgtb_m2r(var,reg)        mmx_m2r (pcmpgtb, var, reg)
+#define         pcmpgtb_r2r(regs,regd)      mmx_r2r (pcmpgtb, regs, regd)
+#define         pcmpgtd_m2r(var,reg)        mmx_m2r (pcmpgtd, var, reg)
+#define         pcmpgtd_r2r(regs,regd)      mmx_r2r (pcmpgtd, regs, regd)
+#define         pcmpgtw_m2r(var,reg)        mmx_m2r (pcmpgtw, var, reg)
+#define         pcmpgtw_r2r(regs,regd)      mmx_r2r (pcmpgtw, regs, regd)
+
+#define         pmaddwd_m2r(var,reg)        mmx_m2r (pmaddwd, var, reg)
+#define         pmaddwd_r2r(regs,regd)      mmx_r2r (pmaddwd, regs, regd)
+
+#define         pmulhw_m2r(var,reg)         mmx_m2r (pmulhw, var, reg)
+#define         pmulhw_r2r(regs,regd)       mmx_r2r (pmulhw, regs, regd)
+
+#define         pmullw_m2r(var,reg)         mmx_m2r (pmullw, var, reg)
+#define         pmullw_r2r(regs,regd)       mmx_r2r (pmullw, regs, regd)
+
+#define         por_m2r(var,reg)            mmx_m2r (por, var, reg)
+#define         por_r2r(regs,regd)          mmx_r2r (por, regs, regd)
+
+#define         pslld_i2r(imm,reg)          mmx_i2r (pslld, imm, reg)
+#define         pslld_m2r(var,reg)          mmx_m2r (pslld, var, reg)
+#define         pslld_r2r(regs,regd)        mmx_r2r (pslld, regs, regd)
+#define         psllq_i2r(imm,reg)          mmx_i2r (psllq, imm, reg)
+#define         psllq_m2r(var,reg)          mmx_m2r (psllq, var, reg)
+#define         psllq_r2r(regs,regd)        mmx_r2r (psllq, regs, regd)
+#define         psllw_i2r(imm,reg)          mmx_i2r (psllw, imm, reg)
+#define         psllw_m2r(var,reg)          mmx_m2r (psllw, var, reg)
+#define         psllw_r2r(regs,regd)        mmx_r2r (psllw, regs, regd)
+
+#define         psrad_i2r(imm,reg)          mmx_i2r (psrad, imm, reg)
+#define         psrad_m2r(var,reg)          mmx_m2r (psrad, var, reg)
+#define         psrad_r2r(regs,regd)        mmx_r2r (psrad, regs, regd)
+#define         psraw_i2r(imm,reg)          mmx_i2r (psraw, imm, reg)
+#define         psraw_m2r(var,reg)          mmx_m2r (psraw, var, reg)
+#define         psraw_r2r(regs,regd)        mmx_r2r (psraw, regs, regd)
+
+#define         psrld_i2r(imm,reg)          mmx_i2r (psrld, imm, reg)
+#define         psrld_m2r(var,reg)          mmx_m2r (psrld, var, reg)
+#define         psrld_r2r(regs,regd)        mmx_r2r (psrld, regs, regd)
+#define         psrlq_i2r(imm,reg)          mmx_i2r (psrlq, imm, reg)
+#define         psrlq_m2r(var,reg)          mmx_m2r (psrlq, var, reg)
+#define         psrlq_r2r(regs,regd)        mmx_r2r (psrlq, regs, regd)
+#define         psrlw_i2r(imm,reg)          mmx_i2r (psrlw, imm, reg)
+#define         psrlw_m2r(var,reg)          mmx_m2r (psrlw, var, reg)
+#define         psrlw_r2r(regs,regd)        mmx_r2r (psrlw, regs, regd)
+
+#define         psubb_m2r(var,reg)          mmx_m2r (psubb, var, reg)
+#define         psubb_r2r(regs,regd)        mmx_r2r (psubb, regs, regd)
+#define         psubd_m2r(var,reg)          mmx_m2r (psubd, var, reg)
+#define         psubd_r2r(regs,regd)        mmx_r2r (psubd, regs, regd)
+#define         psubw_m2r(var,reg)          mmx_m2r (psubw, var, reg)
+#define         psubw_r2r(regs,regd)        mmx_r2r (psubw, regs, regd)
+
+#define         psubsb_m2r(var,reg)         mmx_m2r (psubsb, var, reg)
+#define         psubsb_r2r(regs,regd)       mmx_r2r (psubsb, regs, regd)
+#define         psubsw_m2r(var,reg)         mmx_m2r (psubsw, var, reg)
+#define         psubsw_r2r(regs,regd)       mmx_r2r (psubsw, regs, regd)
+
+#define         psubusb_m2r(var,reg)        mmx_m2r (psubusb, var, reg)
+#define         psubusb_r2r(regs,regd)      mmx_r2r (psubusb, regs, regd)
+#define         psubusw_m2r(var,reg)        mmx_m2r (psubusw, var, reg)
+#define         psubusw_r2r(regs,regd)      mmx_r2r (psubusw, regs, regd)
+
+#define         punpckhbw_m2r(var,reg)      mmx_m2r (punpckhbw, var, reg)
+#define         punpckhbw_r2r(regs,regd)    mmx_r2r (punpckhbw, regs, regd)
+#define         punpckhdq_m2r(var,reg)      mmx_m2r (punpckhdq, var, reg)
+#define         punpckhdq_r2r(regs,regd)    mmx_r2r (punpckhdq, regs, regd)
+#define         punpckhwd_m2r(var,reg)      mmx_m2r (punpckhwd, var, reg)
+#define         punpckhwd_r2r(regs,regd)    mmx_r2r (punpckhwd, regs, regd)
+
+#define         punpcklbw_m2r(var,reg)      mmx_m2r (punpcklbw, var, reg)
+#define         punpcklbw_r2r(regs,regd)    mmx_r2r (punpcklbw, regs, regd)
+#define         punpckldq_m2r(var,reg)      mmx_m2r (punpckldq, var, reg)
+#define         punpckldq_r2r(regs,regd)    mmx_r2r (punpckldq, regs, regd)
+#define         punpcklwd_m2r(var,reg)      mmx_m2r (punpcklwd, var, reg)
+#define         punpcklwd_r2r(regs,regd)    mmx_r2r (punpcklwd, regs, regd)
+
+#define         pxor_m2r(var,reg)           mmx_m2r (pxor, var, reg)
+#define         pxor_r2r(regs,regd)         mmx_r2r (pxor, regs, regd)
 
 
 /* 3DNOW extensions */
 
-#define pavgusb_m2r(var,reg)	mmx_m2r (pavgusb, var, reg)
-#define pavgusb_r2r(regs,regd)	mmx_r2r (pavgusb, regs, regd)
+#define         pavgusb_m2r(var,reg)        mmx_m2r (pavgusb, var, reg)
+#define         pavgusb_r2r(regs,regd)      mmx_r2r (pavgusb, regs, regd)
 
 
 /* AMD MMX extensions - also available in intel SSE */
 
 
-#define mmx_m2ri(op,mem,reg,imm) \
+#define         mmx_m2ri(op,mem,reg,imm) \
         __asm__ __volatile__ (#op " %1, %0, %%" #reg \
                               : /* nothing */ \
                               : "X" (mem), "X" (imm))
-#define mmx_r2ri(op,regs,regd,imm) \
+#define         mmx_r2ri(op,regs,regd,imm) \
         __asm__ __volatile__ (#op " %0, %%" #regs ", %%" #regd \
                               : /* nothing */ \
                               : "X" (imm) )
 
-#define	mmx_fetch(mem,hint) \
-	__asm__ __volatile__ ("prefetch" #hint " %0" \
-			      : /* nothing */ \
-			      : "X" (mem))
+#define         mmx_fetch(mem,hint) \
+        __asm__ __volatile__ ("prefetch" #hint " %0" \
+                              : /* nothing */ \
+                              : "X" (mem))
 
 
-#define	maskmovq(regs,maskreg)		mmx_r2ri (maskmovq, regs, maskreg)
+#define         maskmovq(regs,maskreg)      mmx_r2ri (maskmovq, regs, maskreg)
 
-#define	movntq_r2m(mmreg,var)		mmx_r2m (movntq, mmreg, var)
+#define         movntq_r2m(mmreg,var)       mmx_r2m (movntq, mmreg, var)
 
-#define	pavgb_m2r(var,reg)		mmx_m2r (pavgb, var, reg)
-#define	pavgb_r2r(regs,regd)		mmx_r2r (pavgb, regs, regd)
-#define	pavgw_m2r(var,reg)		mmx_m2r (pavgw, var, reg)
-#define	pavgw_r2r(regs,regd)		mmx_r2r (pavgw, regs, regd)
+#define         pavgb_m2r(var,reg)          mmx_m2r (pavgb, var, reg)
+#define         pavgb_r2r(regs,regd)        mmx_r2r (pavgb, regs, regd)
+#define         pavgw_m2r(var,reg)          mmx_m2r (pavgw, var, reg)
+#define         pavgw_r2r(regs,regd)        mmx_r2r (pavgw, regs, regd)
 
-#define	pextrw_r2r(mmreg,reg,imm)	mmx_r2ri (pextrw, mmreg, reg, imm)
+#define         pextrw_r2r(mmreg,reg,imm)   mmx_r2ri (pextrw, mmreg, reg, imm)
 
-#define	pinsrw_r2r(reg,mmreg,imm)	mmx_r2ri (pinsrw, reg, mmreg, imm)
+#define         pinsrw_r2r(reg,mmreg,imm)   mmx_r2ri (pinsrw, reg, mmreg, imm)
 
-#define	pmaxsw_m2r(var,reg)		mmx_m2r (pmaxsw, var, reg)
-#define	pmaxsw_r2r(regs,regd)		mmx_r2r (pmaxsw, regs, regd)
+#define         pmaxsw_m2r(var,reg)         mmx_m2r (pmaxsw, var, reg)
+#define         pmaxsw_r2r(regs,regd)       mmx_r2r (pmaxsw, regs, regd)
 
-#define	pmaxub_m2r(var,reg)		mmx_m2r (pmaxub, var, reg)
-#define	pmaxub_r2r(regs,regd)		mmx_r2r (pmaxub, regs, regd)
+#define         pmaxub_m2r(var,reg)         mmx_m2r (pmaxub, var, reg)
+#define         pmaxub_r2r(regs,regd)       mmx_r2r (pmaxub, regs, regd)
 
-#define	pminsw_m2r(var,reg)		mmx_m2r (pminsw, var, reg)
-#define	pminsw_r2r(regs,regd)		mmx_r2r (pminsw, regs, regd)
+#define         pminsw_m2r(var,reg)         mmx_m2r (pminsw, var, reg)
+#define         pminsw_r2r(regs,regd)       mmx_r2r (pminsw, regs, regd)
 
-#define	pminub_m2r(var,reg)		mmx_m2r (pminub, var, reg)
-#define	pminub_r2r(regs,regd)		mmx_r2r (pminub, regs, regd)
+#define         pminub_m2r(var,reg)         mmx_m2r (pminub, var, reg)
+#define         pminub_r2r(regs,regd)       mmx_r2r (pminub, regs, regd)
 
-#define	pmovmskb(mmreg,reg) \
-	__asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg)
+#define         pmovmskb(mmreg,reg) \
+        __asm__ __volatile__ ("movmskps %" #mmreg ", %" #reg)
 
-#define	pmulhuw_m2r(var,reg)		mmx_m2r (pmulhuw, var, reg)
-#define	pmulhuw_r2r(regs,regd)		mmx_r2r (pmulhuw, regs, regd)
+#define         pmulhuw_m2r(var,reg)        mmx_m2r (pmulhuw, var, reg)
+#define         pmulhuw_r2r(regs,regd)      mmx_r2r (pmulhuw, regs, regd)
 
-#define	prefetcht0(mem)			mmx_fetch (mem, t0)
-#define	prefetcht1(mem)			mmx_fetch (mem, t1)
-#define	prefetcht2(mem)			mmx_fetch (mem, t2)
-#define	prefetchnta(mem)		mmx_fetch (mem, nta)
+#define         prefetcht0(mem)             mmx_fetch (mem, t0)
+#define         prefetcht1(mem)             mmx_fetch (mem, t1)
+#define         prefetcht2(mem)             mmx_fetch (mem, t2)
+#define         prefetchnta(mem)            mmx_fetch (mem, nta)
 
-#define	psadbw_m2r(var,reg)		mmx_m2r (psadbw, var, reg)
-#define	psadbw_r2r(regs,regd)		mmx_r2r (psadbw, regs, regd)
+#define         psadbw_m2r(var,reg)         mmx_m2r (psadbw, var, reg)
+#define         psadbw_r2r(regs,regd)       mmx_r2r (psadbw, regs, regd)
 
-#define	pshufw_m2r(var,reg,imm)		mmx_m2ri(pshufw, var, reg, imm)
-#define	pshufw_r2r(regs,regd,imm)	mmx_r2ri(pshufw, regs, regd, imm)
+#define         pshufw_m2r(var,reg,imm)     mmx_m2ri(pshufw, var, reg, imm)
+#define         pshufw_r2r(regs,regd,imm)   mmx_r2ri(pshufw, regs, regd, imm)
 
-#define	sfence() __asm__ __volatile__ ("sfence\n\t")
+#define         sfence() __asm__ __volatile__ ("sfence\n\t")
 
 /* SSE2 */
-#define	pshufhw_m2r(var,reg,imm)	mmx_m2ri(pshufhw, var, reg, imm)
-#define	pshufhw_r2r(regs,regd,imm)	mmx_r2ri(pshufhw, regs, regd, imm)
-#define	pshuflw_m2r(var,reg,imm)	mmx_m2ri(pshuflw, var, reg, imm)
-#define	pshuflw_r2r(regs,regd,imm)	mmx_r2ri(pshuflw, regs, regd, imm)
+#define         pshufhw_m2r(var,reg,imm)    mmx_m2ri(pshufhw, var, reg, imm)
+#define         pshufhw_r2r(regs,regd,imm)  mmx_r2ri(pshufhw, regs, regd, imm)
+#define         pshuflw_m2r(var,reg,imm)    mmx_m2ri(pshuflw, var, reg, imm)
+#define         pshuflw_r2r(regs,regd,imm)  mmx_r2ri(pshuflw, regs, regd, imm)
 
-#define	pshufd_r2r(regs,regd,imm)	mmx_r2ri(pshufd, regs, regd, imm)
+#define         pshufd_r2r(regs,regd,imm)   mmx_r2ri(pshufd, regs, regd, imm)
 
-#define	movdqa_m2r(var,reg)		mmx_m2r (movdqa, var, reg)
-#define	movdqa_r2m(reg,var)		mmx_r2m (movdqa, reg, var)
-#define	movdqa_r2r(regs,regd)		mmx_r2r (movdqa, regs, regd)
-#define	movdqu_m2r(var,reg)		mmx_m2r (movdqu, var, reg)
-#define	movdqu_r2m(reg,var)		mmx_r2m (movdqu, reg, var)
-#define	movdqu_r2r(regs,regd)		mmx_r2r (movdqu, regs, regd)
+#define         movdqa_m2r(var,reg)         mmx_m2r (movdqa, var, reg)
+#define         movdqa_r2m(reg,var)         mmx_r2m (movdqa, reg, var)
+#define         movdqa_r2r(regs,regd)       mmx_r2r (movdqa, regs, regd)
+#define         movdqu_m2r(var,reg)         mmx_m2r (movdqu, var, reg)
+#define         movdqu_r2m(reg,var)         mmx_r2m (movdqu, reg, var)
+#define         movdqu_r2r(regs,regd)       mmx_r2r (movdqu, regs, regd)
 
-#define	pmullw_r2m(reg,var)		mmx_r2m (pmullw, reg, var)
+#define         pmullw_r2m(reg,var)         mmx_r2m (pmullw, reg, var)
 
-#define	pslldq_i2r(imm,reg)		mmx_i2r (pslldq, imm, reg)
-#define	psrldq_i2r(imm,reg)		mmx_i2r (psrldq, imm, reg)
+#define         pslldq_i2r(imm,reg)         mmx_i2r (pslldq, imm, reg)
+#define         psrldq_i2r(imm,reg)         mmx_i2r (psrldq, imm, reg)
 
-#define	punpcklqdq_r2r(regs,regd)	mmx_r2r (punpcklqdq, regs, regd)
-#define	punpckhqdq_r2r(regs,regd)	mmx_r2r (punpckhqdq, regs, regd)
+#define         punpcklqdq_r2r(regs,regd)   mmx_r2r (punpcklqdq, regs, regd)
+#define         punpckhqdq_r2r(regs,regd)   mmx_r2r (punpckhqdq, regs, regd)
 
 
 #endif /* AVCODEC_I386MMX_H */
diff --git a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c
index 1b90f8e40..c14b79384 100644
--- a/src/libffmpeg/libavcodec/i386/motion_est_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/motion_est_mmx.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * mostly by Michael Niedermayer <michaelni@gmx.at>
  */
@@ -34,33 +34,33 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16			\n\t"
-        "1:				\n\t"
-        "movq (%1, %%"REG_a"), %%mm0	\n\t"
-        "movq (%2, %%"REG_a"), %%mm2	\n\t"
-        "movq (%2, %%"REG_a"), %%mm4	\n\t"
-        "add %3, %%"REG_a"		\n\t"
-        "psubusb %%mm0, %%mm2		\n\t"
-        "psubusb %%mm4, %%mm0		\n\t"
-        "movq (%1, %%"REG_a"), %%mm1	\n\t"
-        "movq (%2, %%"REG_a"), %%mm3	\n\t"
-        "movq (%2, %%"REG_a"), %%mm5	\n\t"
-        "psubusb %%mm1, %%mm3		\n\t"
-        "psubusb %%mm5, %%mm1		\n\t"
-        "por %%mm2, %%mm0		\n\t"
-        "por %%mm1, %%mm3		\n\t"
-        "movq %%mm0, %%mm1		\n\t"
-        "movq %%mm3, %%mm2		\n\t"
-        "punpcklbw %%mm7, %%mm0		\n\t"
-        "punpckhbw %%mm7, %%mm1		\n\t"
-        "punpcklbw %%mm7, %%mm3		\n\t"
-        "punpckhbw %%mm7, %%mm2		\n\t"
-        "paddw %%mm1, %%mm0		\n\t"
-        "paddw %%mm3, %%mm2		\n\t"
-        "paddw %%mm2, %%mm0		\n\t"
-        "paddw %%mm0, %%mm6		\n\t"
-        "add %3, %%"REG_a"		\n\t"
-        " js 1b				\n\t"
+        ".balign 16                     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "movq (%2, %%"REG_a"), %%mm2    \n\t"
+        "movq (%2, %%"REG_a"), %%mm4    \n\t"
+        "add %3, %%"REG_a"              \n\t"
+        "psubusb %%mm0, %%mm2           \n\t"
+        "psubusb %%mm4, %%mm0           \n\t"
+        "movq (%1, %%"REG_a"), %%mm1    \n\t"
+        "movq (%2, %%"REG_a"), %%mm3    \n\t"
+        "movq (%2, %%"REG_a"), %%mm5    \n\t"
+        "psubusb %%mm1, %%mm3           \n\t"
+        "psubusb %%mm5, %%mm1           \n\t"
+        "por %%mm2, %%mm0               \n\t"
+        "por %%mm1, %%mm3               \n\t"
+        "movq %%mm0, %%mm1              \n\t"
+        "movq %%mm3, %%mm2              \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "punpcklbw %%mm7, %%mm3         \n\t"
+        "punpckhbw %%mm7, %%mm2         \n\t"
+        "paddw %%mm1, %%mm0             \n\t"
+        "paddw %%mm3, %%mm2             \n\t"
+        "paddw %%mm2, %%mm0             \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "add %3, %%"REG_a"              \n\t"
+        " js 1b                         \n\t"
         : "+a" (len)
         : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
     );
@@ -70,19 +70,19 @@ static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16			\n\t"
-        "1:				\n\t"
-        "movq (%1, %%"REG_a"), %%mm0	\n\t"
-        "movq (%2, %%"REG_a"), %%mm2	\n\t"
-        "psadbw %%mm2, %%mm0		\n\t"
-        "add %3, %%"REG_a"		\n\t"
-        "movq (%1, %%"REG_a"), %%mm1	\n\t"
-        "movq (%2, %%"REG_a"), %%mm3	\n\t"
-        "psadbw %%mm1, %%mm3		\n\t"
-        "paddw %%mm3, %%mm0		\n\t"
-        "paddw %%mm0, %%mm6		\n\t"
-        "add %3, %%"REG_a"		\n\t"
-        " js 1b				\n\t"
+        ".balign 16                     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "movq (%2, %%"REG_a"), %%mm2    \n\t"
+        "psadbw %%mm2, %%mm0            \n\t"
+        "add %3, %%"REG_a"              \n\t"
+        "movq (%1, %%"REG_a"), %%mm1    \n\t"
+        "movq (%2, %%"REG_a"), %%mm3    \n\t"
+        "psadbw %%mm1, %%mm3            \n\t"
+        "paddw %%mm3, %%mm0             \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "add %3, %%"REG_a"              \n\t"
+        " js 1b                         \n\t"
         : "+a" (len)
         : "r" (blk1 - len), "r" (blk2 - len), "r" ((long)stride)
     );
@@ -92,23 +92,23 @@ static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, in
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16			\n\t"
-        "1:				\n\t"
-        "movq (%1, %%"REG_a"), %%mm0	\n\t"
-        "movq (%2, %%"REG_a"), %%mm2	\n\t"
-        "pavgb %%mm2, %%mm0		\n\t"
-        "movq (%3, %%"REG_a"), %%mm2	\n\t"
-        "psadbw %%mm2, %%mm0		\n\t"
-        "add %4, %%"REG_a"		\n\t"
-        "movq (%1, %%"REG_a"), %%mm1	\n\t"
-        "movq (%2, %%"REG_a"), %%mm3	\n\t"
-        "pavgb %%mm1, %%mm3		\n\t"
-        "movq (%3, %%"REG_a"), %%mm1	\n\t"
-        "psadbw %%mm1, %%mm3		\n\t"
-        "paddw %%mm3, %%mm0		\n\t"
-        "paddw %%mm0, %%mm6		\n\t"
-        "add %4, %%"REG_a"		\n\t"
-        " js 1b				\n\t"
+        ".balign 16                     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "movq (%2, %%"REG_a"), %%mm2    \n\t"
+        "pavgb %%mm2, %%mm0             \n\t"
+        "movq (%3, %%"REG_a"), %%mm2    \n\t"
+        "psadbw %%mm2, %%mm0            \n\t"
+        "add %4, %%"REG_a"              \n\t"
+        "movq (%1, %%"REG_a"), %%mm1    \n\t"
+        "movq (%2, %%"REG_a"), %%mm3    \n\t"
+        "pavgb %%mm1, %%mm3             \n\t"
+        "movq (%3, %%"REG_a"), %%mm1    \n\t"
+        "psadbw %%mm1, %%mm3            \n\t"
+        "paddw %%mm3, %%mm0             \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "add %4, %%"REG_a"              \n\t"
+        " js 1b                         \n\t"
         : "+a" (len)
         : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
     );
@@ -118,34 +118,34 @@ static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 { //FIXME reuse src
     long len= -(stride*h);
     asm volatile(
-        ".balign 16			\n\t"
-        "movq "MANGLE(bone)", %%mm5	\n\t"
-        "1:				\n\t"
-        "movq (%1, %%"REG_a"), %%mm0	\n\t"
-        "movq (%2, %%"REG_a"), %%mm2	\n\t"
-        "movq 1(%1, %%"REG_a"), %%mm1	\n\t"
-        "movq 1(%2, %%"REG_a"), %%mm3	\n\t"
-        "pavgb %%mm2, %%mm0		\n\t"
-        "pavgb %%mm1, %%mm3		\n\t"
-        "psubusb %%mm5, %%mm3		\n\t"
-        "pavgb %%mm3, %%mm0		\n\t"
-        "movq (%3, %%"REG_a"), %%mm2	\n\t"
-        "psadbw %%mm2, %%mm0		\n\t"
-        "add %4, %%"REG_a"		\n\t"
-        "movq (%1, %%"REG_a"), %%mm1	\n\t"
-        "movq (%2, %%"REG_a"), %%mm3	\n\t"
-        "movq 1(%1, %%"REG_a"), %%mm2	\n\t"
-        "movq 1(%2, %%"REG_a"), %%mm4	\n\t"
-        "pavgb %%mm3, %%mm1		\n\t"
-        "pavgb %%mm4, %%mm2		\n\t"
-        "psubusb %%mm5, %%mm2		\n\t"
-        "pavgb %%mm1, %%mm2		\n\t"
-        "movq (%3, %%"REG_a"), %%mm1	\n\t"
-        "psadbw %%mm1, %%mm2		\n\t"
-        "paddw %%mm2, %%mm0		\n\t"
-        "paddw %%mm0, %%mm6		\n\t"
-        "add %4, %%"REG_a"		\n\t"
-        " js 1b				\n\t"
+        ".balign 16                     \n\t"
+        "movq "MANGLE(bone)", %%mm5     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "movq (%2, %%"REG_a"), %%mm2    \n\t"
+        "movq 1(%1, %%"REG_a"), %%mm1   \n\t"
+        "movq 1(%2, %%"REG_a"), %%mm3   \n\t"
+        "pavgb %%mm2, %%mm0             \n\t"
+        "pavgb %%mm1, %%mm3             \n\t"
+        "psubusb %%mm5, %%mm3           \n\t"
+        "pavgb %%mm3, %%mm0             \n\t"
+        "movq (%3, %%"REG_a"), %%mm2    \n\t"
+        "psadbw %%mm2, %%mm0            \n\t"
+        "add %4, %%"REG_a"              \n\t"
+        "movq (%1, %%"REG_a"), %%mm1    \n\t"
+        "movq (%2, %%"REG_a"), %%mm3    \n\t"
+        "movq 1(%1, %%"REG_a"), %%mm2   \n\t"
+        "movq 1(%2, %%"REG_a"), %%mm4   \n\t"
+        "pavgb %%mm3, %%mm1             \n\t"
+        "pavgb %%mm4, %%mm2             \n\t"
+        "psubusb %%mm5, %%mm2           \n\t"
+        "pavgb %%mm1, %%mm2             \n\t"
+        "movq (%3, %%"REG_a"), %%mm1    \n\t"
+        "psadbw %%mm1, %%mm2            \n\t"
+        "paddw %%mm2, %%mm0             \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "add %4, %%"REG_a"              \n\t"
+        " js 1b                         \n\t"
         : "+a" (len)
         : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" ((long)stride)
     );
@@ -155,35 +155,35 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16			\n\t"
-        "1:				\n\t"
-        "movq (%1, %%"REG_a"), %%mm0	\n\t"
-        "movq (%2, %%"REG_a"), %%mm1	\n\t"
-        "movq (%1, %%"REG_a"), %%mm2	\n\t"
-        "movq (%2, %%"REG_a"), %%mm3	\n\t"
-        "punpcklbw %%mm7, %%mm0		\n\t"
-        "punpcklbw %%mm7, %%mm1		\n\t"
-        "punpckhbw %%mm7, %%mm2		\n\t"
-        "punpckhbw %%mm7, %%mm3		\n\t"
-        "paddw %%mm0, %%mm1		\n\t"
-        "paddw %%mm2, %%mm3		\n\t"
-        "movq (%3, %%"REG_a"), %%mm4	\n\t"
-        "movq (%3, %%"REG_a"), %%mm2	\n\t"
-        "paddw %%mm5, %%mm1		\n\t"
-        "paddw %%mm5, %%mm3		\n\t"
-        "psrlw $1, %%mm1		\n\t"
-        "psrlw $1, %%mm3		\n\t"
-        "packuswb %%mm3, %%mm1		\n\t"
-        "psubusb %%mm1, %%mm4		\n\t"
-        "psubusb %%mm2, %%mm1		\n\t"
-        "por %%mm4, %%mm1		\n\t"
-        "movq %%mm1, %%mm0		\n\t"
-        "punpcklbw %%mm7, %%mm0		\n\t"
-        "punpckhbw %%mm7, %%mm1		\n\t"
-        "paddw %%mm1, %%mm0		\n\t"
-        "paddw %%mm0, %%mm6		\n\t"
-        "add %4, %%"REG_a"		\n\t"
-        " js 1b				\n\t"
+        ".balign 16                     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "movq (%2, %%"REG_a"), %%mm1    \n\t"
+        "movq (%1, %%"REG_a"), %%mm2    \n\t"
+        "movq (%2, %%"REG_a"), %%mm3    \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpcklbw %%mm7, %%mm1         \n\t"
+        "punpckhbw %%mm7, %%mm2         \n\t"
+        "punpckhbw %%mm7, %%mm3         \n\t"
+        "paddw %%mm0, %%mm1             \n\t"
+        "paddw %%mm2, %%mm3             \n\t"
+        "movq (%3, %%"REG_a"), %%mm4    \n\t"
+        "movq (%3, %%"REG_a"), %%mm2    \n\t"
+        "paddw %%mm5, %%mm1             \n\t"
+        "paddw %%mm5, %%mm3             \n\t"
+        "psrlw $1, %%mm1                \n\t"
+        "psrlw $1, %%mm3                \n\t"
+        "packuswb %%mm3, %%mm1          \n\t"
+        "psubusb %%mm1, %%mm4           \n\t"
+        "psubusb %%mm2, %%mm1           \n\t"
+        "por %%mm4, %%mm1               \n\t"
+        "movq %%mm1, %%mm0              \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "paddw %%mm1, %%mm0             \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "add %4, %%"REG_a"              \n\t"
+        " js 1b                         \n\t"
         : "+a" (len)
         : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" ((long)stride)
     );
@@ -193,47 +193,47 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     long len= -(stride*h);
     asm volatile(
-        ".balign 16			\n\t"
-        "1:				\n\t"
-        "movq (%1, %%"REG_a"), %%mm0	\n\t"
-        "movq (%2, %%"REG_a"), %%mm1	\n\t"
-        "movq %%mm0, %%mm4		\n\t"
-        "movq %%mm1, %%mm2		\n\t"
-        "punpcklbw %%mm7, %%mm0		\n\t"
-        "punpcklbw %%mm7, %%mm1		\n\t"
-        "punpckhbw %%mm7, %%mm4		\n\t"
-        "punpckhbw %%mm7, %%mm2		\n\t"
-        "paddw %%mm1, %%mm0		\n\t"
-        "paddw %%mm2, %%mm4		\n\t"
-        "movq 1(%1, %%"REG_a"), %%mm2	\n\t"
-        "movq 1(%2, %%"REG_a"), %%mm3	\n\t"
-        "movq %%mm2, %%mm1		\n\t"
-        "punpcklbw %%mm7, %%mm2		\n\t"
-        "punpckhbw %%mm7, %%mm1		\n\t"
-        "paddw %%mm0, %%mm2		\n\t"
-        "paddw %%mm4, %%mm1		\n\t"
-        "movq %%mm3, %%mm4		\n\t"
-        "punpcklbw %%mm7, %%mm3		\n\t"
-        "punpckhbw %%mm7, %%mm4		\n\t"
-        "paddw %%mm3, %%mm2		\n\t"
-        "paddw %%mm4, %%mm1		\n\t"
-        "movq (%3, %%"REG_a"), %%mm3	\n\t"
-        "movq (%3, %%"REG_a"), %%mm4	\n\t"
-        "paddw %%mm5, %%mm2		\n\t"
-        "paddw %%mm5, %%mm1		\n\t"
-        "psrlw $2, %%mm2		\n\t"
-        "psrlw $2, %%mm1		\n\t"
-        "packuswb %%mm1, %%mm2		\n\t"
-        "psubusb %%mm2, %%mm3		\n\t"
-        "psubusb %%mm4, %%mm2		\n\t"
-        "por %%mm3, %%mm2		\n\t"
-        "movq %%mm2, %%mm0		\n\t"
-        "punpcklbw %%mm7, %%mm0		\n\t"
-        "punpckhbw %%mm7, %%mm2		\n\t"
-        "paddw %%mm2, %%mm0		\n\t"
-        "paddw %%mm0, %%mm6		\n\t"
-        "add %4, %%"REG_a"		\n\t"
-        " js 1b				\n\t"
+        ".balign 16                     \n\t"
+        "1:                             \n\t"
+        "movq (%1, %%"REG_a"), %%mm0    \n\t"
+        "movq (%2, %%"REG_a"), %%mm1    \n\t"
+        "movq %%mm0, %%mm4              \n\t"
+        "movq %%mm1, %%mm2              \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpcklbw %%mm7, %%mm1         \n\t"
+        "punpckhbw %%mm7, %%mm4         \n\t"
+        "punpckhbw %%mm7, %%mm2         \n\t"
+        "paddw %%mm1, %%mm0             \n\t"
+        "paddw %%mm2, %%mm4             \n\t"
+        "movq 1(%1, %%"REG_a"), %%mm2   \n\t"
+        "movq 1(%2, %%"REG_a"), %%mm3   \n\t"
+        "movq %%mm2, %%mm1              \n\t"
+        "punpcklbw %%mm7, %%mm2         \n\t"
+        "punpckhbw %%mm7, %%mm1         \n\t"
+        "paddw %%mm0, %%mm2             \n\t"
+        "paddw %%mm4, %%mm1             \n\t"
+        "movq %%mm3, %%mm4              \n\t"
+        "punpcklbw %%mm7, %%mm3         \n\t"
+        "punpckhbw %%mm7, %%mm4         \n\t"
+        "paddw %%mm3, %%mm2             \n\t"
+        "paddw %%mm4, %%mm1             \n\t"
+        "movq (%3, %%"REG_a"), %%mm3    \n\t"
+        "movq (%3, %%"REG_a"), %%mm4    \n\t"
+        "paddw %%mm5, %%mm2             \n\t"
+        "paddw %%mm5, %%mm1             \n\t"
+        "psrlw $2, %%mm2                \n\t"
+        "psrlw $2, %%mm1                \n\t"
+        "packuswb %%mm1, %%mm2          \n\t"
+        "psubusb %%mm2, %%mm3           \n\t"
+        "psubusb %%mm4, %%mm2           \n\t"
+        "por %%mm3, %%mm2               \n\t"
+        "movq %%mm2, %%mm0              \n\t"
+        "punpcklbw %%mm7, %%mm0         \n\t"
+        "punpckhbw %%mm7, %%mm2         \n\t"
+        "paddw %%mm2, %%mm0             \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "add %4, %%"REG_a"              \n\t"
+        " js 1b                         \n\t"
         : "+a" (len)
         : "r" (blk1 - len), "r" (blk1 -len + stride), "r" (blk2 - len), "r" ((long)stride)
     );
@@ -243,13 +243,13 @@ static inline int sum_mmx(void)
 {
     int ret;
     asm volatile(
-        "movq %%mm6, %%mm0		\n\t"
-        "psrlq $32, %%mm6		\n\t"
-        "paddw %%mm0, %%mm6		\n\t"
-        "movq %%mm6, %%mm0		\n\t"
-        "psrlq $16, %%mm6		\n\t"
-        "paddw %%mm0, %%mm6		\n\t"
-        "movd %%mm6, %0			\n\t"
+        "movq %%mm6, %%mm0              \n\t"
+        "psrlq $32, %%mm6               \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "movq %%mm6, %%mm0              \n\t"
+        "psrlq $16, %%mm6               \n\t"
+        "paddw %%mm0, %%mm6             \n\t"
+        "movd %%mm6, %0                 \n\t"
         : "=r" (ret)
     );
     return ret&0xFFFF;
@@ -259,7 +259,7 @@ static inline int sum_mmx2(void)
 {
     int ret;
     asm volatile(
-        "movd %%mm6, %0			\n\t"
+        "movd %%mm6, %0                 \n\t"
         : "=r" (ret)
     );
     return ret;
@@ -270,8 +270,8 @@ static inline int sum_mmx2(void)
 static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
     assert(h==8);\
-    asm volatile("pxor %%mm7, %%mm7		\n\t"\
-                 "pxor %%mm6, %%mm6		\n\t":);\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t":);\
 \
     sad8_1_ ## suf(blk1, blk2, stride, 8);\
 \
@@ -280,9 +280,9 @@ static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h
 static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
     assert(h==8);\
-    asm volatile("pxor %%mm7, %%mm7		\n\t"\
-                 "pxor %%mm6, %%mm6		\n\t"\
-                 "movq %0, %%mm5		\n\t"\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t"\
+                 "movq %0, %%mm5        \n\t"\
                  :: "m"(round_tab[1]) \
                  );\
 \
@@ -294,9 +294,9 @@ static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in
 static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
     assert(h==8);\
-    asm volatile("pxor %%mm7, %%mm7		\n\t"\
-                 "pxor %%mm6, %%mm6		\n\t"\
-                 "movq %0, %%mm5		\n\t"\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t"\
+                 "movq %0, %%mm5        \n\t"\
                  :: "m"(round_tab[1]) \
                  );\
 \
@@ -308,9 +308,9 @@ static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, in
 static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
     assert(h==8);\
-    asm volatile("pxor %%mm7, %%mm7		\n\t"\
-                 "pxor %%mm6, %%mm6		\n\t"\
-                 "movq %0, %%mm5		\n\t"\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t"\
+                 "movq %0, %%mm5        \n\t"\
                  :: "m"(round_tab[2]) \
                  );\
 \
@@ -321,8 +321,8 @@ static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
 \
 static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
-    asm volatile("pxor %%mm7, %%mm7		\n\t"\
-                 "pxor %%mm6, %%mm6		\n\t":);\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t":);\
 \
     sad8_1_ ## suf(blk1  , blk2  , stride, h);\
     sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
@@ -331,9 +331,9 @@ static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int
 }\
 static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
-    asm volatile("pxor %%mm7, %%mm7		\n\t"\
-                 "pxor %%mm6, %%mm6		\n\t"\
-                 "movq %0, %%mm5		\n\t"\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t"\
+                 "movq %0, %%mm5        \n\t"\
                  :: "m"(round_tab[1]) \
                  );\
 \
@@ -344,9 +344,9 @@ static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
 }\
 static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
-    asm volatile("pxor %%mm7, %%mm7		\n\t"\
-                 "pxor %%mm6, %%mm6		\n\t"\
-                 "movq %0, %%mm5		\n\t"\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t"\
+                 "movq %0, %%mm5        \n\t"\
                  :: "m"(round_tab[1]) \
                  );\
 \
@@ -357,9 +357,9 @@ static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, i
 }\
 static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
-    asm volatile("pxor %%mm7, %%mm7		\n\t"\
-                 "pxor %%mm6, %%mm6		\n\t"\
-                 "movq %0, %%mm5		\n\t"\
+    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+                 "pxor %%mm6, %%mm6     \n\t"\
+                 "movq %0, %%mm5        \n\t"\
                  :: "m"(round_tab[2]) \
                  );\
 \
@@ -384,16 +384,16 @@ void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
         c->pix_abs[1][2] = sad8_y2_mmx;
         c->pix_abs[1][3] = sad8_xy2_mmx;
 
-	c->sad[0]= sad16_mmx;
+        c->sad[0]= sad16_mmx;
         c->sad[1]= sad8_mmx;
     }
     if (mm_flags & MM_MMXEXT) {
-	c->pix_abs[0][0] = sad16_mmx2;
-	c->pix_abs[1][0] = sad8_mmx2;
+        c->pix_abs[0][0] = sad16_mmx2;
+        c->pix_abs[1][0] = sad8_mmx2;
+
+        c->sad[0]= sad16_mmx2;
+        c->sad[1]= sad8_mmx2;
 
-	c->sad[0]= sad16_mmx2;
-	c->sad[1]= sad8_mmx2;
-        
         if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
             c->pix_abs[0][1] = sad16_x2_mmx2;
             c->pix_abs[0][2] = sad16_y2_mmx2;
diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c
index 70c81f675..f83df3a19 100644
--- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * Optimized for ia32 cpus by Nick Kurshev <nickols_k@mail.ru>
  * h263, mpeg1, mpeg2 dequantizer & draw_edges by Michael Niedermayer <michaelni@gmx.at>
@@ -40,7 +40,7 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
     qmul = qscale << 1;
 
     assert(s->block_last_index[n]>=0 || s->h263_aic);
-        
+
     if (!s->h263_aic) {
         if (n < 4)
             level = block[0] * s->y_dc_scale;
@@ -57,52 +57,52 @@ static void dct_unquantize_h263_intra_mmx(MpegEncContext *s,
         nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
 //printf("%d %d  ", qmul, qadd);
 asm volatile(
-		"movd %1, %%mm6			\n\t" //qmul
-		"packssdw %%mm6, %%mm6		\n\t"
-		"packssdw %%mm6, %%mm6		\n\t"
-		"movd %2, %%mm5			\n\t" //qadd
-		"pxor %%mm7, %%mm7		\n\t"
-		"packssdw %%mm5, %%mm5		\n\t"
-		"packssdw %%mm5, %%mm5		\n\t"
-		"psubw %%mm5, %%mm7		\n\t"
-		"pxor %%mm4, %%mm4		\n\t"
-		".balign 16\n\t"
-		"1:				\n\t"
-		"movq (%0, %3), %%mm0		\n\t"
-		"movq 8(%0, %3), %%mm1		\n\t"
-
-		"pmullw %%mm6, %%mm0		\n\t"
-		"pmullw %%mm6, %%mm1		\n\t"
-
-		"movq (%0, %3), %%mm2		\n\t"
-		"movq 8(%0, %3), %%mm3		\n\t"
-
-		"pcmpgtw %%mm4, %%mm2		\n\t" // block[i] < 0 ? -1 : 0
-		"pcmpgtw %%mm4, %%mm3		\n\t" // block[i] < 0 ? -1 : 0
-
-		"pxor %%mm2, %%mm0		\n\t"
-		"pxor %%mm3, %%mm1		\n\t"
-
-		"paddw %%mm7, %%mm0		\n\t"
-		"paddw %%mm7, %%mm1		\n\t"
-
-		"pxor %%mm0, %%mm2		\n\t"
-		"pxor %%mm1, %%mm3		\n\t"
-
-		"pcmpeqw %%mm7, %%mm0		\n\t" // block[i] == 0 ? -1 : 0
-		"pcmpeqw %%mm7, %%mm1		\n\t" // block[i] == 0 ? -1 : 0
-
-		"pandn %%mm2, %%mm0		\n\t"
-		"pandn %%mm3, %%mm1		\n\t"
-
-		"movq %%mm0, (%0, %3)		\n\t"
-		"movq %%mm1, 8(%0, %3)		\n\t"
-
-		"add $16, %3			\n\t"
-		"jng 1b				\n\t"
-		::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
-		: "memory"
-	);
+                "movd %1, %%mm6                 \n\t" //qmul
+                "packssdw %%mm6, %%mm6          \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "movd %2, %%mm5                 \n\t" //qadd
+                "pxor %%mm7, %%mm7              \n\t"
+                "packssdw %%mm5, %%mm5          \n\t"
+                "packssdw %%mm5, %%mm5          \n\t"
+                "psubw %%mm5, %%mm7             \n\t"
+                "pxor %%mm4, %%mm4              \n\t"
+                ".balign 16                     \n\t"
+                "1:                             \n\t"
+                "movq (%0, %3), %%mm0           \n\t"
+                "movq 8(%0, %3), %%mm1          \n\t"
+
+                "pmullw %%mm6, %%mm0            \n\t"
+                "pmullw %%mm6, %%mm1            \n\t"
+
+                "movq (%0, %3), %%mm2           \n\t"
+                "movq 8(%0, %3), %%mm3          \n\t"
+
+                "pcmpgtw %%mm4, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
+                "pcmpgtw %%mm4, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
+
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+
+                "paddw %%mm7, %%mm0             \n\t"
+                "paddw %%mm7, %%mm1             \n\t"
+
+                "pxor %%mm0, %%mm2              \n\t"
+                "pxor %%mm1, %%mm3              \n\t"
+
+                "pcmpeqw %%mm7, %%mm0           \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw %%mm7, %%mm1           \n\t" // block[i] == 0 ? -1 : 0
+
+                "pandn %%mm2, %%mm0             \n\t"
+                "pandn %%mm3, %%mm1             \n\t"
+
+                "movq %%mm0, (%0, %3)           \n\t"
+                "movq %%mm1, 8(%0, %3)          \n\t"
+
+                "add $16, %3                    \n\t"
+                "jng 1b                         \n\t"
+                ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
+                : "memory"
+        );
         block[0]= level;
 }
 
@@ -116,56 +116,56 @@ static void dct_unquantize_h263_inter_mmx(MpegEncContext *s,
     qadd = (qscale - 1) | 1;
 
     assert(s->block_last_index[n]>=0 || s->h263_aic);
-        
+
     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
 //printf("%d %d  ", qmul, qadd);
 asm volatile(
-		"movd %1, %%mm6			\n\t" //qmul
-		"packssdw %%mm6, %%mm6		\n\t"
-		"packssdw %%mm6, %%mm6		\n\t"
-		"movd %2, %%mm5			\n\t" //qadd
-		"pxor %%mm7, %%mm7		\n\t"
-		"packssdw %%mm5, %%mm5		\n\t"
-		"packssdw %%mm5, %%mm5		\n\t"
-		"psubw %%mm5, %%mm7		\n\t"
-		"pxor %%mm4, %%mm4		\n\t"
-		".balign 16\n\t"
-		"1:				\n\t"
-		"movq (%0, %3), %%mm0		\n\t"
-		"movq 8(%0, %3), %%mm1		\n\t"
-
-		"pmullw %%mm6, %%mm0		\n\t"
-		"pmullw %%mm6, %%mm1		\n\t"
-
-		"movq (%0, %3), %%mm2		\n\t"
-		"movq 8(%0, %3), %%mm3		\n\t"
-
-		"pcmpgtw %%mm4, %%mm2		\n\t" // block[i] < 0 ? -1 : 0
-		"pcmpgtw %%mm4, %%mm3		\n\t" // block[i] < 0 ? -1 : 0
-
-		"pxor %%mm2, %%mm0		\n\t"
-		"pxor %%mm3, %%mm1		\n\t"
-
-		"paddw %%mm7, %%mm0		\n\t"
-		"paddw %%mm7, %%mm1		\n\t"
-
-		"pxor %%mm0, %%mm2		\n\t"
-		"pxor %%mm1, %%mm3		\n\t"
-
-		"pcmpeqw %%mm7, %%mm0		\n\t" // block[i] == 0 ? -1 : 0
-		"pcmpeqw %%mm7, %%mm1		\n\t" // block[i] == 0 ? -1 : 0
-
-		"pandn %%mm2, %%mm0		\n\t"
-		"pandn %%mm3, %%mm1		\n\t"
-
-		"movq %%mm0, (%0, %3)		\n\t"
-		"movq %%mm1, 8(%0, %3)		\n\t"
-
-		"add $16, %3			\n\t"
-		"jng 1b				\n\t"
-		::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
-		: "memory"
-	);
+                "movd %1, %%mm6                 \n\t" //qmul
+                "packssdw %%mm6, %%mm6          \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "movd %2, %%mm5                 \n\t" //qadd
+                "pxor %%mm7, %%mm7              \n\t"
+                "packssdw %%mm5, %%mm5          \n\t"
+                "packssdw %%mm5, %%mm5          \n\t"
+                "psubw %%mm5, %%mm7             \n\t"
+                "pxor %%mm4, %%mm4              \n\t"
+                ".balign 16                     \n\t"
+                "1:                             \n\t"
+                "movq (%0, %3), %%mm0           \n\t"
+                "movq 8(%0, %3), %%mm1          \n\t"
+
+                "pmullw %%mm6, %%mm0            \n\t"
+                "pmullw %%mm6, %%mm1            \n\t"
+
+                "movq (%0, %3), %%mm2           \n\t"
+                "movq 8(%0, %3), %%mm3          \n\t"
+
+                "pcmpgtw %%mm4, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
+                "pcmpgtw %%mm4, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
+
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+
+                "paddw %%mm7, %%mm0             \n\t"
+                "paddw %%mm7, %%mm1             \n\t"
+
+                "pxor %%mm0, %%mm2              \n\t"
+                "pxor %%mm1, %%mm3              \n\t"
+
+                "pcmpeqw %%mm7, %%mm0           \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw %%mm7, %%mm1           \n\t" // block[i] == 0 ? -1 : 0
+
+                "pandn %%mm2, %%mm0             \n\t"
+                "pandn %%mm3, %%mm1             \n\t"
+
+                "movq %%mm0, (%0, %3)           \n\t"
+                "movq %%mm1, 8(%0, %3)          \n\t"
+
+                "add $16, %3                    \n\t"
+                "jng 1b                         \n\t"
+                ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs))
+                : "memory"
+        );
 }
 
 
@@ -209,61 +209,61 @@ static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s,
 
     nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1;
 
-    if (n < 4) 
+    if (n < 4)
         block0 = block[0] * s->y_dc_scale;
     else
         block0 = block[0] * s->c_dc_scale;
     /* XXX: only mpeg1 */
     quant_matrix = s->intra_matrix;
 asm volatile(
-		"pcmpeqw %%mm7, %%mm7		\n\t"
-		"psrlw $15, %%mm7		\n\t"
-		"movd %2, %%mm6			\n\t"
-		"packssdw %%mm6, %%mm6		\n\t"
-		"packssdw %%mm6, %%mm6		\n\t"
-		"mov %3, %%"REG_a"		\n\t"
-		".balign 16\n\t"
-		"1:				\n\t"
-		"movq (%0, %%"REG_a"), %%mm0	\n\t"
-		"movq 8(%0, %%"REG_a"), %%mm1	\n\t"
-		"movq (%1, %%"REG_a"), %%mm4	\n\t"
-		"movq 8(%1, %%"REG_a"), %%mm5	\n\t"
-		"pmullw %%mm6, %%mm4		\n\t" // q=qscale*quant_matrix[i]
-		"pmullw %%mm6, %%mm5		\n\t" // q=qscale*quant_matrix[i]
-		"pxor %%mm2, %%mm2		\n\t"
-		"pxor %%mm3, %%mm3		\n\t"
-		"pcmpgtw %%mm0, %%mm2		\n\t" // block[i] < 0 ? -1 : 0
-		"pcmpgtw %%mm1, %%mm3		\n\t" // block[i] < 0 ? -1 : 0
-		"pxor %%mm2, %%mm0		\n\t"
-		"pxor %%mm3, %%mm1		\n\t"
-		"psubw %%mm2, %%mm0		\n\t" // abs(block[i])
-		"psubw %%mm3, %%mm1		\n\t" // abs(block[i])
-		"pmullw %%mm4, %%mm0		\n\t" // abs(block[i])*q
-		"pmullw %%mm5, %%mm1		\n\t" // abs(block[i])*q
-		"pxor %%mm4, %%mm4		\n\t"
-		"pxor %%mm5, %%mm5		\n\t" // FIXME slow
-		"pcmpeqw (%0, %%"REG_a"), %%mm4	\n\t" // block[i] == 0 ? -1 : 0
-		"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
-		"psraw $3, %%mm0		\n\t"
-		"psraw $3, %%mm1		\n\t"
-		"psubw %%mm7, %%mm0		\n\t"
-		"psubw %%mm7, %%mm1		\n\t"
-		"por %%mm7, %%mm0		\n\t"
-		"por %%mm7, %%mm1		\n\t"
-		"pxor %%mm2, %%mm0		\n\t"
-		"pxor %%mm3, %%mm1		\n\t"
-		"psubw %%mm2, %%mm0		\n\t"
-		"psubw %%mm3, %%mm1		\n\t"
-		"pandn %%mm0, %%mm4		\n\t"
-		"pandn %%mm1, %%mm5		\n\t"
-		"movq %%mm4, (%0, %%"REG_a")	\n\t"
-		"movq %%mm5, 8(%0, %%"REG_a")	\n\t"
-
-		"add $16, %%"REG_a"		\n\t"
-		"js 1b				\n\t"
-		::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
-		: "%"REG_a, "memory"
-	);    
+                "pcmpeqw %%mm7, %%mm7           \n\t"
+                "psrlw $15, %%mm7               \n\t"
+                "movd %2, %%mm6                 \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "mov %3, %%"REG_a"              \n\t"
+                ".balign 16                     \n\t"
+                "1:                             \n\t"
+                "movq (%0, %%"REG_a"), %%mm0    \n\t"
+                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
+                "movq (%1, %%"REG_a"), %%mm4    \n\t"
+                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
+                "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
+                "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
+                "pxor %%mm2, %%mm2              \n\t"
+                "pxor %%mm3, %%mm3              \n\t"
+                "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
+                "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
+                "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
+                "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*q
+                "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q
+                "pxor %%mm4, %%mm4              \n\t"
+                "pxor %%mm5, %%mm5              \n\t" // FIXME slow
+                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
+                "psraw $3, %%mm0                \n\t"
+                "psraw $3, %%mm1                \n\t"
+                "psubw %%mm7, %%mm0             \n\t"
+                "psubw %%mm7, %%mm1             \n\t"
+                "por %%mm7, %%mm0               \n\t"
+                "por %%mm7, %%mm1               \n\t"
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t"
+                "psubw %%mm3, %%mm1             \n\t"
+                "pandn %%mm0, %%mm4             \n\t"
+                "pandn %%mm1, %%mm5             \n\t"
+                "movq %%mm4, (%0, %%"REG_a")    \n\t"
+                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"
+
+                "add $16, %%"REG_a"             \n\t"
+                "js 1b                          \n\t"
+                ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
+                : "%"REG_a, "memory"
+        );
     block[0]= block0;
 }
 
@@ -279,58 +279,58 @@ static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s,
 
         quant_matrix = s->inter_matrix;
 asm volatile(
-		"pcmpeqw %%mm7, %%mm7		\n\t"
-		"psrlw $15, %%mm7		\n\t"
-		"movd %2, %%mm6			\n\t"
-		"packssdw %%mm6, %%mm6		\n\t"
-		"packssdw %%mm6, %%mm6		\n\t"
-		"mov %3, %%"REG_a"		\n\t"
-		".balign 16\n\t"
-		"1:				\n\t"
-		"movq (%0, %%"REG_a"), %%mm0	\n\t"
-		"movq 8(%0, %%"REG_a"), %%mm1	\n\t"
-		"movq (%1, %%"REG_a"), %%mm4	\n\t"
-		"movq 8(%1, %%"REG_a"), %%mm5	\n\t"
-		"pmullw %%mm6, %%mm4		\n\t" // q=qscale*quant_matrix[i]
-		"pmullw %%mm6, %%mm5		\n\t" // q=qscale*quant_matrix[i]
-		"pxor %%mm2, %%mm2		\n\t"
-		"pxor %%mm3, %%mm3		\n\t"
-		"pcmpgtw %%mm0, %%mm2		\n\t" // block[i] < 0 ? -1 : 0
-		"pcmpgtw %%mm1, %%mm3		\n\t" // block[i] < 0 ? -1 : 0
-		"pxor %%mm2, %%mm0		\n\t"
-		"pxor %%mm3, %%mm1		\n\t"
-		"psubw %%mm2, %%mm0		\n\t" // abs(block[i])
-		"psubw %%mm3, %%mm1		\n\t" // abs(block[i])
-		"paddw %%mm0, %%mm0		\n\t" // abs(block[i])*2
-		"paddw %%mm1, %%mm1		\n\t" // abs(block[i])*2
-		"paddw %%mm7, %%mm0		\n\t" // abs(block[i])*2 + 1
-		"paddw %%mm7, %%mm1		\n\t" // abs(block[i])*2 + 1
-		"pmullw %%mm4, %%mm0		\n\t" // (abs(block[i])*2 + 1)*q
-		"pmullw %%mm5, %%mm1		\n\t" // (abs(block[i])*2 + 1)*q
-		"pxor %%mm4, %%mm4		\n\t"
-		"pxor %%mm5, %%mm5		\n\t" // FIXME slow
-		"pcmpeqw (%0, %%"REG_a"), %%mm4	\n\t" // block[i] == 0 ? -1 : 0
-		"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
-		"psraw $4, %%mm0		\n\t"
-		"psraw $4, %%mm1		\n\t"
-		"psubw %%mm7, %%mm0		\n\t"
-		"psubw %%mm7, %%mm1		\n\t"
-		"por %%mm7, %%mm0		\n\t"
-		"por %%mm7, %%mm1		\n\t"
-		"pxor %%mm2, %%mm0		\n\t"
-		"pxor %%mm3, %%mm1		\n\t"
-		"psubw %%mm2, %%mm0		\n\t"
-		"psubw %%mm3, %%mm1		\n\t"
-		"pandn %%mm0, %%mm4		\n\t"
-		"pandn %%mm1, %%mm5		\n\t"
-		"movq %%mm4, (%0, %%"REG_a")	\n\t"
-		"movq %%mm5, 8(%0, %%"REG_a")	\n\t"
-
-		"add $16, %%"REG_a"		\n\t"
-		"js 1b				\n\t"
-		::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
-		: "%"REG_a, "memory"
-	);
+                "pcmpeqw %%mm7, %%mm7           \n\t"
+                "psrlw $15, %%mm7               \n\t"
+                "movd %2, %%mm6                 \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "mov %3, %%"REG_a"              \n\t"
+                ".balign 16                     \n\t"
+                "1:                             \n\t"
+                "movq (%0, %%"REG_a"), %%mm0    \n\t"
+                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
+                "movq (%1, %%"REG_a"), %%mm4    \n\t"
+                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
+                "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
+                "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
+                "pxor %%mm2, %%mm2              \n\t"
+                "pxor %%mm3, %%mm3              \n\t"
+                "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
+                "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
+                "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
+                "paddw %%mm0, %%mm0             \n\t" // abs(block[i])*2
+                "paddw %%mm1, %%mm1             \n\t" // abs(block[i])*2
+                "paddw %%mm7, %%mm0             \n\t" // abs(block[i])*2 + 1
+                "paddw %%mm7, %%mm1             \n\t" // abs(block[i])*2 + 1
+                "pmullw %%mm4, %%mm0            \n\t" // (abs(block[i])*2 + 1)*q
+                "pmullw %%mm5, %%mm1            \n\t" // (abs(block[i])*2 + 1)*q
+                "pxor %%mm4, %%mm4              \n\t"
+                "pxor %%mm5, %%mm5              \n\t" // FIXME slow
+                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
+                "psraw $4, %%mm0                \n\t"
+                "psraw $4, %%mm1                \n\t"
+                "psubw %%mm7, %%mm0             \n\t"
+                "psubw %%mm7, %%mm1             \n\t"
+                "por %%mm7, %%mm0               \n\t"
+                "por %%mm7, %%mm1               \n\t"
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t"
+                "psubw %%mm3, %%mm1             \n\t"
+                "pandn %%mm0, %%mm4             \n\t"
+                "pandn %%mm1, %%mm5             \n\t"
+                "movq %%mm4, (%0, %%"REG_a")    \n\t"
+                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"
+
+                "add $16, %%"REG_a"             \n\t"
+                "js 1b                          \n\t"
+                ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
+                : "%"REG_a, "memory"
+        );
 }
 
 static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
@@ -339,62 +339,62 @@ static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s,
     long nCoeffs;
     const uint16_t *quant_matrix;
     int block0;
-    
+
     assert(s->block_last_index[n]>=0);
 
     if(s->alternate_scan) nCoeffs= 63; //FIXME
     else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
 
-    if (n < 4) 
+    if (n < 4)
         block0 = block[0] * s->y_dc_scale;
     else
         block0 = block[0] * s->c_dc_scale;
     quant_matrix = s->intra_matrix;
 asm volatile(
-		"pcmpeqw %%mm7, %%mm7		\n\t"
-		"psrlw $15, %%mm7		\n\t"
-		"movd %2, %%mm6			\n\t"
-		"packssdw %%mm6, %%mm6		\n\t"
-		"packssdw %%mm6, %%mm6		\n\t"
-		"mov %3, %%"REG_a"		\n\t"
-		".balign 16\n\t"
-		"1:				\n\t"
-		"movq (%0, %%"REG_a"), %%mm0	\n\t"
-		"movq 8(%0, %%"REG_a"), %%mm1	\n\t"
-		"movq (%1, %%"REG_a"), %%mm4	\n\t"
-		"movq 8(%1, %%"REG_a"), %%mm5	\n\t"
-		"pmullw %%mm6, %%mm4		\n\t" // q=qscale*quant_matrix[i]
-		"pmullw %%mm6, %%mm5		\n\t" // q=qscale*quant_matrix[i]
-		"pxor %%mm2, %%mm2		\n\t"
-		"pxor %%mm3, %%mm3		\n\t"
-		"pcmpgtw %%mm0, %%mm2		\n\t" // block[i] < 0 ? -1 : 0
-		"pcmpgtw %%mm1, %%mm3		\n\t" // block[i] < 0 ? -1 : 0
-		"pxor %%mm2, %%mm0		\n\t"
-		"pxor %%mm3, %%mm1		\n\t"
-		"psubw %%mm2, %%mm0		\n\t" // abs(block[i])
-		"psubw %%mm3, %%mm1		\n\t" // abs(block[i])
-		"pmullw %%mm4, %%mm0		\n\t" // abs(block[i])*q
-		"pmullw %%mm5, %%mm1		\n\t" // abs(block[i])*q
-		"pxor %%mm4, %%mm4		\n\t"
-		"pxor %%mm5, %%mm5		\n\t" // FIXME slow
-		"pcmpeqw (%0, %%"REG_a"), %%mm4	\n\t" // block[i] == 0 ? -1 : 0
-		"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
-		"psraw $3, %%mm0		\n\t"
-		"psraw $3, %%mm1		\n\t"
-		"pxor %%mm2, %%mm0		\n\t"
-		"pxor %%mm3, %%mm1		\n\t"
-		"psubw %%mm2, %%mm0		\n\t"
-		"psubw %%mm3, %%mm1		\n\t"
-		"pandn %%mm0, %%mm4		\n\t"
-		"pandn %%mm1, %%mm5		\n\t"
-		"movq %%mm4, (%0, %%"REG_a")	\n\t"
-		"movq %%mm5, 8(%0, %%"REG_a")	\n\t"
-
-		"add $16, %%"REG_a"		\n\t"
-		"jng 1b				\n\t"
-		::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
-		: "%"REG_a, "memory"
-	);    
+                "pcmpeqw %%mm7, %%mm7           \n\t"
+                "psrlw $15, %%mm7               \n\t"
+                "movd %2, %%mm6                 \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "mov %3, %%"REG_a"              \n\t"
+                ".balign 16                     \n\t"
+                "1:                             \n\t"
+                "movq (%0, %%"REG_a"), %%mm0    \n\t"
+                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
+                "movq (%1, %%"REG_a"), %%mm4    \n\t"
+                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
+                "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
+                "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
+                "pxor %%mm2, %%mm2              \n\t"
+                "pxor %%mm3, %%mm3              \n\t"
+                "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
+                "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
+                "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
+                "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*q
+                "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*q
+                "pxor %%mm4, %%mm4              \n\t"
+                "pxor %%mm5, %%mm5              \n\t" // FIXME slow
+                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
+                "psraw $3, %%mm0                \n\t"
+                "psraw $3, %%mm1                \n\t"
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t"
+                "psubw %%mm3, %%mm1             \n\t"
+                "pandn %%mm0, %%mm4             \n\t"
+                "pandn %%mm1, %%mm5             \n\t"
+                "movq %%mm4, (%0, %%"REG_a")    \n\t"
+                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"
+
+                "add $16, %%"REG_a"             \n\t"
+                "jng 1b                         \n\t"
+                ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs)
+                : "%"REG_a, "memory"
+        );
     block[0]= block0;
         //Note, we dont do mismatch control for intra as errors cannot accumulate
 }
@@ -404,7 +404,7 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
 {
     long nCoeffs;
     const uint16_t *quant_matrix;
-    
+
     assert(s->block_last_index[n]>=0);
 
     if(s->alternate_scan) nCoeffs= 63; //FIXME
@@ -412,71 +412,71 @@ static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s,
 
         quant_matrix = s->inter_matrix;
 asm volatile(
-		"pcmpeqw %%mm7, %%mm7		\n\t"
-                "psrlq $48, %%mm7		\n\t"
-		"movd %2, %%mm6			\n\t"
-		"packssdw %%mm6, %%mm6		\n\t"
-		"packssdw %%mm6, %%mm6		\n\t"
-		"mov %3, %%"REG_a"		\n\t"
-		".balign 16\n\t"
-		"1:				\n\t"
-		"movq (%0, %%"REG_a"), %%mm0	\n\t"
-		"movq 8(%0, %%"REG_a"), %%mm1	\n\t"
-		"movq (%1, %%"REG_a"), %%mm4	\n\t"
-		"movq 8(%1, %%"REG_a"), %%mm5	\n\t"
-		"pmullw %%mm6, %%mm4		\n\t" // q=qscale*quant_matrix[i]
-		"pmullw %%mm6, %%mm5		\n\t" // q=qscale*quant_matrix[i]
-		"pxor %%mm2, %%mm2		\n\t"
-		"pxor %%mm3, %%mm3		\n\t"
-		"pcmpgtw %%mm0, %%mm2		\n\t" // block[i] < 0 ? -1 : 0
-		"pcmpgtw %%mm1, %%mm3		\n\t" // block[i] < 0 ? -1 : 0
-		"pxor %%mm2, %%mm0		\n\t"
-		"pxor %%mm3, %%mm1		\n\t"
-		"psubw %%mm2, %%mm0		\n\t" // abs(block[i])
-		"psubw %%mm3, %%mm1		\n\t" // abs(block[i])
-		"paddw %%mm0, %%mm0		\n\t" // abs(block[i])*2
-		"paddw %%mm1, %%mm1		\n\t" // abs(block[i])*2
-		"pmullw %%mm4, %%mm0		\n\t" // abs(block[i])*2*q
-		"pmullw %%mm5, %%mm1		\n\t" // abs(block[i])*2*q
-		"paddw %%mm4, %%mm0		\n\t" // (abs(block[i])*2 + 1)*q
-		"paddw %%mm5, %%mm1		\n\t" // (abs(block[i])*2 + 1)*q
-		"pxor %%mm4, %%mm4		\n\t"
-		"pxor %%mm5, %%mm5		\n\t" // FIXME slow
-		"pcmpeqw (%0, %%"REG_a"), %%mm4	\n\t" // block[i] == 0 ? -1 : 0
-		"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
-		"psrlw $4, %%mm0		\n\t"
-		"psrlw $4, %%mm1		\n\t"
-		"pxor %%mm2, %%mm0		\n\t"
-		"pxor %%mm3, %%mm1		\n\t"
-		"psubw %%mm2, %%mm0		\n\t"
-		"psubw %%mm3, %%mm1		\n\t"
-		"pandn %%mm0, %%mm4		\n\t"
-		"pandn %%mm1, %%mm5		\n\t"
-                "pxor %%mm4, %%mm7		\n\t"
-                "pxor %%mm5, %%mm7		\n\t"
-		"movq %%mm4, (%0, %%"REG_a")	\n\t"
-		"movq %%mm5, 8(%0, %%"REG_a")	\n\t"
-
-		"add $16, %%"REG_a"		\n\t"
-		"jng 1b				\n\t"
-                "movd 124(%0, %3), %%mm0	\n\t"
-                "movq %%mm7, %%mm6		\n\t"
-                "psrlq $32, %%mm7		\n\t"
-                "pxor %%mm6, %%mm7		\n\t"
-                "movq %%mm7, %%mm6		\n\t"
-                "psrlq $16, %%mm7		\n\t"
-                "pxor %%mm6, %%mm7		\n\t"
-                "pslld $31, %%mm7		\n\t"
-                "psrlq $15, %%mm7		\n\t"
-                "pxor %%mm7, %%mm0		\n\t"
-                "movd %%mm0, 124(%0, %3)	\n\t"
-                
-		::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
-		: "%"REG_a, "memory"
-	);
+                "pcmpeqw %%mm7, %%mm7           \n\t"
+                "psrlq $48, %%mm7               \n\t"
+                "movd %2, %%mm6                 \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "packssdw %%mm6, %%mm6          \n\t"
+                "mov %3, %%"REG_a"              \n\t"
+                ".balign 16                     \n\t"
+                "1:                             \n\t"
+                "movq (%0, %%"REG_a"), %%mm0    \n\t"
+                "movq 8(%0, %%"REG_a"), %%mm1   \n\t"
+                "movq (%1, %%"REG_a"), %%mm4    \n\t"
+                "movq 8(%1, %%"REG_a"), %%mm5   \n\t"
+                "pmullw %%mm6, %%mm4            \n\t" // q=qscale*quant_matrix[i]
+                "pmullw %%mm6, %%mm5            \n\t" // q=qscale*quant_matrix[i]
+                "pxor %%mm2, %%mm2              \n\t"
+                "pxor %%mm3, %%mm3              \n\t"
+                "pcmpgtw %%mm0, %%mm2           \n\t" // block[i] < 0 ? -1 : 0
+                "pcmpgtw %%mm1, %%mm3           \n\t" // block[i] < 0 ? -1 : 0
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t" // abs(block[i])
+                "psubw %%mm3, %%mm1             \n\t" // abs(block[i])
+                "paddw %%mm0, %%mm0             \n\t" // abs(block[i])*2
+                "paddw %%mm1, %%mm1             \n\t" // abs(block[i])*2
+                "pmullw %%mm4, %%mm0            \n\t" // abs(block[i])*2*q
+                "pmullw %%mm5, %%mm1            \n\t" // abs(block[i])*2*q
+                "paddw %%mm4, %%mm0             \n\t" // (abs(block[i])*2 + 1)*q
+                "paddw %%mm5, %%mm1             \n\t" // (abs(block[i])*2 + 1)*q
+                "pxor %%mm4, %%mm4              \n\t"
+                "pxor %%mm5, %%mm5              \n\t" // FIXME slow
+                "pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0
+                "pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
+                "psrlw $4, %%mm0                \n\t"
+                "psrlw $4, %%mm1                \n\t"
+                "pxor %%mm2, %%mm0              \n\t"
+                "pxor %%mm3, %%mm1              \n\t"
+                "psubw %%mm2, %%mm0             \n\t"
+                "psubw %%mm3, %%mm1             \n\t"
+                "pandn %%mm0, %%mm4             \n\t"
+                "pandn %%mm1, %%mm5             \n\t"
+                "pxor %%mm4, %%mm7              \n\t"
+                "pxor %%mm5, %%mm7              \n\t"
+                "movq %%mm4, (%0, %%"REG_a")    \n\t"
+                "movq %%mm5, 8(%0, %%"REG_a")   \n\t"
+
+                "add $16, %%"REG_a"             \n\t"
+                "jng 1b                         \n\t"
+                "movd 124(%0, %3), %%mm0        \n\t"
+                "movq %%mm7, %%mm6              \n\t"
+                "psrlq $32, %%mm7               \n\t"
+                "pxor %%mm6, %%mm7              \n\t"
+                "movq %%mm7, %%mm6              \n\t"
+                "psrlq $16, %%mm7               \n\t"
+                "pxor %%mm6, %%mm7              \n\t"
+                "pslld $31, %%mm7               \n\t"
+                "psrlq $15, %%mm7               \n\t"
+                "pxor %%mm7, %%mm0              \n\t"
+                "movd %%mm0, 124(%0, %3)        \n\t"
+
+                ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs)
+                : "%"REG_a, "memory"
+        );
 }
 
-/* draw the edges of width 'w' of an image of size width, height 
+/* draw the edges of width 'w' of an image of size width, height
    this mmx version can only handle w==8 || w==16 */
 static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
 {
@@ -488,79 +488,79 @@ static void draw_edges_mmx(uint8_t *buf, int wrap, int width, int height, int w)
     ptr = buf;
     if(w==8)
     {
-	asm volatile(
-		"1:				\n\t"
-		"movd (%0), %%mm0		\n\t"
-		"punpcklbw %%mm0, %%mm0		\n\t" 
-		"punpcklwd %%mm0, %%mm0		\n\t"
-		"punpckldq %%mm0, %%mm0		\n\t"
-		"movq %%mm0, -8(%0)		\n\t"
-		"movq -8(%0, %2), %%mm1		\n\t"
-		"punpckhbw %%mm1, %%mm1		\n\t"
-		"punpckhwd %%mm1, %%mm1		\n\t"
-		"punpckhdq %%mm1, %%mm1		\n\t"
-		"movq %%mm1, (%0, %2)		\n\t"
-		"add %1, %0			\n\t"
-		"cmp %3, %0			\n\t"
-		" jb 1b				\n\t"
-		: "+r" (ptr)
-		: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
-	);
+        asm volatile(
+                "1:                             \n\t"
+                "movd (%0), %%mm0               \n\t"
+                "punpcklbw %%mm0, %%mm0         \n\t"
+                "punpcklwd %%mm0, %%mm0         \n\t"
+                "punpckldq %%mm0, %%mm0         \n\t"
+                "movq %%mm0, -8(%0)             \n\t"
+                "movq -8(%0, %2), %%mm1         \n\t"
+                "punpckhbw %%mm1, %%mm1         \n\t"
+                "punpckhwd %%mm1, %%mm1         \n\t"
+                "punpckhdq %%mm1, %%mm1         \n\t"
+                "movq %%mm1, (%0, %2)           \n\t"
+                "add %1, %0                     \n\t"
+                "cmp %3, %0                     \n\t"
+                " jb 1b                         \n\t"
+                : "+r" (ptr)
+                : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
+        );
     }
     else
     {
-	asm volatile(
-		"1:				\n\t"
-		"movd (%0), %%mm0		\n\t"
-		"punpcklbw %%mm0, %%mm0		\n\t" 
-		"punpcklwd %%mm0, %%mm0		\n\t"
-		"punpckldq %%mm0, %%mm0		\n\t"
-		"movq %%mm0, -8(%0)		\n\t"
-		"movq %%mm0, -16(%0)		\n\t"
-		"movq -8(%0, %2), %%mm1		\n\t"
-		"punpckhbw %%mm1, %%mm1		\n\t"
-		"punpckhwd %%mm1, %%mm1		\n\t"
-		"punpckhdq %%mm1, %%mm1		\n\t"
-		"movq %%mm1, (%0, %2)		\n\t"
-		"movq %%mm1, 8(%0, %2)		\n\t"
-		"add %1, %0			\n\t"
-		"cmp %3, %0			\n\t"
-		" jb 1b				\n\t"		
-		: "+r" (ptr)
-		: "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
-	);
+        asm volatile(
+                "1:                             \n\t"
+                "movd (%0), %%mm0               \n\t"
+                "punpcklbw %%mm0, %%mm0         \n\t"
+                "punpcklwd %%mm0, %%mm0         \n\t"
+                "punpckldq %%mm0, %%mm0         \n\t"
+                "movq %%mm0, -8(%0)             \n\t"
+                "movq %%mm0, -16(%0)            \n\t"
+                "movq -8(%0, %2), %%mm1         \n\t"
+                "punpckhbw %%mm1, %%mm1         \n\t"
+                "punpckhwd %%mm1, %%mm1         \n\t"
+                "punpckhdq %%mm1, %%mm1         \n\t"
+                "movq %%mm1, (%0, %2)           \n\t"
+                "movq %%mm1, 8(%0, %2)          \n\t"
+                "add %1, %0                     \n\t"
+                "cmp %3, %0                     \n\t"
+                " jb 1b                         \n\t"
+                : "+r" (ptr)
+                : "r" ((long)wrap), "r" ((long)width), "r" (ptr + wrap*height)
+        );
     }
-    
+
     for(i=0;i<w;i+=4) {
         /* top and bottom (and hopefully also the corners) */
-	ptr= buf - (i + 1) * wrap - w;
-	asm volatile(
-		"1:				\n\t"
-		"movq (%1, %0), %%mm0		\n\t"
-		"movq %%mm0, (%0)		\n\t"
-		"movq %%mm0, (%0, %2)		\n\t"
-		"movq %%mm0, (%0, %2, 2)	\n\t"
-		"movq %%mm0, (%0, %3)		\n\t"
-		"add $8, %0			\n\t"
-		"cmp %4, %0			\n\t"
-		" jb 1b				\n\t"
-		: "+r" (ptr)
-		: "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
-	);
-	ptr= last_line + (i + 1) * wrap - w;
-	asm volatile(
-		"1:				\n\t"
-		"movq (%1, %0), %%mm0		\n\t"
-		"movq %%mm0, (%0)		\n\t"
-		"movq %%mm0, (%0, %2)		\n\t"
-		"movq %%mm0, (%0, %2, 2)	\n\t"
-		"movq %%mm0, (%0, %3)		\n\t"
-		"add $8, %0			\n\t"
-		"cmp %4, %0			\n\t"
-		" jb 1b				\n\t"
-		: "+r" (ptr)
-		: "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
-	);
+        ptr= buf - (i + 1) * wrap - w;
+        asm volatile(
+                "1:                             \n\t"
+                "movq (%1, %0), %%mm0           \n\t"
+                "movq %%mm0, (%0)               \n\t"
+                "movq %%mm0, (%0, %2)           \n\t"
+                "movq %%mm0, (%0, %2, 2)        \n\t"
+                "movq %%mm0, (%0, %3)           \n\t"
+                "add $8, %0                     \n\t"
+                "cmp %4, %0                     \n\t"
+                " jb 1b                         \n\t"
+                : "+r" (ptr)
+                : "r" ((long)buf - (long)ptr - w), "r" ((long)-wrap), "r" ((long)-wrap*3), "r" (ptr+width+2*w)
+        );
+        ptr= last_line + (i + 1) * wrap - w;
+        asm volatile(
+                "1:                             \n\t"
+                "movq (%1, %0), %%mm0           \n\t"
+                "movq %%mm0, (%0)               \n\t"
+                "movq %%mm0, (%0, %2)           \n\t"
+                "movq %%mm0, (%0, %2, 2)        \n\t"
+                "movq %%mm0, (%0, %3)           \n\t"
+                "add $8, %0                     \n\t"
+                "cmp %4, %0                     \n\t"
+                " jb 1b                         \n\t"
+                : "+r" (ptr)
+                : "r" ((long)last_line - (long)ptr - w), "r" ((long)wrap), "r" ((long)wrap*3), "r" (ptr+width+2*w)
+        );
     }
 }
 
@@ -572,47 +572,47 @@ static void  denoise_dct_mmx(MpegEncContext *s, DCTELEM *block){
     s->dct_count[intra]++;
 
     asm volatile(
-        "pxor %%mm7, %%mm7		\n\t"
-        "1:				\n\t"
-        "pxor %%mm0, %%mm0		\n\t"
-        "pxor %%mm1, %%mm1		\n\t"
-        "movq (%0), %%mm2		\n\t"
-        "movq 8(%0), %%mm3		\n\t"
-        "pcmpgtw %%mm2, %%mm0		\n\t"
-        "pcmpgtw %%mm3, %%mm1		\n\t"
-        "pxor %%mm0, %%mm2		\n\t"
-        "pxor %%mm1, %%mm3		\n\t"
-        "psubw %%mm0, %%mm2		\n\t"
-        "psubw %%mm1, %%mm3		\n\t"
-        "movq %%mm2, %%mm4		\n\t"
-        "movq %%mm3, %%mm5		\n\t"
-        "psubusw (%2), %%mm2		\n\t"
-        "psubusw 8(%2), %%mm3		\n\t"
-        "pxor %%mm0, %%mm2		\n\t"
-        "pxor %%mm1, %%mm3		\n\t"
-        "psubw %%mm0, %%mm2		\n\t"
-        "psubw %%mm1, %%mm3		\n\t"
-        "movq %%mm2, (%0)		\n\t"
-        "movq %%mm3, 8(%0)		\n\t"
-        "movq %%mm4, %%mm2		\n\t"
-        "movq %%mm5, %%mm3		\n\t"
-        "punpcklwd %%mm7, %%mm4		\n\t"
-        "punpckhwd %%mm7, %%mm2		\n\t"
-        "punpcklwd %%mm7, %%mm5		\n\t"
-        "punpckhwd %%mm7, %%mm3		\n\t"
-        "paddd (%1), %%mm4		\n\t"
-        "paddd 8(%1), %%mm2		\n\t"
-        "paddd 16(%1), %%mm5		\n\t"
-        "paddd 24(%1), %%mm3		\n\t"
-        "movq %%mm4, (%1)		\n\t"
-        "movq %%mm2, 8(%1)		\n\t"
-        "movq %%mm5, 16(%1)		\n\t"
-        "movq %%mm3, 24(%1)		\n\t"
-        "add $16, %0			\n\t"
-        "add $32, %1			\n\t"
-        "add $16, %2			\n\t"
-        "cmp %3, %0			\n\t"
-            " jb 1b			\n\t"
+        "pxor %%mm7, %%mm7                      \n\t"
+        "1:                                     \n\t"
+        "pxor %%mm0, %%mm0                      \n\t"
+        "pxor %%mm1, %%mm1                      \n\t"
+        "movq (%0), %%mm2                       \n\t"
+        "movq 8(%0), %%mm3                      \n\t"
+        "pcmpgtw %%mm2, %%mm0                   \n\t"
+        "pcmpgtw %%mm3, %%mm1                   \n\t"
+        "pxor %%mm0, %%mm2                      \n\t"
+        "pxor %%mm1, %%mm3                      \n\t"
+        "psubw %%mm0, %%mm2                     \n\t"
+        "psubw %%mm1, %%mm3                     \n\t"
+        "movq %%mm2, %%mm4                      \n\t"
+        "movq %%mm3, %%mm5                      \n\t"
+        "psubusw (%2), %%mm2                    \n\t"
+        "psubusw 8(%2), %%mm3                   \n\t"
+        "pxor %%mm0, %%mm2                      \n\t"
+        "pxor %%mm1, %%mm3                      \n\t"
+        "psubw %%mm0, %%mm2                     \n\t"
+        "psubw %%mm1, %%mm3                     \n\t"
+        "movq %%mm2, (%0)                       \n\t"
+        "movq %%mm3, 8(%0)                      \n\t"
+        "movq %%mm4, %%mm2                      \n\t"
+        "movq %%mm5, %%mm3                      \n\t"
+        "punpcklwd %%mm7, %%mm4                 \n\t"
+        "punpckhwd %%mm7, %%mm2                 \n\t"
+        "punpcklwd %%mm7, %%mm5                 \n\t"
+        "punpckhwd %%mm7, %%mm3                 \n\t"
+        "paddd (%1), %%mm4                      \n\t"
+        "paddd 8(%1), %%mm2                     \n\t"
+        "paddd 16(%1), %%mm5                    \n\t"
+        "paddd 24(%1), %%mm3                    \n\t"
+        "movq %%mm4, (%1)                       \n\t"
+        "movq %%mm2, 8(%1)                      \n\t"
+        "movq %%mm5, 16(%1)                     \n\t"
+        "movq %%mm3, 24(%1)                     \n\t"
+        "add $16, %0                            \n\t"
+        "add $32, %1                            \n\t"
+        "add $16, %2                            \n\t"
+        "cmp %3, %0                             \n\t"
+            " jb 1b                             \n\t"
         : "+r" (block), "+r" (sum), "+r" (offset)
         : "r"(block+64)
     );
@@ -626,47 +626,47 @@ static void  denoise_dct_sse2(MpegEncContext *s, DCTELEM *block){
     s->dct_count[intra]++;
 
     asm volatile(
-        "pxor %%xmm7, %%xmm7		\n\t"
-        "1:				\n\t"
-        "pxor %%xmm0, %%xmm0		\n\t"
-        "pxor %%xmm1, %%xmm1		\n\t"
-        "movdqa (%0), %%xmm2		\n\t"
-        "movdqa 16(%0), %%xmm3		\n\t"
-        "pcmpgtw %%xmm2, %%xmm0		\n\t"
-        "pcmpgtw %%xmm3, %%xmm1		\n\t"
-        "pxor %%xmm0, %%xmm2		\n\t"
-        "pxor %%xmm1, %%xmm3		\n\t"
-        "psubw %%xmm0, %%xmm2		\n\t"
-        "psubw %%xmm1, %%xmm3		\n\t"
-        "movdqa %%xmm2, %%xmm4		\n\t"
-        "movdqa %%xmm3, %%xmm5		\n\t"
-        "psubusw (%2), %%xmm2		\n\t"
-        "psubusw 16(%2), %%xmm3		\n\t"
-        "pxor %%xmm0, %%xmm2		\n\t"
-        "pxor %%xmm1, %%xmm3		\n\t"
-        "psubw %%xmm0, %%xmm2		\n\t"
-        "psubw %%xmm1, %%xmm3		\n\t"
-        "movdqa %%xmm2, (%0)		\n\t"
-        "movdqa %%xmm3, 16(%0)		\n\t"
-        "movdqa %%xmm4, %%xmm6		\n\t"
-        "movdqa %%xmm5, %%xmm0		\n\t"
-        "punpcklwd %%xmm7, %%xmm4	\n\t"
-        "punpckhwd %%xmm7, %%xmm6	\n\t"
-        "punpcklwd %%xmm7, %%xmm5	\n\t"
-        "punpckhwd %%xmm7, %%xmm0	\n\t"
-        "paddd (%1), %%xmm4		\n\t"
-        "paddd 16(%1), %%xmm6		\n\t"
-        "paddd 32(%1), %%xmm5		\n\t"
-        "paddd 48(%1), %%xmm0		\n\t"
-        "movdqa %%xmm4, (%1)		\n\t"
-        "movdqa %%xmm6, 16(%1)		\n\t"
-        "movdqa %%xmm5, 32(%1)		\n\t"
-        "movdqa %%xmm0, 48(%1)		\n\t"
-        "add $32, %0			\n\t"
-        "add $64, %1			\n\t"
-        "add $32, %2			\n\t"
-        "cmp %3, %0			\n\t"
-            " jb 1b			\n\t"
+        "pxor %%xmm7, %%xmm7                    \n\t"
+        "1:                                     \n\t"
+        "pxor %%xmm0, %%xmm0                    \n\t"
+        "pxor %%xmm1, %%xmm1                    \n\t"
+        "movdqa (%0), %%xmm2                    \n\t"
+        "movdqa 16(%0), %%xmm3                  \n\t"
+        "pcmpgtw %%xmm2, %%xmm0                 \n\t"
+        "pcmpgtw %%xmm3, %%xmm1                 \n\t"
+        "pxor %%xmm0, %%xmm2                    \n\t"
+        "pxor %%xmm1, %%xmm3                    \n\t"
+        "psubw %%xmm0, %%xmm2                   \n\t"
+        "psubw %%xmm1, %%xmm3                   \n\t"
+        "movdqa %%xmm2, %%xmm4                  \n\t"
+        "movdqa %%xmm3, %%xmm5                  \n\t"
+        "psubusw (%2), %%xmm2                   \n\t"
+        "psubusw 16(%2), %%xmm3                 \n\t"
+        "pxor %%xmm0, %%xmm2                    \n\t"
+        "pxor %%xmm1, %%xmm3                    \n\t"
+        "psubw %%xmm0, %%xmm2                   \n\t"
+        "psubw %%xmm1, %%xmm3                   \n\t"
+        "movdqa %%xmm2, (%0)                    \n\t"
+        "movdqa %%xmm3, 16(%0)                  \n\t"
+        "movdqa %%xmm4, %%xmm6                  \n\t"
+        "movdqa %%xmm5, %%xmm0                  \n\t"
+        "punpcklwd %%xmm7, %%xmm4               \n\t"
+        "punpckhwd %%xmm7, %%xmm6               \n\t"
+        "punpcklwd %%xmm7, %%xmm5               \n\t"
+        "punpckhwd %%xmm7, %%xmm0               \n\t"
+        "paddd (%1), %%xmm4                     \n\t"
+        "paddd 16(%1), %%xmm6                   \n\t"
+        "paddd 32(%1), %%xmm5                   \n\t"
+        "paddd 48(%1), %%xmm0                   \n\t"
+        "movdqa %%xmm4, (%1)                    \n\t"
+        "movdqa %%xmm6, 16(%1)                  \n\t"
+        "movdqa %%xmm5, 32(%1)                  \n\t"
+        "movdqa %%xmm0, 48(%1)                  \n\t"
+        "add $32, %0                            \n\t"
+        "add $64, %1                            \n\t"
+        "add $32, %2                            \n\t"
+        "cmp %3, %0                             \n\t"
+            " jb 1b                             \n\t"
         : "+r" (block), "+r" (sum), "+r" (offset)
         : "r"(block+64)
     );
@@ -694,7 +694,7 @@ void MPV_common_init_mmx(MpegEncContext *s)
 {
     if (mm_flags & MM_MMX) {
         const int dct_algo = s->avctx->dct_algo;
-        
+
         s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx;
         s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx;
         s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx;
@@ -703,12 +703,12 @@ void MPV_common_init_mmx(MpegEncContext *s)
         s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx;
 
         draw_edges = draw_edges_mmx;
-        
+
         if (mm_flags & MM_SSE2) {
-	    s->denoise_dct= denoise_dct_sse2;
-	} else {
-    	    s->denoise_dct= denoise_dct_mmx;
-	}
+            s->denoise_dct= denoise_dct_sse2;
+        } else {
+                s->denoise_dct= denoise_dct_mmx;
+        }
 
         if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
             if(mm_flags & MM_SSE2){
diff --git a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c
index 93f156ee5..2c50df232 100644
--- a/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c
+++ b/src/libffmpeg/libavcodec/i386/mpegvideo_mmx_template.c
@@ -15,32 +15,32 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #undef SPREADW
 #undef PMAXW
 #ifdef HAVE_MMX2
 #define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
-#define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
+#define PMAXW(a,b) "pmaxsw " #a ", " #b "     \n\t"
 #define PMAX(a,b) \
-            "pshufw $0x0E," #a ", " #b "		\n\t"\
-	    PMAXW(b, a)\
-            "pshufw $0x01," #a ", " #b "		\n\t"\
-	    PMAXW(b, a)
+            "pshufw $0x0E," #a ", " #b "        \n\t"\
+            PMAXW(b, a)\
+            "pshufw $0x01," #a ", " #b "        \n\t"\
+            PMAXW(b, a)
 #else
 #define SPREADW(a) \
-	"punpcklwd " #a ", " #a " \n\t"\
-	"punpcklwd " #a ", " #a " \n\t"
+        "punpcklwd " #a ", " #a " \n\t"\
+        "punpcklwd " #a ", " #a " \n\t"
 #define PMAXW(a,b) \
-	"psubusw " #a ", " #b " \n\t"\
-	"paddw " #a ", " #b " \n\t"
+        "psubusw " #a ", " #b " \n\t"\
+        "paddw " #a ", " #b "   \n\t"
 #define PMAX(a,b)  \
-            "movq " #a ", " #b "		\n\t"\
-            "psrlq $32, " #a "			\n\t"\
-	    PMAXW(b, a)\
-            "movq " #a ", " #b "		\n\t"\
-            "psrlq $16, " #a "			\n\t"\
-	    PMAXW(b, a)
+            "movq " #a ", " #b "                \n\t"\
+            "psrlq $32, " #a "                  \n\t"\
+            PMAXW(b, a)\
+            "movq " #a ", " #b "                \n\t"\
+            "psrlq $16, " #a "                  \n\t"\
+            PMAXW(b, a)
 
 #endif
 
@@ -51,8 +51,8 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
     long last_non_zero_p1;
     int level=0, q; //=0 is cuz gcc says uninitalized ...
     const uint16_t *qmat, *bias;
-    __align8 int16_t temp_block[64];
-    
+    DECLARE_ALIGNED_8(int16_t, temp_block[64]);
+
     assert((7&(int)(&temp_block[0])) == 0); //did gcc align it correctly?
 
     //s->fdct (block);
@@ -71,24 +71,24 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
         if (!s->h263_aic) {
 #if 1
         asm volatile (
-        	"mul %%ecx		\n\t"
-        	: "=d" (level), "=a"(dummy)
-        	: "a" ((block[0]>>2) + q), "c" (inverse[q<<1])
+                "mul %%ecx                \n\t"
+                : "=d" (level), "=a"(dummy)
+                : "a" ((block[0]>>2) + q), "c" (inverse[q<<1])
         );
 #else
         asm volatile (
-        	"xorl %%edx, %%edx	\n\t"
-        	"divw %%cx		\n\t"
-        	"movzwl %%ax, %%eax	\n\t"
-        	: "=a" (level)
-        	: "a" ((block[0]>>2) + q), "c" (q<<1)
-        	: "%edx"
+                "xorl %%edx, %%edx        \n\t"
+                "divw %%cx                \n\t"
+                "movzwl %%ax, %%eax       \n\t"
+                : "=a" (level)
+                : "a" ((block[0]>>2) + q), "c" (q<<1)
+                : "%edx"
         );
 #endif
         } else
             /* For AIC we skip quant/dequant of INTRADC */
             level = (block[0] + 4)>>3;
-            
+
         block[0]=0; //avoid fake overflow
 //        temp_block[0] = (block[0] + (q >> 1)) / q;
         last_non_zero_p1 = 1;
@@ -101,96 +101,96 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
     }
 
     if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
-    
+
         asm volatile(
-            "movd %%"REG_a", %%mm3		\n\t" // last_non_zero_p1
+            "movd %%"REG_a", %%mm3              \n\t" // last_non_zero_p1
             SPREADW(%%mm3)
-            "pxor %%mm7, %%mm7			\n\t" // 0
-            "pxor %%mm4, %%mm4			\n\t" // 0
-            "movq (%2), %%mm5			\n\t" // qmat[0]
-            "pxor %%mm6, %%mm6			\n\t"
-            "psubw (%3), %%mm6			\n\t" // -bias[0]
-            "mov $-128, %%"REG_a"		\n\t"
-            ".balign 16				\n\t"
-            "1:					\n\t"
-            "pxor %%mm1, %%mm1			\n\t" // 0
-            "movq (%1, %%"REG_a"), %%mm0	\n\t" // block[i]
-            "pcmpgtw %%mm0, %%mm1		\n\t" // block[i] <= 0 ? 0xFF : 0x00
-            "pxor %%mm1, %%mm0			\n\t" 
-            "psubw %%mm1, %%mm0			\n\t" // ABS(block[i])
-            "psubusw %%mm6, %%mm0		\n\t" // ABS(block[i]) + bias[0]
-            "pmulhw %%mm5, %%mm0		\n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
-            "por %%mm0, %%mm4			\n\t" 
-            "pxor %%mm1, %%mm0			\n\t" 
-            "psubw %%mm1, %%mm0			\n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
-            "movq %%mm0, (%5, %%"REG_a")	\n\t"
-            "pcmpeqw %%mm7, %%mm0		\n\t" // out==0 ? 0xFF : 0x00
-            "movq (%4, %%"REG_a"), %%mm1	\n\t" 
-            "movq %%mm7, (%1, %%"REG_a")	\n\t" // 0
-            "pandn %%mm1, %%mm0			\n\t"
-	    PMAXW(%%mm0, %%mm3)
-            "add $8, %%"REG_a"			\n\t"
-            " js 1b				\n\t"
-	    PMAX(%%mm3, %%mm0)
-            "movd %%mm3, %%"REG_a"		\n\t"
-            "movzb %%al, %%"REG_a"		\n\t" // last_non_zero_p1
-	    : "+a" (last_non_zero_p1)
+            "pxor %%mm7, %%mm7                  \n\t" // 0
+            "pxor %%mm4, %%mm4                  \n\t" // 0
+            "movq (%2), %%mm5                   \n\t" // qmat[0]
+            "pxor %%mm6, %%mm6                  \n\t"
+            "psubw (%3), %%mm6                  \n\t" // -bias[0]
+            "mov $-128, %%"REG_a"               \n\t"
+            ".balign 16                         \n\t"
+            "1:                                 \n\t"
+            "pxor %%mm1, %%mm1                  \n\t" // 0
+            "movq (%1, %%"REG_a"), %%mm0        \n\t" // block[i]
+            "pcmpgtw %%mm0, %%mm1               \n\t" // block[i] < 0 ? 0xFFFF : 0x0000
+            "pxor %%mm1, %%mm0                  \n\t"
+            "psubw %%mm1, %%mm0                 \n\t" // ABS(block[i])
+            "psubusw %%mm6, %%mm0               \n\t" // ABS(block[i]) + bias[0]
+            "pmulhw %%mm5, %%mm0                \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
+            "por %%mm0, %%mm4                   \n\t"
+            "pxor %%mm1, %%mm0                  \n\t"
+            "psubw %%mm1, %%mm0                 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
+            "movq %%mm0, (%5, %%"REG_a")        \n\t"
+            "pcmpeqw %%mm7, %%mm0               \n\t" // out==0 ? 0xFF : 0x00
+            "movq (%4, %%"REG_a"), %%mm1        \n\t"
+            "movq %%mm7, (%1, %%"REG_a")        \n\t" // 0
+            "pandn %%mm1, %%mm0                 \n\t"
+            PMAXW(%%mm0, %%mm3)
+            "add $8, %%"REG_a"                  \n\t"
+            " js 1b                             \n\t"
+            PMAX(%%mm3, %%mm0)
+            "movd %%mm3, %%"REG_a"              \n\t"
+            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
+            : "+a" (last_non_zero_p1)
             : "r" (block+64), "r" (qmat), "r" (bias),
               "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
         );
         // note the asm is split cuz gcc doesnt like that many operands ...
         asm volatile(
-            "movd %1, %%mm1			\n\t" // max_qcoeff
-	    SPREADW(%%mm1)
-            "psubusw %%mm1, %%mm4		\n\t" 
-            "packuswb %%mm4, %%mm4		\n\t"
-            "movd %%mm4, %0			\n\t" // *overflow
+            "movd %1, %%mm1                     \n\t" // max_qcoeff
+            SPREADW(%%mm1)
+            "psubusw %%mm1, %%mm4               \n\t"
+            "packuswb %%mm4, %%mm4              \n\t"
+            "movd %%mm4, %0                     \n\t" // *overflow
         : "=g" (*overflow)
         : "g" (s->max_qcoeff)
         );
     }else{ // FMT_H263
         asm volatile(
-            "movd %%"REG_a", %%mm3		\n\t" // last_non_zero_p1
+            "movd %%"REG_a", %%mm3              \n\t" // last_non_zero_p1
             SPREADW(%%mm3)
-            "pxor %%mm7, %%mm7			\n\t" // 0
-            "pxor %%mm4, %%mm4			\n\t" // 0
-            "mov $-128, %%"REG_a"		\n\t"
-            ".balign 16				\n\t"
-            "1:					\n\t"
-            "pxor %%mm1, %%mm1			\n\t" // 0
-            "movq (%1, %%"REG_a"), %%mm0	\n\t" // block[i]
-            "pcmpgtw %%mm0, %%mm1		\n\t" // block[i] <= 0 ? 0xFF : 0x00
-            "pxor %%mm1, %%mm0			\n\t" 
-            "psubw %%mm1, %%mm0			\n\t" // ABS(block[i])
-            "movq (%3, %%"REG_a"), %%mm6	\n\t" // bias[0]
-            "paddusw %%mm6, %%mm0		\n\t" // ABS(block[i]) + bias[0]
-            "movq (%2, %%"REG_a"), %%mm5		\n\t" // qmat[i]
-            "pmulhw %%mm5, %%mm0		\n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
-            "por %%mm0, %%mm4			\n\t" 
-            "pxor %%mm1, %%mm0			\n\t" 
-            "psubw %%mm1, %%mm0			\n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
-            "movq %%mm0, (%5, %%"REG_a")	\n\t"
-            "pcmpeqw %%mm7, %%mm0		\n\t" // out==0 ? 0xFF : 0x00
-            "movq (%4, %%"REG_a"), %%mm1		\n\t" 
-            "movq %%mm7, (%1, %%"REG_a")		\n\t" // 0
-            "pandn %%mm1, %%mm0			\n\t"
-	    PMAXW(%%mm0, %%mm3)
-            "add $8, %%"REG_a"			\n\t"
-            " js 1b				\n\t"
-	    PMAX(%%mm3, %%mm0)
-            "movd %%mm3, %%"REG_a"		\n\t"
-            "movzb %%al, %%"REG_a"		\n\t" // last_non_zero_p1
-	    : "+a" (last_non_zero_p1)
+            "pxor %%mm7, %%mm7                  \n\t" // 0
+            "pxor %%mm4, %%mm4                  \n\t" // 0
+            "mov $-128, %%"REG_a"               \n\t"
+            ".balign 16                         \n\t"
+            "1:                                 \n\t"
+            "pxor %%mm1, %%mm1                  \n\t" // 0
+            "movq (%1, %%"REG_a"), %%mm0        \n\t" // block[i]
+            "pcmpgtw %%mm0, %%mm1               \n\t" // block[i] < 0 ? 0xFFFF : 0x0000
+            "pxor %%mm1, %%mm0                  \n\t"
+            "psubw %%mm1, %%mm0                 \n\t" // ABS(block[i])
+            "movq (%3, %%"REG_a"), %%mm6        \n\t" // bias[i]
+            "paddusw %%mm6, %%mm0               \n\t" // ABS(block[i]) + bias[i]
+            "movq (%2, %%"REG_a"), %%mm5        \n\t" // qmat[i]
+            "pmulhw %%mm5, %%mm0                \n\t" // (ABS(block[i])*qmat[i] + bias[i]*qmat[i])>>16
+            "por %%mm0, %%mm4                   \n\t"
+            "pxor %%mm1, %%mm0                  \n\t"
+            "psubw %%mm1, %%mm0                 \n\t" // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
+            "movq %%mm0, (%5, %%"REG_a")        \n\t"
+            "pcmpeqw %%mm7, %%mm0               \n\t" // out==0 ? 0xFF : 0x00
+            "movq (%4, %%"REG_a"), %%mm1        \n\t"
+            "movq %%mm7, (%1, %%"REG_a")        \n\t" // 0
+            "pandn %%mm1, %%mm0                 \n\t"
+            PMAXW(%%mm0, %%mm3)
+            "add $8, %%"REG_a"                  \n\t"
+            " js 1b                             \n\t"
+            PMAX(%%mm3, %%mm0)
+            "movd %%mm3, %%"REG_a"              \n\t"
+            "movzb %%al, %%"REG_a"              \n\t" // last_non_zero_p1
+            : "+a" (last_non_zero_p1)
             : "r" (block+64), "r" (qmat+64), "r" (bias+64),
               "r" (inv_zigzag_direct16+64), "r" (temp_block+64)
         );
         // note the asm is split cuz gcc doesnt like that many operands ...
         asm volatile(
-            "movd %1, %%mm1			\n\t" // max_qcoeff
-	    SPREADW(%%mm1)
-            "psubusw %%mm1, %%mm4		\n\t" 
-            "packuswb %%mm4, %%mm4		\n\t"
-            "movd %%mm4, %0			\n\t" // *overflow
+            "movd %1, %%mm1                     \n\t" // max_qcoeff
+            SPREADW(%%mm1)
+            "psubusw %%mm1, %%mm4               \n\t"
+            "packuswb %%mm4, %%mm4              \n\t"
+            "movd %%mm4, %0                     \n\t" // *overflow
         : "=g" (*overflow)
         : "g" (s->max_qcoeff)
         );
@@ -201,135 +201,135 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
 
     if(s->dsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM){
         if(last_non_zero_p1 <= 1) goto end;
-        block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08]; 
-        block[0x20] = temp_block[0x10]; 
+        block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08];
+        block[0x20] = temp_block[0x10];
         if(last_non_zero_p1 <= 4) goto end;
-        block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02]; 
-        block[0x09] = temp_block[0x03]; 
+        block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02];
+        block[0x09] = temp_block[0x03];
         if(last_non_zero_p1 <= 7) goto end;
-        block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11]; 
-        block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20]; 
+        block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11];
+        block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20];
         if(last_non_zero_p1 <= 11) goto end;
-        block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12]; 
-        block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04]; 
-        block[0x0C] = temp_block[0x05]; 
+        block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12];
+        block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04];
+        block[0x0C] = temp_block[0x05];
         if(last_non_zero_p1 <= 16) goto end;
-        block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13]; 
-        block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21]; 
-        block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30]; 
-        block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22]; 
+        block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13];
+        block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21];
+        block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30];
+        block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22];
         if(last_non_zero_p1 <= 24) goto end;
-        block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14]; 
-        block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06]; 
-        block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E]; 
-        block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C]; 
+        block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14];
+        block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06];
+        block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E];
+        block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C];
         if(last_non_zero_p1 <= 32) goto end;
-        block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A]; 
-        block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38]; 
-        block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32]; 
-        block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24]; 
+        block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A];
+        block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38];
+        block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32];
+        block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24];
         if(last_non_zero_p1 <= 40) goto end;
-        block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16]; 
-        block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17]; 
-        block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25]; 
-        block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33]; 
+        block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16];
+        block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17];
+        block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25];
+        block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33];
         if(last_non_zero_p1 <= 48) goto end;
-        block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B]; 
-        block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D]; 
-        block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; 
-        block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E]; 
+        block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
+        block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D];
+        block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+        block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E];
         if(last_non_zero_p1 <= 56) goto end;
-        block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C]; 
-        block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36]; 
-        block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37]; 
+        block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C];
+        block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36];
+        block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37];
         block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
     }else if(s->dsp.idct_permutation_type == FF_LIBMPEG2_IDCT_PERM){
         if(last_non_zero_p1 <= 1) goto end;
-        block[0x04] = temp_block[0x01]; 
-        block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; 
+        block[0x04] = temp_block[0x01];
+        block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
         if(last_non_zero_p1 <= 4) goto end;
-        block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02]; 
-        block[0x05] = temp_block[0x03]; 
+        block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02];
+        block[0x05] = temp_block[0x03];
         if(last_non_zero_p1 <= 7) goto end;
-        block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11]; 
-        block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20]; 
+        block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11];
+        block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
         if(last_non_zero_p1 <= 11) goto end;
-        block[0x1C] = temp_block[0x19]; 
-        block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B]; 
-        block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05]; 
+        block[0x1C] = temp_block[0x19];
+        block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B];
+        block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05];
         if(last_non_zero_p1 <= 16) goto end;
-        block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13]; 
-        block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21]; 
-        block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30]; 
-        block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22]; 
+        block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13];
+        block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21];
+        block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
+        block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22];
         if(last_non_zero_p1 <= 24) goto end;
-        block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14]; 
-        block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06]; 
-        block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E]; 
-        block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C]; 
+        block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14];
+        block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06];
+        block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E];
+        block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C];
         if(last_non_zero_p1 <= 32) goto end;
-        block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A]; 
-        block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38]; 
-        block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32]; 
-        block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24]; 
+        block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A];
+        block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38];
+        block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32];
+        block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24];
         if(last_non_zero_p1 <= 40) goto end;
-        block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16]; 
-        block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17]; 
-        block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25]; 
-        block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33]; 
+        block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16];
+        block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
+        block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25];
+        block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33];
         if(last_non_zero_p1 <= 48) goto end;
-        block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B]; 
-        block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D]; 
-            block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; 
-        block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E]; 
+        block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B];
+        block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D];
+            block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+        block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E];
         if(last_non_zero_p1 <= 56) goto end;
-        block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C]; 
-        block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36]; 
-        block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37]; 
+        block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C];
+        block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36];
+        block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
         block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
     }else{
         if(last_non_zero_p1 <= 1) goto end;
-        block[0x01] = temp_block[0x01]; 
-        block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; 
+        block[0x01] = temp_block[0x01];
+        block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10];
         if(last_non_zero_p1 <= 4) goto end;
-        block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02]; 
-        block[0x03] = temp_block[0x03]; 
+        block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02];
+        block[0x03] = temp_block[0x03];
         if(last_non_zero_p1 <= 7) goto end;
-        block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11]; 
-        block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20]; 
+        block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11];
+        block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20];
         if(last_non_zero_p1 <= 11) goto end;
-        block[0x19] = temp_block[0x19]; 
-        block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B]; 
-        block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05]; 
+        block[0x19] = temp_block[0x19];
+        block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B];
+        block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05];
         if(last_non_zero_p1 <= 16) goto end;
-        block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13]; 
-        block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21]; 
-        block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30]; 
-        block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22]; 
+        block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13];
+        block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21];
+        block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30];
+        block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22];
         if(last_non_zero_p1 <= 24) goto end;
-        block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14]; 
-        block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06]; 
-        block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E]; 
-        block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C]; 
+        block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14];
+        block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06];
+        block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E];
+        block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C];
         if(last_non_zero_p1 <= 32) goto end;
-        block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A]; 
-        block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38]; 
-        block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32]; 
-        block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24]; 
+        block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A];
+        block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38];
+        block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32];
+        block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24];
         if(last_non_zero_p1 <= 40) goto end;
-        block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16]; 
-        block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17]; 
-        block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25]; 
-        block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33]; 
+        block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16];
+        block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17];
+        block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25];
+        block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33];
         if(last_non_zero_p1 <= 48) goto end;
-        block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B]; 
-        block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D]; 
-        block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; 
-        block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E]; 
+        block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B];
+        block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D];
+        block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F];
+        block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E];
         if(last_non_zero_p1 <= 56) goto end;
-        block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C]; 
-        block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36]; 
-        block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37]; 
+        block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C];
+        block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36];
+        block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37];
         block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
     }
     end:
diff --git a/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c b/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c
index 92a366f21..b033a12b8 100644
--- a/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/simple_idct_mmx.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "../dsputil.h"
 #include "../simple_idct.h"
@@ -40,8 +40,8 @@
 #define C4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) - 0.5
 #endif
 #define C5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-#define C7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C6 8867  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+#define C7 4520  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
 
 #define ROW_SHIFT 11
 #define COL_SHIFT 20 // 6
@@ -50,37 +50,37 @@ static const uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0
 static const uint64_t attribute_used __attribute__((aligned(8))) d40000= 0x0000000000040000ULL;
 
 static const int16_t __attribute__((aligned(8))) coeffs[]= {
-	1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0,
-//	1<<(COL_SHIFT-1), 0, 1<<(COL_SHIFT-1), 0,
-//	0, 1<<(COL_SHIFT-1-16), 0, 1<<(COL_SHIFT-1-16),
-	1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0,
-	// the 1 = ((1<<(COL_SHIFT-1))/C4)<<ROW_SHIFT :)
-//	0, 0, 0, 0,
-//	0, 0, 0, 0,
+        1<<(ROW_SHIFT-1), 0, 1<<(ROW_SHIFT-1), 0,
+//        1<<(COL_SHIFT-1), 0, 1<<(COL_SHIFT-1), 0,
+//        0, 1<<(COL_SHIFT-1-16), 0, 1<<(COL_SHIFT-1-16),
+        1<<(ROW_SHIFT-1), 1, 1<<(ROW_SHIFT-1), 0,
+        // the 1 = ((1<<(COL_SHIFT-1))/C4)<<ROW_SHIFT :)
+//        0, 0, 0, 0,
+//        0, 0, 0, 0,
 
  C4,  C4,  C4,  C4,
  C4, -C4,  C4, -C4,
- 
+
  C2,  C6,  C2,  C6,
  C6, -C2,  C6, -C2,
- 
+
  C1,  C3,  C1,  C3,
  C5,  C7,  C5,  C7,
- 
+
  C3, -C7,  C3, -C7,
 -C1, -C5, -C1, -C5,
- 
+
  C5, -C1,  C5, -C1,
  C7,  C3,  C7,  C3,
- 
+
  C7, -C5,  C7, -C5,
  C3, -C1,  C3, -C1
 };
 
 #if 0
 static void unused_var_killer(){
-	int a= wm1010 + d40000;
-	temp[0]=a;
+        int a= wm1010 + d40000;
+        temp[0]=a;
 }
 
 static void inline idctCol (int16_t * col, int16_t *input)
@@ -93,21 +93,21 @@ static void inline idctCol (int16_t * col, int16_t *input)
 #undef C5
 #undef C6
 #undef C7
-	int a0, a1, a2, a3, b0, b1, b2, b3;
-	const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        int a0, a1, a2, a3, b0, b1, b2, b3;
+        const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C6 = 8867;  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C7 = 4520;  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
 /*
-	if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) {
-		col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] =
-			col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3;
-		return;
-	}*/
+        if( !(col[8*1] | col[8*2] |col[8*3] |col[8*4] |col[8*5] |col[8*6] | col[8*7])) {
+                col[8*0] = col[8*1] = col[8*2] = col[8*3] = col[8*4] =
+                        col[8*5] = col[8*6] = col[8*7] = col[8*0]<<3;
+                return;
+        }*/
 
 col[8*0] = input[8*0 + 0];
 col[8*1] = input[8*2 + 0];
@@ -118,39 +118,39 @@ col[8*5] = input[8*6 + 0];
 col[8*6] = input[8*4 + 1];
 col[8*7] = input[8*6 + 1];
 
-	a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6] + (1<<(COL_SHIFT-1));
-	a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6] + (1<<(COL_SHIFT-1));
-	a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6] + (1<<(COL_SHIFT-1));
-	a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6] + (1<<(COL_SHIFT-1));
-
-	b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7];
-	b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7];
-	b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7];
-	b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7];
-
-	col[8*0] = (a0 + b0) >> COL_SHIFT;
-	col[8*1] = (a1 + b1) >> COL_SHIFT;
-	col[8*2] = (a2 + b2) >> COL_SHIFT;
-	col[8*3] = (a3 + b3) >> COL_SHIFT;
-	col[8*4] = (a3 - b3) >> COL_SHIFT;
-	col[8*5] = (a2 - b2) >> COL_SHIFT;
-	col[8*6] = (a1 - b1) >> COL_SHIFT;
-	col[8*7] = (a0 - b0) >> COL_SHIFT;
+        a0 = C4*col[8*0] + C2*col[8*2] + C4*col[8*4] + C6*col[8*6] + (1<<(COL_SHIFT-1));
+        a1 = C4*col[8*0] + C6*col[8*2] - C4*col[8*4] - C2*col[8*6] + (1<<(COL_SHIFT-1));
+        a2 = C4*col[8*0] - C6*col[8*2] - C4*col[8*4] + C2*col[8*6] + (1<<(COL_SHIFT-1));
+        a3 = C4*col[8*0] - C2*col[8*2] + C4*col[8*4] - C6*col[8*6] + (1<<(COL_SHIFT-1));
+
+        b0 = C1*col[8*1] + C3*col[8*3] + C5*col[8*5] + C7*col[8*7];
+        b1 = C3*col[8*1] - C7*col[8*3] - C1*col[8*5] - C5*col[8*7];
+        b2 = C5*col[8*1] - C1*col[8*3] + C7*col[8*5] + C3*col[8*7];
+        b3 = C7*col[8*1] - C5*col[8*3] + C3*col[8*5] - C1*col[8*7];
+
+        col[8*0] = (a0 + b0) >> COL_SHIFT;
+        col[8*1] = (a1 + b1) >> COL_SHIFT;
+        col[8*2] = (a2 + b2) >> COL_SHIFT;
+        col[8*3] = (a3 + b3) >> COL_SHIFT;
+        col[8*4] = (a3 - b3) >> COL_SHIFT;
+        col[8*5] = (a2 - b2) >> COL_SHIFT;
+        col[8*6] = (a1 - b1) >> COL_SHIFT;
+        col[8*7] = (a0 - b0) >> COL_SHIFT;
 }
 
 static void inline idctRow (int16_t * output, int16_t * input)
 {
-	int16_t row[8];
-
-	int a0, a1, a2, a3, b0, b1, b2, b3;
-	const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C6 = 8867; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
-	const int C7 = 4520; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        int16_t row[8];
+
+        int a0, a1, a2, a3, b0, b1, b2, b3;
+        const int C0 = 23170; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C1 = 22725; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C2 = 21407; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C3 = 19266; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C4 = 16383; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C5 = 12873; //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C6 = 8867;  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
+        const int C7 = 4520;  //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
 
 row[0] = input[0];
 row[2] = input[1];
@@ -161,290 +161,290 @@ row[3] = input[9];
 row[5] = input[12];
 row[7] = input[13];
 
-	if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7]) ) {
-		row[0] = row[1] = row[2] = row[3] = row[4] =
-			row[5] = row[6] = row[7] = row[0]<<3;
-	output[0] = row[0];
-	output[2] = row[1];
-	output[4] = row[2];
-	output[6] = row[3];
-	output[8] = row[4];
-	output[10] = row[5];
-	output[12] = row[6];
-	output[14] = row[7];
-		return;
-	}
-
-	a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1));
-	a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1));
-	a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1));
-	a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1));
-
-	b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
-	b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
-	b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
-	b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
-
-	row[0] = (a0 + b0) >> ROW_SHIFT;
-	row[1] = (a1 + b1) >> ROW_SHIFT;
-	row[2] = (a2 + b2) >> ROW_SHIFT;
-	row[3] = (a3 + b3) >> ROW_SHIFT;
-	row[4] = (a3 - b3) >> ROW_SHIFT;
-	row[5] = (a2 - b2) >> ROW_SHIFT;
-	row[6] = (a1 - b1) >> ROW_SHIFT;
-	row[7] = (a0 - b0) >> ROW_SHIFT;
-
-	output[0] = row[0];
-	output[2] = row[1];
-	output[4] = row[2];
-	output[6] = row[3];
-	output[8] = row[4];
-	output[10] = row[5];
-	output[12] = row[6];
-	output[14] = row[7];
+        if( !(row[1] | row[2] |row[3] |row[4] |row[5] |row[6] | row[7]) ) {
+                row[0] = row[1] = row[2] = row[3] = row[4] =
+                        row[5] = row[6] = row[7] = row[0]<<3;
+        output[0]  = row[0];
+        output[2]  = row[1];
+        output[4]  = row[2];
+        output[6]  = row[3];
+        output[8]  = row[4];
+        output[10] = row[5];
+        output[12] = row[6];
+        output[14] = row[7];
+                return;
+        }
+
+        a0 = C4*row[0] + C2*row[2] + C4*row[4] + C6*row[6] + (1<<(ROW_SHIFT-1));
+        a1 = C4*row[0] + C6*row[2] - C4*row[4] - C2*row[6] + (1<<(ROW_SHIFT-1));
+        a2 = C4*row[0] - C6*row[2] - C4*row[4] + C2*row[6] + (1<<(ROW_SHIFT-1));
+        a3 = C4*row[0] - C2*row[2] + C4*row[4] - C6*row[6] + (1<<(ROW_SHIFT-1));
+
+        b0 = C1*row[1] + C3*row[3] + C5*row[5] + C7*row[7];
+        b1 = C3*row[1] - C7*row[3] - C1*row[5] - C5*row[7];
+        b2 = C5*row[1] - C1*row[3] + C7*row[5] + C3*row[7];
+        b3 = C7*row[1] - C5*row[3] + C3*row[5] - C1*row[7];
+
+        row[0] = (a0 + b0) >> ROW_SHIFT;
+        row[1] = (a1 + b1) >> ROW_SHIFT;
+        row[2] = (a2 + b2) >> ROW_SHIFT;
+        row[3] = (a3 + b3) >> ROW_SHIFT;
+        row[4] = (a3 - b3) >> ROW_SHIFT;
+        row[5] = (a2 - b2) >> ROW_SHIFT;
+        row[6] = (a1 - b1) >> ROW_SHIFT;
+        row[7] = (a0 - b0) >> ROW_SHIFT;
+
+        output[0]  = row[0];
+        output[2]  = row[1];
+        output[4]  = row[2];
+        output[6]  = row[3];
+        output[8]  = row[4];
+        output[10] = row[5];
+        output[12] = row[6];
+        output[14] = row[7];
 }
 #endif
 
 static inline void idct(int16_t *block)
 {
-	int64_t __attribute__((aligned(8))) align_tmp[16];
-	int16_t * const temp= (int16_t*)align_tmp;
+        int64_t __attribute__((aligned(8))) align_tmp[16];
+        int16_t * const temp= (int16_t*)align_tmp;
 
-	asm volatile(
+        asm volatile(
 #if 0 //Alternative, simpler variant
 
 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
-	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
-	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 32(%2), %%mm5			\n\t" /* C6	C2	C6	C2 */\
-	"pmaddwd %%mm1, %%mm5			\n\t" /* C6R6+C2R2	C6r6+C2r2 */\
-	"movq 40(%2), %%mm6			\n\t" /* -C2	C6	-C2	C6 */\
-	"pmaddwd %%mm6, %%mm1			\n\t" /* -C2R6+C6R2	-C2r6+C6r2 */\
-	"movq 48(%2), %%mm7			\n\t" /* C3	C1	C3	C1 */\
-	"pmaddwd %%mm2, %%mm7			\n\t" /* C3R3+C1R1	C3r3+C1r1 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"paddd %%mm5, %%mm4			\n\t" /* A0		a0 */\
-	"psubd %%mm5, %%mm6			\n\t" /* A3		a3 */\
-	"movq 56(%2), %%mm5			\n\t" /* C7	C5	C7	C5 */\
-	"pmaddwd %%mm3, %%mm5			\n\t" /* C7R7+C5R5	C7r7+C5r5 */\
-	#rounder ", %%mm0			\n\t"\
-	"paddd %%mm0, %%mm1			\n\t" /* A1		a1 */\
-	"paddd %%mm0, %%mm0			\n\t" \
-	"psubd %%mm1, %%mm0			\n\t" /* A2		a2 */\
-	"pmaddwd 64(%2), %%mm2			\n\t" /* -C7R3+C3R1	-C7r3+C3r1 */\
-	"paddd %%mm5, %%mm7			\n\t" /* B0		b0 */\
-	"movq 72(%2), %%mm5			\n\t" /* -C5	-C1	-C5	-C1 */\
-	"pmaddwd %%mm3, %%mm5			\n\t" /* -C5R7-C1R5	-C5r7-C1r5 */\
-	"paddd %%mm4, %%mm7			\n\t" /* A0+B0		a0+b0 */\
-	"paddd %%mm4, %%mm4			\n\t" /* 2A0		2a0 */\
-	"psubd %%mm7, %%mm4			\n\t" /* A0-B0		a0-b0 */\
-	"paddd %%mm2, %%mm5			\n\t" /* B1		b1 */\
-	"psrad $" #shift ", %%mm7		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"movq %%mm1, %%mm2			\n\t" /* A1		a1 */\
-	"paddd %%mm5, %%mm1			\n\t" /* A1+B1		a1+b1 */\
-	"psubd %%mm5, %%mm2			\n\t" /* A1-B1		a1-b1 */\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"packssdw %%mm1, %%mm7			\n\t" /* A1+B1	a1+b1	A0+B0	a0+b0 */\
-	"packssdw %%mm4, %%mm2			\n\t" /* A0-B0	a0-b0	A1-B1	a1-b1 */\
-	"movq %%mm7, " #dst "			\n\t"\
-	"movq " #src1 ", %%mm1			\n\t" /* R3	R1	r3	r1 */\
-	"movq 80(%2), %%mm4			\n\t" /* -C1	C5	-C1 	C5 */\
-	"movq %%mm2, 24+" #dst "		\n\t"\
-	"pmaddwd %%mm1, %%mm4			\n\t" /* -C1R3+C5R1	-C1r3+C5r1 */\
-	"movq 88(%2), %%mm7			\n\t" /* C3	C7	C3 	C7 */\
-	"pmaddwd 96(%2), %%mm1			\n\t" /* -C5R3+C7R1	-C5r3+C7r1 */\
-	"pmaddwd %%mm3, %%mm7			\n\t" /* C3R7+C7R5	C3r7+C7r5 */\
-	"movq %%mm0, %%mm2			\n\t" /* A2		a2 */\
-	"pmaddwd 104(%2), %%mm3			\n\t" /* -C1R7+C3R5	-C1r7+C3r5 */\
-	"paddd %%mm7, %%mm4			\n\t" /* B2		b2 */\
-	"paddd %%mm4, %%mm2			\n\t" /* A2+B2		a2+b2 */\
-	"psubd %%mm4, %%mm0			\n\t" /* a2-B2		a2-b2 */\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"movq %%mm6, %%mm4			\n\t" /* A3		a3 */\
-	"paddd %%mm1, %%mm3			\n\t" /* B3		b3 */\
-	"paddd %%mm3, %%mm6			\n\t" /* A3+B3		a3+b3 */\
-	"psubd %%mm3, %%mm4			\n\t" /* a3-B3		a3-b3 */\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"packssdw %%mm6, %%mm2			\n\t" /* A3+B3	a3+b3	A2+B2	a2+b2 */\
-	"movq %%mm2, 8+" #dst "			\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"packssdw %%mm0, %%mm4			\n\t" /* A2-B2	a2-b2	A3-B3	a3-b3 */\
-	"movq %%mm4, 16+" #dst "		\n\t"\
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq 56(%2), %%mm5             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        #rounder ", %%mm0               \n\t"\
+        "paddd %%mm0, %%mm1             \n\t" /* A1             a1 */\
+        "paddd %%mm0, %%mm0             \n\t" \
+        "psubd %%mm1, %%mm0             \n\t" /* A2             a2 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm5, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm5             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm5             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm1, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm5, %%mm1             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm5, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm7          \n\t" /* A1+B1  a1+b1   A0+B0   a0+b0 */\
+        "packssdw %%mm4, %%mm2          \n\t" /* A0-B0  a0-b0   A1-B1   a1-b1 */\
+        "movq %%mm7, " #dst "           \n\t"\
+        "movq " #src1 ", %%mm1          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "movq %%mm2, 24+" #dst "        \n\t"\
+        "pmaddwd %%mm1, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm1          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm0, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm0             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm1, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "packssdw %%mm6, %%mm2          \n\t" /* A3+B3  a3+b3   A2+B2   a2+b2 */\
+        "movq %%mm2, 8+" #dst "         \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm0, %%mm4          \n\t" /* A2-B2  a2-b2   A3-B3   a3-b3 */\
+        "movq %%mm4, 16+" #dst "        \n\t"\
 
 #define COL_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
-	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
-	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 32(%2), %%mm5			\n\t" /* C6	C2	C6	C2 */\
-	"pmaddwd %%mm1, %%mm5			\n\t" /* C6R6+C2R2	C6r6+C2r2 */\
-	"movq 40(%2), %%mm6			\n\t" /* -C2	C6	-C2	C6 */\
-	"pmaddwd %%mm6, %%mm1			\n\t" /* -C2R6+C6R2	-C2r6+C6r2 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 48(%2), %%mm7			\n\t" /* C3	C1	C3	C1 */\
-	#rounder ", %%mm0			\n\t"\
-	"pmaddwd %%mm2, %%mm7			\n\t" /* C3R3+C1R1	C3r3+C1r1 */\
-	"paddd %%mm5, %%mm4			\n\t" /* A0		a0 */\
-	"psubd %%mm5, %%mm6			\n\t" /* A3		a3 */\
-	"movq %%mm0, %%mm5			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"paddd %%mm1, %%mm0			\n\t" /* A1		a1 */\
-	"psubd %%mm1, %%mm5			\n\t" /* A2		a2 */\
-	"movq 56(%2), %%mm1			\n\t" /* C7	C5	C7	C5 */\
-	"pmaddwd %%mm3, %%mm1			\n\t" /* C7R7+C5R5	C7r7+C5r5 */\
-	"pmaddwd 64(%2), %%mm2			\n\t" /* -C7R3+C3R1	-C7r3+C3r1 */\
-	"paddd %%mm1, %%mm7			\n\t" /* B0		b0 */\
-	"movq 72(%2), %%mm1			\n\t" /* -C5	-C1	-C5	-C1 */\
-	"pmaddwd %%mm3, %%mm1			\n\t" /* -C5R7-C1R5	-C5r7-C1r5 */\
-	"paddd %%mm4, %%mm7			\n\t" /* A0+B0		a0+b0 */\
-	"paddd %%mm4, %%mm4			\n\t" /* 2A0		2a0 */\
-	"psubd %%mm7, %%mm4			\n\t" /* A0-B0		a0-b0 */\
-	"paddd %%mm2, %%mm1			\n\t" /* B1		b1 */\
-	"psrad $" #shift ", %%mm7		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"movq %%mm0, %%mm2			\n\t" /* A1		a1 */\
-	"paddd %%mm1, %%mm0			\n\t" /* A1+B1		a1+b1 */\
-	"psubd %%mm1, %%mm2			\n\t" /* A1-B1		a1-b1 */\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"packssdw %%mm7, %%mm7			\n\t" /* A0+B0	a0+b0 */\
-	"movd %%mm7, " #dst "			\n\t"\
-	"packssdw %%mm0, %%mm0			\n\t" /* A1+B1	a1+b1 */\
-	"movd %%mm0, 16+" #dst "		\n\t"\
-	"packssdw %%mm2, %%mm2			\n\t" /* A1-B1	a1-b1 */\
-	"movd %%mm2, 96+" #dst "		\n\t"\
-	"packssdw %%mm4, %%mm4			\n\t" /* A0-B0	a0-b0 */\
-	"movd %%mm4, 112+" #dst "		\n\t"\
-	"movq " #src1 ", %%mm0			\n\t" /* R3	R1	r3	r1 */\
-	"movq 80(%2), %%mm4			\n\t" /* -C1	C5	-C1 	C5 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* -C1R3+C5R1	-C1r3+C5r1 */\
-	"movq 88(%2), %%mm7			\n\t" /* C3	C7	C3 	C7 */\
-	"pmaddwd 96(%2), %%mm0			\n\t" /* -C5R3+C7R1	-C5r3+C7r1 */\
-	"pmaddwd %%mm3, %%mm7			\n\t" /* C3R7+C7R5	C3r7+C7r5 */\
-	"movq %%mm5, %%mm2			\n\t" /* A2		a2 */\
-	"pmaddwd 104(%2), %%mm3			\n\t" /* -C1R7+C3R5	-C1r7+C3r5 */\
-	"paddd %%mm7, %%mm4			\n\t" /* B2		b2 */\
-	"paddd %%mm4, %%mm2			\n\t" /* A2+B2		a2+b2 */\
-	"psubd %%mm4, %%mm5			\n\t" /* a2-B2		a2-b2 */\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"psrad $" #shift ", %%mm5		\n\t"\
-	"movq %%mm6, %%mm4			\n\t" /* A3		a3 */\
-	"paddd %%mm0, %%mm3			\n\t" /* B3		b3 */\
-	"paddd %%mm3, %%mm6			\n\t" /* A3+B3		a3+b3 */\
-	"psubd %%mm3, %%mm4			\n\t" /* a3-B3		a3-b3 */\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"packssdw %%mm2, %%mm2			\n\t" /* A2+B2	a2+b2 */\
-	"packssdw %%mm6, %%mm6			\n\t" /* A3+B3	a3+b3 */\
-	"movd %%mm2, 32+" #dst "		\n\t"\
-	"packssdw %%mm4, %%mm4			\n\t" /* A3-B3	a3-b3 */\
-	"packssdw %%mm5, %%mm5			\n\t" /* A2-B2	a2-b2 */\
-	"movd %%mm6, 48+" #dst "		\n\t"\
-	"movd %%mm4, 64+" #dst "		\n\t"\
-	"movd %%mm5, 80+" #dst "		\n\t"\
-
-	
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        #rounder ", %%mm0               \n\t"\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1             a1 */\
+        "psubd %%mm1, %%mm5             \n\t" /* A2             a2 */\
+        "movq 56(%2), %%mm1             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm1, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm1             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm1             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm1, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm7, %%mm7          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm7, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm2, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq " #src1 ", %%mm0          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm0          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm5, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm5             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm0, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A2+B2  a2+b2 */\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm2, 32+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "movd %%mm4, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"\
+
+
 #define DC_COND_ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
-	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
-	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
-	"movq "MANGLE(wm1010)", %%mm4		\n\t"\
-	"pand %%mm0, %%mm4			\n\t"\
-	"por %%mm1, %%mm4			\n\t"\
-	"por %%mm2, %%mm4			\n\t"\
-	"por %%mm3, %%mm4			\n\t"\
-	"packssdw %%mm4,%%mm4			\n\t"\
-	"movd %%mm4, %%eax			\n\t"\
-	"orl %%eax, %%eax			\n\t"\
-	"jz 1f					\n\t"\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 32(%2), %%mm5			\n\t" /* C6	C2	C6	C2 */\
-	"pmaddwd %%mm1, %%mm5			\n\t" /* C6R6+C2R2	C6r6+C2r2 */\
-	"movq 40(%2), %%mm6			\n\t" /* -C2	C6	-C2	C6 */\
-	"pmaddwd %%mm6, %%mm1			\n\t" /* -C2R6+C6R2	-C2r6+C6r2 */\
-	"movq 48(%2), %%mm7			\n\t" /* C3	C1	C3	C1 */\
-	"pmaddwd %%mm2, %%mm7			\n\t" /* C3R3+C1R1	C3r3+C1r1 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"paddd %%mm5, %%mm4			\n\t" /* A0		a0 */\
-	"psubd %%mm5, %%mm6			\n\t" /* A3		a3 */\
-	"movq 56(%2), %%mm5			\n\t" /* C7	C5	C7	C5 */\
-	"pmaddwd %%mm3, %%mm5			\n\t" /* C7R7+C5R5	C7r7+C5r5 */\
-	#rounder ", %%mm0			\n\t"\
-	"paddd %%mm0, %%mm1			\n\t" /* A1		a1 */\
-	"paddd %%mm0, %%mm0			\n\t" \
-	"psubd %%mm1, %%mm0			\n\t" /* A2		a2 */\
-	"pmaddwd 64(%2), %%mm2			\n\t" /* -C7R3+C3R1	-C7r3+C3r1 */\
-	"paddd %%mm5, %%mm7			\n\t" /* B0		b0 */\
-	"movq 72(%2), %%mm5			\n\t" /* -C5	-C1	-C5	-C1 */\
-	"pmaddwd %%mm3, %%mm5			\n\t" /* -C5R7-C1R5	-C5r7-C1r5 */\
-	"paddd %%mm4, %%mm7			\n\t" /* A0+B0		a0+b0 */\
-	"paddd %%mm4, %%mm4			\n\t" /* 2A0		2a0 */\
-	"psubd %%mm7, %%mm4			\n\t" /* A0-B0		a0-b0 */\
-	"paddd %%mm2, %%mm5			\n\t" /* B1		b1 */\
-	"psrad $" #shift ", %%mm7		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"movq %%mm1, %%mm2			\n\t" /* A1		a1 */\
-	"paddd %%mm5, %%mm1			\n\t" /* A1+B1		a1+b1 */\
-	"psubd %%mm5, %%mm2			\n\t" /* A1-B1		a1-b1 */\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"packssdw %%mm1, %%mm7			\n\t" /* A1+B1	a1+b1	A0+B0	a0+b0 */\
-	"packssdw %%mm4, %%mm2			\n\t" /* A0-B0	a0-b0	A1-B1	a1-b1 */\
-	"movq %%mm7, " #dst "			\n\t"\
-	"movq " #src1 ", %%mm1			\n\t" /* R3	R1	r3	r1 */\
-	"movq 80(%2), %%mm4			\n\t" /* -C1	C5	-C1 	C5 */\
-	"movq %%mm2, 24+" #dst "		\n\t"\
-	"pmaddwd %%mm1, %%mm4			\n\t" /* -C1R3+C5R1	-C1r3+C5r1 */\
-	"movq 88(%2), %%mm7			\n\t" /* C3	C7	C3 	C7 */\
-	"pmaddwd 96(%2), %%mm1			\n\t" /* -C5R3+C7R1	-C5r3+C7r1 */\
-	"pmaddwd %%mm3, %%mm7			\n\t" /* C3R7+C7R5	C3r7+C7r5 */\
-	"movq %%mm0, %%mm2			\n\t" /* A2		a2 */\
-	"pmaddwd 104(%2), %%mm3			\n\t" /* -C1R7+C3R5	-C1r7+C3r5 */\
-	"paddd %%mm7, %%mm4			\n\t" /* B2		b2 */\
-	"paddd %%mm4, %%mm2			\n\t" /* A2+B2		a2+b2 */\
-	"psubd %%mm4, %%mm0			\n\t" /* a2-B2		a2-b2 */\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"movq %%mm6, %%mm4			\n\t" /* A3		a3 */\
-	"paddd %%mm1, %%mm3			\n\t" /* B3		b3 */\
-	"paddd %%mm3, %%mm6			\n\t" /* A3+B3		a3+b3 */\
-	"psubd %%mm3, %%mm4			\n\t" /* a3-B3		a3-b3 */\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"packssdw %%mm6, %%mm2			\n\t" /* A3+B3	a3+b3	A2+B2	a2+b2 */\
-	"movq %%mm2, 8+" #dst "			\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"packssdw %%mm0, %%mm4			\n\t" /* A2-B2	a2-b2	A3-B3	a3-b3 */\
-	"movq %%mm4, 16+" #dst "		\n\t"\
-	"jmp 2f					\n\t"\
-	"1:					\n\t"\
-	"pslld $16, %%mm0			\n\t"\
-	"#paddd "MANGLE(d40000)", %%mm0		\n\t"\
-	"psrad $13, %%mm0			\n\t"\
-	"packssdw %%mm0, %%mm0			\n\t"\
-	"movq %%mm0, " #dst "			\n\t"\
-	"movq %%mm0, 8+" #dst "			\n\t"\
-	"movq %%mm0, 16+" #dst "		\n\t"\
-	"movq %%mm0, 24+" #dst "		\n\t"\
-	"2:					\n\t"
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq "MANGLE(wm1010)", %%mm4   \n\t"\
+        "pand %%mm0, %%mm4              \n\t"\
+        "por %%mm1, %%mm4               \n\t"\
+        "por %%mm2, %%mm4               \n\t"\
+        "por %%mm3, %%mm4               \n\t"\
+        "packssdw %%mm4,%%mm4           \n\t"\
+        "movd %%mm4, %%eax              \n\t"\
+        "orl %%eax, %%eax               \n\t"\
+        "jz 1f                          \n\t"\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq 56(%2), %%mm5             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        #rounder ", %%mm0               \n\t"\
+        "paddd %%mm0, %%mm1             \n\t" /* A1             a1 */\
+        "paddd %%mm0, %%mm0             \n\t" \
+        "psubd %%mm1, %%mm0             \n\t" /* A2             a2 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm5, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm5             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm5             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm1, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm5, %%mm1             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm5, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm7          \n\t" /* A1+B1  a1+b1   A0+B0   a0+b0 */\
+        "packssdw %%mm4, %%mm2          \n\t" /* A0-B0  a0-b0   A1-B1   a1-b1 */\
+        "movq %%mm7, " #dst "           \n\t"\
+        "movq " #src1 ", %%mm1          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "movq %%mm2, 24+" #dst "        \n\t"\
+        "pmaddwd %%mm1, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm1          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm0, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm0             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm1, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "packssdw %%mm6, %%mm2          \n\t" /* A3+B3  a3+b3   A2+B2   a2+b2 */\
+        "movq %%mm2, 8+" #dst "         \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm0, %%mm4          \n\t" /* A2-B2  a2-b2   A3-B3   a3-b3 */\
+        "movq %%mm4, 16+" #dst "        \n\t"\
+        "jmp 2f                         \n\t"\
+        "1:                             \n\t"\
+        "pslld $16, %%mm0               \n\t"\
+        "#paddd "MANGLE(d40000)", %%mm0 \n\t"\
+        "psrad $13, %%mm0               \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t"\
+        "movq %%mm0, " #dst "           \n\t"\
+        "movq %%mm0, 8+" #dst "         \n\t"\
+        "movq %%mm0, 16+" #dst "        \n\t"\
+        "movq %%mm0, 24+" #dst "        \n\t"\
+        "2:                             \n\t"
 
 
 //IDCT(      src0,   src4,   src1,   src5,    dst,    rounder, shift)
@@ -467,236 +467,236 @@ COL_IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
 #else
 
 #define DC_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
-	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
-	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
-	"movq "MANGLE(wm1010)", %%mm4		\n\t"\
-	"pand %%mm0, %%mm4			\n\t"\
-	"por %%mm1, %%mm4			\n\t"\
-	"por %%mm2, %%mm4			\n\t"\
-	"por %%mm3, %%mm4			\n\t"\
-	"packssdw %%mm4,%%mm4			\n\t"\
-	"movd %%mm4, %%eax			\n\t"\
-	"orl %%eax, %%eax			\n\t"\
-	"jz 1f					\n\t"\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 32(%2), %%mm5			\n\t" /* C6	C2	C6	C2 */\
-	"pmaddwd %%mm1, %%mm5			\n\t" /* C6R6+C2R2	C6r6+C2r2 */\
-	"movq 40(%2), %%mm6			\n\t" /* -C2	C6	-C2	C6 */\
-	"pmaddwd %%mm6, %%mm1			\n\t" /* -C2R6+C6R2	-C2r6+C6r2 */\
-	"movq 48(%2), %%mm7			\n\t" /* C3	C1	C3	C1 */\
-	"pmaddwd %%mm2, %%mm7			\n\t" /* C3R3+C1R1	C3r3+C1r1 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"paddd %%mm5, %%mm4			\n\t" /* A0		a0 */\
-	"psubd %%mm5, %%mm6			\n\t" /* A3		a3 */\
-	"movq 56(%2), %%mm5			\n\t" /* C7	C5	C7	C5 */\
-	"pmaddwd %%mm3, %%mm5			\n\t" /* C7R7+C5R5	C7r7+C5r5 */\
-	#rounder ", %%mm0			\n\t"\
-	"paddd %%mm0, %%mm1			\n\t" /* A1		a1 */\
-	"paddd %%mm0, %%mm0			\n\t" \
-	"psubd %%mm1, %%mm0			\n\t" /* A2		a2 */\
-	"pmaddwd 64(%2), %%mm2			\n\t" /* -C7R3+C3R1	-C7r3+C3r1 */\
-	"paddd %%mm5, %%mm7			\n\t" /* B0		b0 */\
-	"movq 72(%2), %%mm5			\n\t" /* -C5	-C1	-C5	-C1 */\
-	"pmaddwd %%mm3, %%mm5			\n\t" /* -C5R7-C1R5	-C5r7-C1r5 */\
-	"paddd %%mm4, %%mm7			\n\t" /* A0+B0		a0+b0 */\
-	"paddd %%mm4, %%mm4			\n\t" /* 2A0		2a0 */\
-	"psubd %%mm7, %%mm4			\n\t" /* A0-B0		a0-b0 */\
-	"paddd %%mm2, %%mm5			\n\t" /* B1		b1 */\
-	"psrad $" #shift ", %%mm7		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"movq %%mm1, %%mm2			\n\t" /* A1		a1 */\
-	"paddd %%mm5, %%mm1			\n\t" /* A1+B1		a1+b1 */\
-	"psubd %%mm5, %%mm2			\n\t" /* A1-B1		a1-b1 */\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"packssdw %%mm1, %%mm7			\n\t" /* A1+B1	a1+b1	A0+B0	a0+b0 */\
-	"packssdw %%mm4, %%mm2			\n\t" /* A0-B0	a0-b0	A1-B1	a1-b1 */\
-	"movq %%mm7, " #dst "			\n\t"\
-	"movq " #src1 ", %%mm1			\n\t" /* R3	R1	r3	r1 */\
-	"movq 80(%2), %%mm4			\n\t" /* -C1	C5	-C1 	C5 */\
-	"movq %%mm2, 24+" #dst "		\n\t"\
-	"pmaddwd %%mm1, %%mm4			\n\t" /* -C1R3+C5R1	-C1r3+C5r1 */\
-	"movq 88(%2), %%mm7			\n\t" /* C3	C7	C3 	C7 */\
-	"pmaddwd 96(%2), %%mm1			\n\t" /* -C5R3+C7R1	-C5r3+C7r1 */\
-	"pmaddwd %%mm3, %%mm7			\n\t" /* C3R7+C7R5	C3r7+C7r5 */\
-	"movq %%mm0, %%mm2			\n\t" /* A2		a2 */\
-	"pmaddwd 104(%2), %%mm3			\n\t" /* -C1R7+C3R5	-C1r7+C3r5 */\
-	"paddd %%mm7, %%mm4			\n\t" /* B2		b2 */\
-	"paddd %%mm4, %%mm2			\n\t" /* A2+B2		a2+b2 */\
-	"psubd %%mm4, %%mm0			\n\t" /* a2-B2		a2-b2 */\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"movq %%mm6, %%mm4			\n\t" /* A3		a3 */\
-	"paddd %%mm1, %%mm3			\n\t" /* B3		b3 */\
-	"paddd %%mm3, %%mm6			\n\t" /* A3+B3		a3+b3 */\
-	"psubd %%mm3, %%mm4			\n\t" /* a3-B3		a3-b3 */\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"packssdw %%mm6, %%mm2			\n\t" /* A3+B3	a3+b3	A2+B2	a2+b2 */\
-	"movq %%mm2, 8+" #dst "			\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"packssdw %%mm0, %%mm4			\n\t" /* A2-B2	a2-b2	A3-B3	a3-b3 */\
-	"movq %%mm4, 16+" #dst "		\n\t"\
-	"jmp 2f					\n\t"\
-	"1:					\n\t"\
-	"pslld $16, %%mm0			\n\t"\
-	"paddd "MANGLE(d40000)", %%mm0		\n\t"\
-	"psrad $13, %%mm0			\n\t"\
-	"packssdw %%mm0, %%mm0			\n\t"\
-	"movq %%mm0, " #dst "			\n\t"\
-	"movq %%mm0, 8+" #dst "			\n\t"\
-	"movq %%mm0, 16+" #dst "		\n\t"\
-	"movq %%mm0, 24+" #dst "		\n\t"\
-	"2:					\n\t"
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq "MANGLE(wm1010)", %%mm4   \n\t"\
+        "pand %%mm0, %%mm4              \n\t"\
+        "por %%mm1, %%mm4               \n\t"\
+        "por %%mm2, %%mm4               \n\t"\
+        "por %%mm3, %%mm4               \n\t"\
+        "packssdw %%mm4,%%mm4           \n\t"\
+        "movd %%mm4, %%eax              \n\t"\
+        "orl %%eax, %%eax               \n\t"\
+        "jz 1f                          \n\t"\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq 56(%2), %%mm5             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        #rounder ", %%mm0               \n\t"\
+        "paddd %%mm0, %%mm1             \n\t" /* A1             a1 */\
+        "paddd %%mm0, %%mm0             \n\t" \
+        "psubd %%mm1, %%mm0             \n\t" /* A2             a2 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm5, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm5             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm5             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm1, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm5, %%mm1             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm5, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm7          \n\t" /* A1+B1  a1+b1   A0+B0   a0+b0 */\
+        "packssdw %%mm4, %%mm2          \n\t" /* A0-B0  a0-b0   A1-B1   a1-b1 */\
+        "movq %%mm7, " #dst "           \n\t"\
+        "movq " #src1 ", %%mm1          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "movq %%mm2, 24+" #dst "        \n\t"\
+        "pmaddwd %%mm1, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm1          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm0, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm0             \n\t" /* A2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm1, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* A3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "packssdw %%mm6, %%mm2          \n\t" /* A3+B3  a3+b3   A2+B2   a2+b2 */\
+        "movq %%mm2, 8+" #dst "         \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm0, %%mm4          \n\t" /* A2-B2  a2-b2   A3-B3   a3-b3 */\
+        "movq %%mm4, 16+" #dst "        \n\t"\
+        "jmp 2f                         \n\t"\
+        "1:                             \n\t"\
+        "pslld $16, %%mm0               \n\t"\
+        "paddd "MANGLE(d40000)", %%mm0  \n\t"\
+        "psrad $13, %%mm0               \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t"\
+        "movq %%mm0, " #dst "           \n\t"\
+        "movq %%mm0, 8+" #dst "         \n\t"\
+        "movq %%mm0, 16+" #dst "        \n\t"\
+        "movq %%mm0, 24+" #dst "        \n\t"\
+        "2:                             \n\t"
 
 #define Z_COND_IDCT(src0, src4, src1, src5, dst, rounder, shift, bt) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
-	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
-	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
-	"movq %%mm0, %%mm4			\n\t"\
-	"por %%mm1, %%mm4			\n\t"\
-	"por %%mm2, %%mm4			\n\t"\
-	"por %%mm3, %%mm4			\n\t"\
-	"packssdw %%mm4,%%mm4			\n\t"\
-	"movd %%mm4, %%eax			\n\t"\
-	"orl %%eax, %%eax			\n\t"\
-	"jz " #bt "				\n\t"\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 32(%2), %%mm5			\n\t" /* C6	C2	C6	C2 */\
-	"pmaddwd %%mm1, %%mm5			\n\t" /* C6R6+C2R2	C6r6+C2r2 */\
-	"movq 40(%2), %%mm6			\n\t" /* -C2	C6	-C2	C6 */\
-	"pmaddwd %%mm6, %%mm1			\n\t" /* -C2R6+C6R2	-C2r6+C6r2 */\
-	"movq 48(%2), %%mm7			\n\t" /* C3	C1	C3	C1 */\
-	"pmaddwd %%mm2, %%mm7			\n\t" /* C3R3+C1R1	C3r3+C1r1 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"paddd %%mm5, %%mm4			\n\t" /* A0		a0 */\
-	"psubd %%mm5, %%mm6			\n\t" /* A3		a3 */\
-	"movq 56(%2), %%mm5			\n\t" /* C7	C5	C7	C5 */\
-	"pmaddwd %%mm3, %%mm5			\n\t" /* C7R7+C5R5	C7r7+C5r5 */\
-	#rounder ", %%mm0			\n\t"\
-	"paddd %%mm0, %%mm1			\n\t" /* A1		a1 */\
-	"paddd %%mm0, %%mm0			\n\t" \
-	"psubd %%mm1, %%mm0			\n\t" /* A2		a2 */\
-	"pmaddwd 64(%2), %%mm2			\n\t" /* -C7R3+C3R1	-C7r3+C3r1 */\
-	"paddd %%mm5, %%mm7			\n\t" /* B0		b0 */\
-	"movq 72(%2), %%mm5			\n\t" /* -C5	-C1	-C5	-C1 */\
-	"pmaddwd %%mm3, %%mm5			\n\t" /* -C5R7-C1R5	-C5r7-C1r5 */\
-	"paddd %%mm4, %%mm7			\n\t" /* A0+B0		a0+b0 */\
-	"paddd %%mm4, %%mm4			\n\t" /* 2A0		2a0 */\
-	"psubd %%mm7, %%mm4			\n\t" /* A0-B0		a0-b0 */\
-	"paddd %%mm2, %%mm5			\n\t" /* B1		b1 */\
-	"psrad $" #shift ", %%mm7		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"movq %%mm1, %%mm2			\n\t" /* A1		a1 */\
-	"paddd %%mm5, %%mm1			\n\t" /* A1+B1		a1+b1 */\
-	"psubd %%mm5, %%mm2			\n\t" /* A1-B1		a1-b1 */\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"packssdw %%mm1, %%mm7			\n\t" /* A1+B1	a1+b1	A0+B0	a0+b0 */\
-	"packssdw %%mm4, %%mm2			\n\t" /* A0-B0	a0-b0	A1-B1	a1-b1 */\
-	"movq %%mm7, " #dst "			\n\t"\
-	"movq " #src1 ", %%mm1			\n\t" /* R3	R1	r3	r1 */\
-	"movq 80(%2), %%mm4			\n\t" /* -C1	C5	-C1 	C5 */\
-	"movq %%mm2, 24+" #dst "		\n\t"\
-	"pmaddwd %%mm1, %%mm4			\n\t" /* -C1R3+C5R1	-C1r3+C5r1 */\
-	"movq 88(%2), %%mm7			\n\t" /* C3	C7	C3 	C7 */\
-	"pmaddwd 96(%2), %%mm1			\n\t" /* -C5R3+C7R1	-C5r3+C7r1 */\
-	"pmaddwd %%mm3, %%mm7			\n\t" /* C3R7+C7R5	C3r7+C7r5 */\
-	"movq %%mm0, %%mm2			\n\t" /* A2		a2 */\
-	"pmaddwd 104(%2), %%mm3			\n\t" /* -C1R7+C3R5	-C1r7+C3r5 */\
-	"paddd %%mm7, %%mm4			\n\t" /* B2		b2 */\
-	"paddd %%mm4, %%mm2			\n\t" /* A2+B2		a2+b2 */\
-	"psubd %%mm4, %%mm0			\n\t" /* a2-B2		a2-b2 */\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"movq %%mm6, %%mm4			\n\t" /* A3		a3 */\
-	"paddd %%mm1, %%mm3			\n\t" /* B3		b3 */\
-	"paddd %%mm3, %%mm6			\n\t" /* A3+B3		a3+b3 */\
-	"psubd %%mm3, %%mm4			\n\t" /* a3-B3		a3-b3 */\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"packssdw %%mm6, %%mm2			\n\t" /* A3+B3	a3+b3	A2+B2	a2+b2 */\
-	"movq %%mm2, 8+" #dst "			\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"packssdw %%mm0, %%mm4			\n\t" /* A2-B2	a2-b2	A3-B3	a3-b3 */\
-	"movq %%mm4, 16+" #dst "		\n\t"\
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq %%mm0, %%mm4              \n\t"\
+        "por %%mm1, %%mm4               \n\t"\
+        "por %%mm2, %%mm4               \n\t"\
+        "por %%mm3, %%mm4               \n\t"\
+        "packssdw %%mm4,%%mm4           \n\t"\
+        "movd %%mm4, %%eax              \n\t"\
+        "orl %%eax, %%eax               \n\t"\
+        "jz " #bt "                     \n\t"\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq 56(%2), %%mm5             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        #rounder ", %%mm0               \n\t"\
+        "paddd %%mm0, %%mm1             \n\t" /* A1             a1 */\
+        "paddd %%mm0, %%mm0             \n\t" \
+        "psubd %%mm1, %%mm0             \n\t" /* A2             a2 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm5, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm5             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm5             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm1, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm5, %%mm1             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm5, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm7          \n\t" /* A1+B1  a1+b1   A0+B0   a0+b0 */\
+        "packssdw %%mm4, %%mm2          \n\t" /* A0-B0  a0-b0   A1-B1   a1-b1 */\
+        "movq %%mm7, " #dst "           \n\t"\
+        "movq " #src1 ", %%mm1          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "movq %%mm2, 24+" #dst "        \n\t"\
+        "pmaddwd %%mm1, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm1          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm0, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm0             \n\t" /* A2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm1, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* A3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "packssdw %%mm6, %%mm2          \n\t" /* A3+B3  a3+b3   A2+B2   a2+b2 */\
+        "movq %%mm2, 8+" #dst "         \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm0, %%mm4          \n\t" /* A2-B2  a2-b2   A3-B3   a3-b3 */\
+        "movq %%mm4, 16+" #dst "        \n\t"\
 
 #define ROW_IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
-	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
-	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 32(%2), %%mm5			\n\t" /* C6	C2	C6	C2 */\
-	"pmaddwd %%mm1, %%mm5			\n\t" /* C6R6+C2R2	C6r6+C2r2 */\
-	"movq 40(%2), %%mm6			\n\t" /* -C2	C6	-C2	C6 */\
-	"pmaddwd %%mm6, %%mm1			\n\t" /* -C2R6+C6R2	-C2r6+C6r2 */\
-	"movq 48(%2), %%mm7			\n\t" /* C3	C1	C3	C1 */\
-	"pmaddwd %%mm2, %%mm7			\n\t" /* C3R3+C1R1	C3r3+C1r1 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"paddd %%mm5, %%mm4			\n\t" /* A0		a0 */\
-	"psubd %%mm5, %%mm6			\n\t" /* A3		a3 */\
-	"movq 56(%2), %%mm5			\n\t" /* C7	C5	C7	C5 */\
-	"pmaddwd %%mm3, %%mm5			\n\t" /* C7R7+C5R5	C7r7+C5r5 */\
-	#rounder ", %%mm0			\n\t"\
-	"paddd %%mm0, %%mm1			\n\t" /* A1		a1 */\
-	"paddd %%mm0, %%mm0			\n\t" \
-	"psubd %%mm1, %%mm0			\n\t" /* A2		a2 */\
-	"pmaddwd 64(%2), %%mm2			\n\t" /* -C7R3+C3R1	-C7r3+C3r1 */\
-	"paddd %%mm5, %%mm7			\n\t" /* B0		b0 */\
-	"movq 72(%2), %%mm5			\n\t" /* -C5	-C1	-C5	-C1 */\
-	"pmaddwd %%mm3, %%mm5			\n\t" /* -C5R7-C1R5	-C5r7-C1r5 */\
-	"paddd %%mm4, %%mm7			\n\t" /* A0+B0		a0+b0 */\
-	"paddd %%mm4, %%mm4			\n\t" /* 2A0		2a0 */\
-	"psubd %%mm7, %%mm4			\n\t" /* A0-B0		a0-b0 */\
-	"paddd %%mm2, %%mm5			\n\t" /* B1		b1 */\
-	"psrad $" #shift ", %%mm7		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"movq %%mm1, %%mm2			\n\t" /* A1		a1 */\
-	"paddd %%mm5, %%mm1			\n\t" /* A1+B1		a1+b1 */\
-	"psubd %%mm5, %%mm2			\n\t" /* A1-B1		a1-b1 */\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"packssdw %%mm1, %%mm7			\n\t" /* A1+B1	a1+b1	A0+B0	a0+b0 */\
-	"packssdw %%mm4, %%mm2			\n\t" /* A0-B0	a0-b0	A1-B1	a1-b1 */\
-	"movq %%mm7, " #dst "			\n\t"\
-	"movq " #src1 ", %%mm1			\n\t" /* R3	R1	r3	r1 */\
-	"movq 80(%2), %%mm4			\n\t" /* -C1	C5	-C1 	C5 */\
-	"movq %%mm2, 24+" #dst "		\n\t"\
-	"pmaddwd %%mm1, %%mm4			\n\t" /* -C1R3+C5R1	-C1r3+C5r1 */\
-	"movq 88(%2), %%mm7			\n\t" /* C3	C7	C3 	C7 */\
-	"pmaddwd 96(%2), %%mm1			\n\t" /* -C5R3+C7R1	-C5r3+C7r1 */\
-	"pmaddwd %%mm3, %%mm7			\n\t" /* C3R7+C7R5	C3r7+C7r5 */\
-	"movq %%mm0, %%mm2			\n\t" /* A2		a2 */\
-	"pmaddwd 104(%2), %%mm3			\n\t" /* -C1R7+C3R5	-C1r7+C3r5 */\
-	"paddd %%mm7, %%mm4			\n\t" /* B2		b2 */\
-	"paddd %%mm4, %%mm2			\n\t" /* A2+B2		a2+b2 */\
-	"psubd %%mm4, %%mm0			\n\t" /* a2-B2		a2-b2 */\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"movq %%mm6, %%mm4			\n\t" /* A3		a3 */\
-	"paddd %%mm1, %%mm3			\n\t" /* B3		b3 */\
-	"paddd %%mm3, %%mm6			\n\t" /* A3+B3		a3+b3 */\
-	"psubd %%mm3, %%mm4			\n\t" /* a3-B3		a3-b3 */\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"packssdw %%mm6, %%mm2			\n\t" /* A3+B3	a3+b3	A2+B2	a2+b2 */\
-	"movq %%mm2, 8+" #dst "			\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"packssdw %%mm0, %%mm4			\n\t" /* A2-B2	a2-b2	A3-B3	a3-b3 */\
-	"movq %%mm4, 16+" #dst "		\n\t"\
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq 56(%2), %%mm5             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        #rounder ", %%mm0               \n\t"\
+        "paddd %%mm0, %%mm1             \n\t" /* A1             a1 */\
+        "paddd %%mm0, %%mm0             \n\t" \
+        "psubd %%mm1, %%mm0             \n\t" /* A2             a2 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm5, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm5             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm5           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm5             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm1, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm5, %%mm1             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm5, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm7          \n\t" /* A1+B1  a1+b1   A0+B0   a0+b0 */\
+        "packssdw %%mm4, %%mm2          \n\t" /* A0-B0  a0-b0   A1-B1   a1-b1 */\
+        "movq %%mm7, " #dst "           \n\t"\
+        "movq " #src1 ", %%mm1          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "movq %%mm2, 24+" #dst "        \n\t"\
+        "pmaddwd %%mm1, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm1          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm0, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm0             \n\t" /* A2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm1, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* A3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "packssdw %%mm6, %%mm2          \n\t" /* A3+B3  a3+b3   A2+B2   a2+b2 */\
+        "movq %%mm2, 8+" #dst "         \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm0, %%mm4          \n\t" /* A2-B2  a2-b2   A3-B3   a3-b3 */\
+        "movq %%mm4, 16+" #dst "        \n\t"\
 
 //IDCT(         src0,   src4,   src1,   src5,    dst,   rounder, shift)
 DC_COND_IDCT(  0(%0),  8(%0), 16(%0), 24(%0),  0(%1),paddd 8(%2), 11)
@@ -706,80 +706,80 @@ Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 1f)
 
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
-	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
-	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 32(%2), %%mm5			\n\t" /* C6	C2	C6	C2 */\
-	"pmaddwd %%mm1, %%mm5			\n\t" /* C6R6+C2R2	C6r6+C2r2 */\
-	"movq 40(%2), %%mm6			\n\t" /* -C2	C6	-C2	C6 */\
-	"pmaddwd %%mm6, %%mm1			\n\t" /* -C2R6+C6R2	-C2r6+C6r2 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 48(%2), %%mm7			\n\t" /* C3	C1	C3	C1 */\
-	#rounder ", %%mm0			\n\t"\
-	"pmaddwd %%mm2, %%mm7			\n\t" /* C3R3+C1R1	C3r3+C1r1 */\
-	"paddd %%mm5, %%mm4			\n\t" /* A0		a0 */\
-	"psubd %%mm5, %%mm6			\n\t" /* A3		a3 */\
-	"movq %%mm0, %%mm5			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"paddd %%mm1, %%mm0			\n\t" /* A1		a1 */\
-	"psubd %%mm1, %%mm5			\n\t" /* A2		a2 */\
-	"movq 56(%2), %%mm1			\n\t" /* C7	C5	C7	C5 */\
-	"pmaddwd %%mm3, %%mm1			\n\t" /* C7R7+C5R5	C7r7+C5r5 */\
-	"pmaddwd 64(%2), %%mm2			\n\t" /* -C7R3+C3R1	-C7r3+C3r1 */\
-	"paddd %%mm1, %%mm7			\n\t" /* B0		b0 */\
-	"movq 72(%2), %%mm1			\n\t" /* -C5	-C1	-C5	-C1 */\
-	"pmaddwd %%mm3, %%mm1			\n\t" /* -C5R7-C1R5	-C5r7-C1r5 */\
-	"paddd %%mm4, %%mm7			\n\t" /* A0+B0		a0+b0 */\
-	"paddd %%mm4, %%mm4			\n\t" /* 2A0		2a0 */\
-	"psubd %%mm7, %%mm4			\n\t" /* A0-B0		a0-b0 */\
-	"paddd %%mm2, %%mm1			\n\t" /* B1		b1 */\
-	"psrad $" #shift ", %%mm7		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"movq %%mm0, %%mm2			\n\t" /* A1		a1 */\
-	"paddd %%mm1, %%mm0			\n\t" /* A1+B1		a1+b1 */\
-	"psubd %%mm1, %%mm2			\n\t" /* A1-B1		a1-b1 */\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"packssdw %%mm7, %%mm7			\n\t" /* A0+B0	a0+b0 */\
-	"movd %%mm7, " #dst "			\n\t"\
-	"packssdw %%mm0, %%mm0			\n\t" /* A1+B1	a1+b1 */\
-	"movd %%mm0, 16+" #dst "		\n\t"\
-	"packssdw %%mm2, %%mm2			\n\t" /* A1-B1	a1-b1 */\
-	"movd %%mm2, 96+" #dst "		\n\t"\
-	"packssdw %%mm4, %%mm4			\n\t" /* A0-B0	a0-b0 */\
-	"movd %%mm4, 112+" #dst "		\n\t"\
-	"movq " #src1 ", %%mm0			\n\t" /* R3	R1	r3	r1 */\
-	"movq 80(%2), %%mm4			\n\t" /* -C1	C5	-C1 	C5 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* -C1R3+C5R1	-C1r3+C5r1 */\
-	"movq 88(%2), %%mm7			\n\t" /* C3	C7	C3 	C7 */\
-	"pmaddwd 96(%2), %%mm0			\n\t" /* -C5R3+C7R1	-C5r3+C7r1 */\
-	"pmaddwd %%mm3, %%mm7			\n\t" /* C3R7+C7R5	C3r7+C7r5 */\
-	"movq %%mm5, %%mm2			\n\t" /* A2		a2 */\
-	"pmaddwd 104(%2), %%mm3			\n\t" /* -C1R7+C3R5	-C1r7+C3r5 */\
-	"paddd %%mm7, %%mm4			\n\t" /* B2		b2 */\
-	"paddd %%mm4, %%mm2			\n\t" /* A2+B2		a2+b2 */\
-	"psubd %%mm4, %%mm5			\n\t" /* a2-B2		a2-b2 */\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"psrad $" #shift ", %%mm5		\n\t"\
-	"movq %%mm6, %%mm4			\n\t" /* A3		a3 */\
-	"paddd %%mm0, %%mm3			\n\t" /* B3		b3 */\
-	"paddd %%mm3, %%mm6			\n\t" /* A3+B3		a3+b3 */\
-	"psubd %%mm3, %%mm4			\n\t" /* a3-B3		a3-b3 */\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"packssdw %%mm2, %%mm2			\n\t" /* A2+B2	a2+b2 */\
-	"packssdw %%mm6, %%mm6			\n\t" /* A3+B3	a3+b3 */\
-	"movd %%mm2, 32+" #dst "		\n\t"\
-	"packssdw %%mm4, %%mm4			\n\t" /* A3-B3	a3-b3 */\
-	"packssdw %%mm5, %%mm5			\n\t" /* A2-B2	a2-b2 */\
-	"movd %%mm6, 48+" #dst "		\n\t"\
-	"movd %%mm4, 64+" #dst "		\n\t"\
-	"movd %%mm5, 80+" #dst "		\n\t"
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        #rounder ", %%mm0               \n\t"\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1             a1 */\
+        "psubd %%mm1, %%mm5             \n\t" /* A2             a2 */\
+        "movq 56(%2), %%mm1             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm1, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm1             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm1             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm1, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm7, %%mm7          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm7, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm2, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq " #src1 ", %%mm0          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm0          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm5, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm5             \n\t" /* A2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm0, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* A3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A2+B2  a2+b2 */\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm2, 32+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "movd %%mm4, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"
 
 
 //IDCT(  src0,   src4,   src1,    src5,    dst, rounder, shift)
@@ -787,144 +787,144 @@ IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),/nop, 20)
 IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),/nop, 20)
 IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),/nop, 20)
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
-	"jmp 9f					\n\t"
+        "jmp 9f                         \n\t"
 
-	"#.balign 16				\n\t"\
-	"4:					\n\t"
+        "#.balign 16                    \n\t"\
+        "4:                             \n\t"
 Z_COND_IDCT(  64(%0), 72(%0), 80(%0), 88(%0), 64(%1),paddd (%2), 11, 6f)
 Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 5f)
 
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
-	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 32(%2), %%mm5			\n\t" /* C6	C2	C6	C2 */\
-	"pmaddwd %%mm1, %%mm5			\n\t" /* C6R6+C2R2	C6r6+C2r2 */\
-	"movq 40(%2), %%mm6			\n\t" /* -C2	C6	-C2	C6 */\
-	"pmaddwd %%mm6, %%mm1			\n\t" /* -C2R6+C6R2	-C2r6+C6r2 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	#rounder ", %%mm0			\n\t"\
-	"paddd %%mm5, %%mm4			\n\t" /* A0		a0 */\
-	"psubd %%mm5, %%mm6			\n\t" /* A3		a3 */\
-	"movq %%mm0, %%mm5			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"paddd %%mm1, %%mm0			\n\t" /* A1		a1 */\
-	"psubd %%mm1, %%mm5			\n\t" /* A2		a2 */\
-	"movq 56(%2), %%mm1			\n\t" /* C7	C5	C7	C5 */\
-	"pmaddwd %%mm3, %%mm1			\n\t" /* C7R7+C5R5	C7r7+C5r5 */\
-	"movq 72(%2), %%mm7			\n\t" /* -C5	-C1	-C5	-C1 */\
-	"pmaddwd %%mm3, %%mm7			\n\t" /* -C5R7-C1R5	-C5r7-C1r5 */\
-	"paddd %%mm4, %%mm1			\n\t" /* A0+B0		a0+b0 */\
-	"paddd %%mm4, %%mm4			\n\t" /* 2A0		2a0 */\
-	"psubd %%mm1, %%mm4			\n\t" /* A0-B0		a0-b0 */\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"movq %%mm0, %%mm2			\n\t" /* A1		a1 */\
-	"paddd %%mm7, %%mm0			\n\t" /* A1+B1		a1+b1 */\
-	"psubd %%mm7, %%mm2			\n\t" /* A1-B1		a1-b1 */\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"packssdw %%mm1, %%mm1			\n\t" /* A0+B0	a0+b0 */\
-	"movd %%mm1, " #dst "			\n\t"\
-	"packssdw %%mm0, %%mm0			\n\t" /* A1+B1	a1+b1 */\
-	"movd %%mm0, 16+" #dst "		\n\t"\
-	"packssdw %%mm2, %%mm2			\n\t" /* A1-B1	a1-b1 */\
-	"movd %%mm2, 96+" #dst "		\n\t"\
-	"packssdw %%mm4, %%mm4			\n\t" /* A0-B0	a0-b0 */\
-	"movd %%mm4, 112+" #dst "		\n\t"\
-	"movq 88(%2), %%mm1			\n\t" /* C3	C7	C3 	C7 */\
-	"pmaddwd %%mm3, %%mm1			\n\t" /* C3R7+C7R5	C3r7+C7r5 */\
-	"movq %%mm5, %%mm2			\n\t" /* A2		a2 */\
-	"pmaddwd 104(%2), %%mm3			\n\t" /* -C1R7+C3R5	-C1r7+C3r5 */\
-	"paddd %%mm1, %%mm2			\n\t" /* A2+B2		a2+b2 */\
-	"psubd %%mm1, %%mm5			\n\t" /* a2-B2		a2-b2 */\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"psrad $" #shift ", %%mm5		\n\t"\
-	"movq %%mm6, %%mm1			\n\t" /* A3		a3 */\
-	"paddd %%mm3, %%mm6			\n\t" /* A3+B3		a3+b3 */\
-	"psubd %%mm3, %%mm1			\n\t" /* a3-B3		a3-b3 */\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"packssdw %%mm2, %%mm2			\n\t" /* A2+B2	a2+b2 */\
-	"packssdw %%mm6, %%mm6			\n\t" /* A3+B3	a3+b3 */\
-	"movd %%mm2, 32+" #dst "		\n\t"\
-	"packssdw %%mm1, %%mm1			\n\t" /* A3-B3	a3-b3 */\
-	"packssdw %%mm5, %%mm5			\n\t" /* A2-B2	a2-b2 */\
-	"movd %%mm6, 48+" #dst "		\n\t"\
-	"movd %%mm1, 64+" #dst "		\n\t"\
-	"movd %%mm5, 80+" #dst "		\n\t"	
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        #rounder ", %%mm0               \n\t"\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1             a1 */\
+        "psubd %%mm1, %%mm5             \n\t" /* A2             a2 */\
+        "movq 56(%2), %%mm1             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        "movq 72(%2), %%mm7             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm1             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm1, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm7, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm7, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm1          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm1, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm2, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq 88(%2), %%mm1             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm5, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm1, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm1, %%mm5             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm1              \n\t" /* A3             a3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm1             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A2+B2  a2+b2 */\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm2, 32+" #dst "        \n\t"\
+        "packssdw %%mm1, %%mm1          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "movd %%mm1, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"
 
 //IDCT(  src0,   src4,   src1,    src5,    dst, rounder, shift)
 IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),/nop, 20)
 IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),/nop, 20)
 IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),/nop, 20)
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
-	"jmp 9f					\n\t"
+        "jmp 9f                         \n\t"
 
-	"#.balign 16				\n\t"\
-	"6:					\n\t"
+        "#.balign 16                    \n\t"\
+        "6:                             \n\t"
 Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 7f)
 
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	#rounder ", %%mm0			\n\t"\
-	"movq %%mm0, %%mm5			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 56(%2), %%mm1			\n\t" /* C7	C5	C7	C5 */\
-	"pmaddwd %%mm3, %%mm1			\n\t" /* C7R7+C5R5	C7r7+C5r5 */\
-	"movq 72(%2), %%mm7			\n\t" /* -C5	-C1	-C5	-C1 */\
-	"pmaddwd %%mm3, %%mm7			\n\t" /* -C5R7-C1R5	-C5r7-C1r5 */\
-	"paddd %%mm4, %%mm1			\n\t" /* A0+B0		a0+b0 */\
-	"paddd %%mm4, %%mm4			\n\t" /* 2A0		2a0 */\
-	"psubd %%mm1, %%mm4			\n\t" /* A0-B0		a0-b0 */\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"movq %%mm0, %%mm2			\n\t" /* A1		a1 */\
-	"paddd %%mm7, %%mm0			\n\t" /* A1+B1		a1+b1 */\
-	"psubd %%mm7, %%mm2			\n\t" /* A1-B1		a1-b1 */\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"packssdw %%mm1, %%mm1			\n\t" /* A0+B0	a0+b0 */\
-	"movd %%mm1, " #dst "			\n\t"\
-	"packssdw %%mm0, %%mm0			\n\t" /* A1+B1	a1+b1 */\
-	"movd %%mm0, 16+" #dst "		\n\t"\
-	"packssdw %%mm2, %%mm2			\n\t" /* A1-B1	a1-b1 */\
-	"movd %%mm2, 96+" #dst "		\n\t"\
-	"packssdw %%mm4, %%mm4			\n\t" /* A0-B0	a0-b0 */\
-	"movd %%mm4, 112+" #dst "		\n\t"\
-	"movq 88(%2), %%mm1			\n\t" /* C3	C7	C3 	C7 */\
-	"pmaddwd %%mm3, %%mm1			\n\t" /* C3R7+C7R5	C3r7+C7r5 */\
-	"movq %%mm5, %%mm2			\n\t" /* A2		a2 */\
-	"pmaddwd 104(%2), %%mm3			\n\t" /* -C1R7+C3R5	-C1r7+C3r5 */\
-	"paddd %%mm1, %%mm2			\n\t" /* A2+B2		a2+b2 */\
-	"psubd %%mm1, %%mm5			\n\t" /* a2-B2		a2-b2 */\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"psrad $" #shift ", %%mm5		\n\t"\
-	"movq %%mm6, %%mm1			\n\t" /* A3		a3 */\
-	"paddd %%mm3, %%mm6			\n\t" /* A3+B3		a3+b3 */\
-	"psubd %%mm3, %%mm1			\n\t" /* a3-B3		a3-b3 */\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"packssdw %%mm2, %%mm2			\n\t" /* A2+B2	a2+b2 */\
-	"packssdw %%mm6, %%mm6			\n\t" /* A3+B3	a3+b3 */\
-	"movd %%mm2, 32+" #dst "		\n\t"\
-	"packssdw %%mm1, %%mm1			\n\t" /* A3-B3	a3-b3 */\
-	"packssdw %%mm5, %%mm5			\n\t" /* A2-B2	a2-b2 */\
-	"movd %%mm6, 48+" #dst "		\n\t"\
-	"movd %%mm1, 64+" #dst "		\n\t"\
-	"movd %%mm5, 80+" #dst "		\n\t"	
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        #rounder ", %%mm0               \n\t"\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 56(%2), %%mm1             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        "movq 72(%2), %%mm7             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm1             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm1, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm7, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm7, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm1, %%mm1          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm1, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm2, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq 88(%2), %%mm1             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm5, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm1, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm1, %%mm5             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm1              \n\t" /* A3             a3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm1             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A2+B2  a2+b2 */\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm2, 32+" #dst "        \n\t"\
+        "packssdw %%mm1, %%mm1          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "movd %%mm1, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"
 
 
 //IDCT(  src0,   src4,   src1,    src5,    dst, rounder, shift)
@@ -932,144 +932,144 @@ IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),/nop, 20)
 IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),/nop, 20)
 IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),/nop, 20)
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
-	"jmp 9f					\n\t"
+        "jmp 9f                         \n\t"
 
-	"#.balign 16				\n\t"\
-	"2:					\n\t"
+        "#.balign 16                    \n\t"\
+        "2:                             \n\t"
 Z_COND_IDCT(  96(%0),104(%0),112(%0),120(%0), 96(%1),paddd (%2), 11, 3f)
 
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
-	"movq " #src5 ", %%mm3			\n\t" /* R7	R5	r7	r5 */\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 48(%2), %%mm7			\n\t" /* C3	C1	C3	C1 */\
-	#rounder ", %%mm0			\n\t"\
-	"pmaddwd %%mm2, %%mm7			\n\t" /* C3R3+C1R1	C3r3+C1r1 */\
-	"movq %%mm0, %%mm5			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 56(%2), %%mm1			\n\t" /* C7	C5	C7	C5 */\
-	"pmaddwd %%mm3, %%mm1			\n\t" /* C7R7+C5R5	C7r7+C5r5 */\
-	"pmaddwd 64(%2), %%mm2			\n\t" /* -C7R3+C3R1	-C7r3+C3r1 */\
-	"paddd %%mm1, %%mm7			\n\t" /* B0		b0 */\
-	"movq 72(%2), %%mm1			\n\t" /* -C5	-C1	-C5	-C1 */\
-	"pmaddwd %%mm3, %%mm1			\n\t" /* -C5R7-C1R5	-C5r7-C1r5 */\
-	"paddd %%mm4, %%mm7			\n\t" /* A0+B0		a0+b0 */\
-	"paddd %%mm4, %%mm4			\n\t" /* 2A0		2a0 */\
-	"psubd %%mm7, %%mm4			\n\t" /* A0-B0		a0-b0 */\
-	"paddd %%mm2, %%mm1			\n\t" /* B1		b1 */\
-	"psrad $" #shift ", %%mm7		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"movq %%mm0, %%mm2			\n\t" /* A1		a1 */\
-	"paddd %%mm1, %%mm0			\n\t" /* A1+B1		a1+b1 */\
-	"psubd %%mm1, %%mm2			\n\t" /* A1-B1		a1-b1 */\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"packssdw %%mm7, %%mm7			\n\t" /* A0+B0	a0+b0 */\
-	"movd %%mm7, " #dst "			\n\t"\
-	"packssdw %%mm0, %%mm0			\n\t" /* A1+B1	a1+b1 */\
-	"movd %%mm0, 16+" #dst "		\n\t"\
-	"packssdw %%mm2, %%mm2			\n\t" /* A1-B1	a1-b1 */\
-	"movd %%mm2, 96+" #dst "		\n\t"\
-	"packssdw %%mm4, %%mm4			\n\t" /* A0-B0	a0-b0 */\
-	"movd %%mm4, 112+" #dst "		\n\t"\
-	"movq " #src1 ", %%mm0			\n\t" /* R3	R1	r3	r1 */\
-	"movq 80(%2), %%mm4			\n\t" /* -C1	C5	-C1 	C5 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* -C1R3+C5R1	-C1r3+C5r1 */\
-	"movq 88(%2), %%mm7			\n\t" /* C3	C7	C3 	C7 */\
-	"pmaddwd 96(%2), %%mm0			\n\t" /* -C5R3+C7R1	-C5r3+C7r1 */\
-	"pmaddwd %%mm3, %%mm7			\n\t" /* C3R7+C7R5	C3r7+C7r5 */\
-	"movq %%mm5, %%mm2			\n\t" /* A2		a2 */\
-	"pmaddwd 104(%2), %%mm3			\n\t" /* -C1R7+C3R5	-C1r7+C3r5 */\
-	"paddd %%mm7, %%mm4			\n\t" /* B2		b2 */\
-	"paddd %%mm4, %%mm2			\n\t" /* A2+B2		a2+b2 */\
-	"psubd %%mm4, %%mm5			\n\t" /* a2-B2		a2-b2 */\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"psrad $" #shift ", %%mm5		\n\t"\
-	"movq %%mm6, %%mm4			\n\t" /* A3		a3 */\
-	"paddd %%mm0, %%mm3			\n\t" /* B3		b3 */\
-	"paddd %%mm3, %%mm6			\n\t" /* A3+B3		a3+b3 */\
-	"psubd %%mm3, %%mm4			\n\t" /* a3-B3		a3-b3 */\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"packssdw %%mm2, %%mm2			\n\t" /* A2+B2	a2+b2 */\
-	"packssdw %%mm6, %%mm6			\n\t" /* A3+B3	a3+b3 */\
-	"movd %%mm2, 32+" #dst "		\n\t"\
-	"packssdw %%mm4, %%mm4			\n\t" /* A3-B3	a3-b3 */\
-	"packssdw %%mm5, %%mm5			\n\t" /* A2-B2	a2-b2 */\
-	"movd %%mm6, 48+" #dst "		\n\t"\
-	"movd %%mm4, 64+" #dst "		\n\t"\
-	"movd %%mm5, 80+" #dst "		\n\t"
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq " #src5 ", %%mm3          \n\t" /* R7     R5      r7      r5 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        #rounder ", %%mm0               \n\t"\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 56(%2), %%mm1             \n\t" /* C7     C5      C7      C5 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* C7R7+C5R5      C7r7+C5r5 */\
+        "pmaddwd 64(%2), %%mm2          \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm1, %%mm7             \n\t" /* B0             b0 */\
+        "movq 72(%2), %%mm1             \n\t" /* -C5    -C1     -C5     -C1 */\
+        "pmaddwd %%mm3, %%mm1           \n\t" /* -C5R7-C1R5     -C5r7-C1r5 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "paddd %%mm2, %%mm1             \n\t" /* B1             b1 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm2              \n\t" /* A1             a1 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm1, %%mm2             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm7, %%mm7          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm7, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm2, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq " #src1 ", %%mm0          \n\t" /* R3     R1      r3      r1 */\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "movq 88(%2), %%mm7             \n\t" /* C3     C7      C3      C7 */\
+        "pmaddwd 96(%2), %%mm0          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C3R7+C7R5      C3r7+C7r5 */\
+        "movq %%mm5, %%mm2              \n\t" /* A2             a2 */\
+        "pmaddwd 104(%2), %%mm3         \n\t" /* -C1R7+C3R5     -C1r7+C3r5 */\
+        "paddd %%mm7, %%mm4             \n\t" /* B2             b2 */\
+        "paddd %%mm4, %%mm2             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm5             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm0, %%mm3             \n\t" /* B3             b3 */\
+        "paddd %%mm3, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm3, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm2, %%mm2          \n\t" /* A2+B2  a2+b2 */\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm2, 32+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "movd %%mm4, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"
 
 //IDCT(  src0,   src4,   src1,    src5,    dst, rounder, shift)
 IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),/nop, 20)
 IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),/nop, 20)
 IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),/nop, 20)
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
-	"jmp 9f					\n\t"
+        "jmp 9f                         \n\t"
 
-	"#.balign 16				\n\t"\
-	"3:					\n\t"
+        "#.balign 16                    \n\t"\
+        "3:                             \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 48(%2), %%mm7			\n\t" /* C3	C1	C3	C1 */\
-	#rounder ", %%mm0			\n\t"\
-	"pmaddwd %%mm2, %%mm7			\n\t" /* C3R3+C1R1	C3r3+C1r1 */\
-	"movq %%mm0, %%mm5			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 64(%2), %%mm3			\n\t"\
-	"pmaddwd %%mm2, %%mm3			\n\t" /* -C7R3+C3R1	-C7r3+C3r1 */\
-	"paddd %%mm4, %%mm7			\n\t" /* A0+B0		a0+b0 */\
-	"paddd %%mm4, %%mm4			\n\t" /* 2A0		2a0 */\
-	"psubd %%mm7, %%mm4			\n\t" /* A0-B0		a0-b0 */\
-	"psrad $" #shift ", %%mm7		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"movq %%mm0, %%mm1			\n\t" /* A1		a1 */\
-	"paddd %%mm3, %%mm0			\n\t" /* A1+B1		a1+b1 */\
-	"psubd %%mm3, %%mm1			\n\t" /* A1-B1		a1-b1 */\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"packssdw %%mm7, %%mm7			\n\t" /* A0+B0	a0+b0 */\
-	"movd %%mm7, " #dst "			\n\t"\
-	"packssdw %%mm0, %%mm0			\n\t" /* A1+B1	a1+b1 */\
-	"movd %%mm0, 16+" #dst "		\n\t"\
-	"packssdw %%mm1, %%mm1			\n\t" /* A1-B1	a1-b1 */\
-	"movd %%mm1, 96+" #dst "		\n\t"\
-	"packssdw %%mm4, %%mm4			\n\t" /* A0-B0	a0-b0 */\
-	"movd %%mm4, 112+" #dst "		\n\t"\
-	"movq 80(%2), %%mm4			\n\t" /* -C1	C5	-C1 	C5 */\
-	"pmaddwd %%mm2, %%mm4			\n\t" /* -C1R3+C5R1	-C1r3+C5r1 */\
-	"pmaddwd 96(%2), %%mm2			\n\t" /* -C5R3+C7R1	-C5r3+C7r1 */\
-	"movq %%mm5, %%mm1			\n\t" /* A2		a2 */\
-	"paddd %%mm4, %%mm1			\n\t" /* A2+B2		a2+b2 */\
-	"psubd %%mm4, %%mm5			\n\t" /* a2-B2		a2-b2 */\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"psrad $" #shift ", %%mm5		\n\t"\
-	"movq %%mm6, %%mm4			\n\t" /* A3		a3 */\
-	"paddd %%mm2, %%mm6			\n\t" /* A3+B3		a3+b3 */\
-	"psubd %%mm2, %%mm4			\n\t" /* a3-B3		a3-b3 */\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"packssdw %%mm1, %%mm1			\n\t" /* A2+B2	a2+b2 */\
-	"packssdw %%mm6, %%mm6			\n\t" /* A3+B3	a3+b3 */\
-	"movd %%mm1, 32+" #dst "		\n\t"\
-	"packssdw %%mm4, %%mm4			\n\t" /* A3-B3	a3-b3 */\
-	"packssdw %%mm5, %%mm5			\n\t" /* A2-B2	a2-b2 */\
-	"movd %%mm6, 48+" #dst "		\n\t"\
-	"movd %%mm4, 64+" #dst "		\n\t"\
-	"movd %%mm5, 80+" #dst "		\n\t"
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        #rounder ", %%mm0               \n\t"\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 64(%2), %%mm3             \n\t"\
+        "pmaddwd %%mm2, %%mm3           \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm1              \n\t" /* A1             a1 */\
+        "paddd %%mm3, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm3, %%mm1             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "packssdw %%mm7, %%mm7          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm7, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm1, %%mm1          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm1, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "pmaddwd %%mm2, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "pmaddwd 96(%2), %%mm2          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "movq %%mm5, %%mm1              \n\t" /* A2             a2 */\
+        "paddd %%mm4, %%mm1             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm5             \n\t" /* a2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm2, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm2, %%mm4             \n\t" /* a3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm1, %%mm1          \n\t" /* A2+B2  a2+b2 */\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm1, 32+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "movd %%mm4, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"
 
 
 //IDCT(  src0,   src4,   src1,    src5,    dst, rounder, shift)
@@ -1077,186 +1077,186 @@ IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),/nop, 20)
 IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),/nop, 20)
 IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),/nop, 20)
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
-	"jmp 9f					\n\t"
+        "jmp 9f                         \n\t"
 
-	"#.balign 16				\n\t"\
-	"5:					\n\t"
+        "#.balign 16                    \n\t"\
+        "5:                             \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 32(%2), %%mm5			\n\t" /* C6	C2	C6	C2 */\
-	"pmaddwd %%mm1, %%mm5			\n\t" /* C6R6+C2R2	C6r6+C2r2 */\
-	"movq 40(%2), %%mm6			\n\t" /* -C2	C6	-C2	C6 */\
-	"pmaddwd %%mm6, %%mm1			\n\t" /* -C2R6+C6R2	-C2r6+C6r2 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"paddd %%mm5, %%mm4			\n\t" /* A0		a0 */\
-	#rounder ", %%mm0			\n\t"\
-	"psubd %%mm5, %%mm6			\n\t" /* A3		a3 */\
-	"movq %%mm0, %%mm5			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"paddd %%mm1, %%mm0			\n\t" /* A1		a1 */\
-	"psubd %%mm1, %%mm5			\n\t" /* A2		a2 */\
-	"movq 8+" #src0 ", %%mm2		\n\t" /* R4	R0	r4	r0 */\
-	"movq 8+" #src4 ", %%mm3		\n\t" /* R6	R2	r6	r2 */\
-	"movq 16(%2), %%mm1			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm2, %%mm1			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm7			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm7, %%mm2			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 32(%2), %%mm7			\n\t" /* C6	C2	C6	C2 */\
-	"pmaddwd %%mm3, %%mm7			\n\t" /* C6R6+C2R2	C6r6+C2r2 */\
-	"pmaddwd 40(%2), %%mm3			\n\t" /* -C2R6+C6R2	-C2r6+C6r2 */\
-	#rounder ", %%mm1			\n\t"\
-	"paddd %%mm1, %%mm7			\n\t" /* A0		a0 */\
-	"paddd %%mm1, %%mm1			\n\t" /* 2C0		2c0 */\
-	#rounder ", %%mm2			\n\t"\
-	"psubd %%mm7, %%mm1			\n\t" /* A3		a3 */\
-	"paddd %%mm2, %%mm3			\n\t" /* A1		a1 */\
-	"paddd %%mm2, %%mm2			\n\t" /* 2C1		2c1 */\
-	"psubd %%mm3, %%mm2			\n\t" /* A2		a2 */\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"psrad $" #shift ", %%mm7		\n\t"\
-	"psrad $" #shift ", %%mm3		\n\t"\
-	"packssdw %%mm7, %%mm4			\n\t" /* A0	a0 */\
-	"movq %%mm4, " #dst "			\n\t"\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"packssdw %%mm3, %%mm0			\n\t" /* A1	a1 */\
-	"movq %%mm0, 16+" #dst "		\n\t"\
-	"movq %%mm0, 96+" #dst "		\n\t"\
-	"movq %%mm4, 112+" #dst "		\n\t"\
-	"psrad $" #shift ", %%mm5		\n\t"\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"packssdw %%mm2, %%mm5			\n\t" /* A2-B2	a2-b2 */\
-	"movq %%mm5, 32+" #dst "		\n\t"\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"packssdw %%mm1, %%mm6			\n\t" /* A3+B3	a3+b3 */\
-	"movq %%mm6, 48+" #dst "		\n\t"\
-	"movq %%mm6, 64+" #dst "		\n\t"\
-	"movq %%mm5, 80+" #dst "		\n\t"	
-	
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        #rounder ", %%mm0               \n\t"\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1             a1 */\
+        "psubd %%mm1, %%mm5             \n\t" /* A2             a2 */\
+        "movq 8+" #src0 ", %%mm2        \n\t" /* R4     R0      r4      r0 */\
+        "movq 8+" #src4 ", %%mm3        \n\t" /* R6     R2      r6      r2 */\
+        "movq 16(%2), %%mm1             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm2, %%mm1           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm7             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm7, %%mm2           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm7             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm3, %%mm7           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "pmaddwd 40(%2), %%mm3          \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        #rounder ", %%mm1               \n\t"\
+        "paddd %%mm1, %%mm7             \n\t" /* A0             a0 */\
+        "paddd %%mm1, %%mm1             \n\t" /* 2C0            2c0 */\
+        #rounder ", %%mm2               \n\t"\
+        "psubd %%mm7, %%mm1             \n\t" /* A3             a3 */\
+        "paddd %%mm2, %%mm3             \n\t" /* A1             a1 */\
+        "paddd %%mm2, %%mm2             \n\t" /* 2C1            2c1 */\
+        "psubd %%mm3, %%mm2             \n\t" /* A2             a2 */\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm3       \n\t"\
+        "packssdw %%mm7, %%mm4          \n\t" /* A0     a0 */\
+        "movq %%mm4, " #dst "           \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "packssdw %%mm3, %%mm0          \n\t" /* A1     a1 */\
+        "movq %%mm0, 16+" #dst "        \n\t"\
+        "movq %%mm0, 96+" #dst "        \n\t"\
+        "movq %%mm4, 112+" #dst "       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm2, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movq %%mm5, 32+" #dst "        \n\t"\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "packssdw %%mm1, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movq %%mm6, 48+" #dst "        \n\t"\
+        "movq %%mm6, 64+" #dst "        \n\t"\
+        "movq %%mm5, 80+" #dst "        \n\t"
+
 
 //IDCT(  src0,   src4,   src1,    src5,    dst, rounder, shift)
 IDCT(    0(%1), 64(%1), 32(%1),  96(%1),  0(%0),/nop, 20)
 //IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),/nop, 20)
 IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),/nop, 20)
 //IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
-	"jmp 9f					\n\t"
+        "jmp 9f                         \n\t"
 
 
-	"#.balign 16				\n\t"\
-	"1:					\n\t"
+        "#.balign 16                    \n\t"\
+        "1:                             \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq " #src4 ", %%mm1			\n\t" /* R6	R2	r6	r2 */\
-	"movq " #src1 ", %%mm2			\n\t" /* R3	R1	r3	r1 */\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 32(%2), %%mm5			\n\t" /* C6	C2	C6	C2 */\
-	"pmaddwd %%mm1, %%mm5			\n\t" /* C6R6+C2R2	C6r6+C2r2 */\
-	"movq 40(%2), %%mm6			\n\t" /* -C2	C6	-C2	C6 */\
-	"pmaddwd %%mm6, %%mm1			\n\t" /* -C2R6+C6R2	-C2r6+C6r2 */\
-	#rounder ", %%mm4			\n\t"\
-	"movq %%mm4, %%mm6			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 48(%2), %%mm7			\n\t" /* C3	C1	C3	C1 */\
-	#rounder ", %%mm0			\n\t"\
-	"pmaddwd %%mm2, %%mm7			\n\t" /* C3R3+C1R1	C3r3+C1r1 */\
-	"paddd %%mm5, %%mm4			\n\t" /* A0		a0 */\
-	"psubd %%mm5, %%mm6			\n\t" /* A3		a3 */\
-	"movq %%mm0, %%mm5			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"paddd %%mm1, %%mm0			\n\t" /* A1		a1 */\
-	"psubd %%mm1, %%mm5			\n\t" /* A2		a2 */\
-	"movq 64(%2), %%mm1			\n\t"\
-	"pmaddwd %%mm2, %%mm1			\n\t" /* -C7R3+C3R1	-C7r3+C3r1 */\
-	"paddd %%mm4, %%mm7			\n\t" /* A0+B0		a0+b0 */\
-	"paddd %%mm4, %%mm4			\n\t" /* 2A0		2a0 */\
-	"psubd %%mm7, %%mm4			\n\t" /* A0-B0		a0-b0 */\
-	"psrad $" #shift ", %%mm7		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"movq %%mm0, %%mm3			\n\t" /* A1		a1 */\
-	"paddd %%mm1, %%mm0			\n\t" /* A1+B1		a1+b1 */\
-	"psubd %%mm1, %%mm3			\n\t" /* A1-B1		a1-b1 */\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"psrad $" #shift ", %%mm3		\n\t"\
-	"packssdw %%mm7, %%mm7			\n\t" /* A0+B0	a0+b0 */\
-	"movd %%mm7, " #dst "			\n\t"\
-	"packssdw %%mm0, %%mm0			\n\t" /* A1+B1	a1+b1 */\
-	"movd %%mm0, 16+" #dst "		\n\t"\
-	"packssdw %%mm3, %%mm3			\n\t" /* A1-B1	a1-b1 */\
-	"movd %%mm3, 96+" #dst "		\n\t"\
-	"packssdw %%mm4, %%mm4			\n\t" /* A0-B0	a0-b0 */\
-	"movd %%mm4, 112+" #dst "		\n\t"\
-	"movq 80(%2), %%mm4			\n\t" /* -C1	C5	-C1 	C5 */\
-	"pmaddwd %%mm2, %%mm4			\n\t" /* -C1R3+C5R1	-C1r3+C5r1 */\
-	"pmaddwd 96(%2), %%mm2			\n\t" /* -C5R3+C7R1	-C5r3+C7r1 */\
-	"movq %%mm5, %%mm3			\n\t" /* A2		a2 */\
-	"paddd %%mm4, %%mm3			\n\t" /* A2+B2		a2+b2 */\
-	"psubd %%mm4, %%mm5			\n\t" /* a2-B2		a2-b2 */\
-	"psrad $" #shift ", %%mm3		\n\t"\
-	"psrad $" #shift ", %%mm5		\n\t"\
-	"movq %%mm6, %%mm4			\n\t" /* A3		a3 */\
-	"paddd %%mm2, %%mm6			\n\t" /* A3+B3		a3+b3 */\
-	"psubd %%mm2, %%mm4			\n\t" /* a3-B3		a3-b3 */\
-	"psrad $" #shift ", %%mm6		\n\t"\
-	"packssdw %%mm3, %%mm3			\n\t" /* A2+B2	a2+b2 */\
-	"movd %%mm3, 32+" #dst "		\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"packssdw %%mm6, %%mm6			\n\t" /* A3+B3	a3+b3 */\
-	"movd %%mm6, 48+" #dst "		\n\t"\
-	"packssdw %%mm4, %%mm4			\n\t" /* A3-B3	a3-b3 */\
-	"packssdw %%mm5, %%mm5			\n\t" /* A2-B2	a2-b2 */\
-	"movd %%mm4, 64+" #dst "		\n\t"\
-	"movd %%mm5, 80+" #dst "		\n\t"
-	
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq " #src4 ", %%mm1          \n\t" /* R6     R2      r6      r2 */\
+        "movq " #src1 ", %%mm2          \n\t" /* R3     R1      r3      r1 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm5             \n\t" /* C6     C2      C6      C2 */\
+        "pmaddwd %%mm1, %%mm5           \n\t" /* C6R6+C2R2      C6r6+C2r2 */\
+        "movq 40(%2), %%mm6             \n\t" /* -C2    C6      -C2     C6 */\
+        "pmaddwd %%mm6, %%mm1           \n\t" /* -C2R6+C6R2     -C2r6+C6r2 */\
+        #rounder ", %%mm4               \n\t"\
+        "movq %%mm4, %%mm6              \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 48(%2), %%mm7             \n\t" /* C3     C1      C3      C1 */\
+        #rounder ", %%mm0               \n\t"\
+        "pmaddwd %%mm2, %%mm7           \n\t" /* C3R3+C1R1      C3r3+C1r1 */\
+        "paddd %%mm5, %%mm4             \n\t" /* A0             a0 */\
+        "psubd %%mm5, %%mm6             \n\t" /* A3             a3 */\
+        "movq %%mm0, %%mm5              \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1             a1 */\
+        "psubd %%mm1, %%mm5             \n\t" /* A2             a2 */\
+        "movq 64(%2), %%mm1             \n\t"\
+        "pmaddwd %%mm2, %%mm1           \n\t" /* -C7R3+C3R1     -C7r3+C3r1 */\
+        "paddd %%mm4, %%mm7             \n\t" /* A0+B0          a0+b0 */\
+        "paddd %%mm4, %%mm4             \n\t" /* 2A0            2a0 */\
+        "psubd %%mm7, %%mm4             \n\t" /* A0-B0          a0-b0 */\
+        "psrad $" #shift ", %%mm7       \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "movq %%mm0, %%mm3              \n\t" /* A1             a1 */\
+        "paddd %%mm1, %%mm0             \n\t" /* A1+B1          a1+b1 */\
+        "psubd %%mm1, %%mm3             \n\t" /* A1-B1          a1-b1 */\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "psrad $" #shift ", %%mm3       \n\t"\
+        "packssdw %%mm7, %%mm7          \n\t" /* A0+B0  a0+b0 */\
+        "movd %%mm7, " #dst "           \n\t"\
+        "packssdw %%mm0, %%mm0          \n\t" /* A1+B1  a1+b1 */\
+        "movd %%mm0, 16+" #dst "        \n\t"\
+        "packssdw %%mm3, %%mm3          \n\t" /* A1-B1  a1-b1 */\
+        "movd %%mm3, 96+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A0-B0  a0-b0 */\
+        "movd %%mm4, 112+" #dst "       \n\t"\
+        "movq 80(%2), %%mm4             \n\t" /* -C1    C5      -C1     C5 */\
+        "pmaddwd %%mm2, %%mm4           \n\t" /* -C1R3+C5R1     -C1r3+C5r1 */\
+        "pmaddwd 96(%2), %%mm2          \n\t" /* -C5R3+C7R1     -C5r3+C7r1 */\
+        "movq %%mm5, %%mm3              \n\t" /* A2             a2 */\
+        "paddd %%mm4, %%mm3             \n\t" /* A2+B2          a2+b2 */\
+        "psubd %%mm4, %%mm5             \n\t" /* A2-B2          a2-b2 */\
+        "psrad $" #shift ", %%mm3       \n\t"\
+        "psrad $" #shift ", %%mm5       \n\t"\
+        "movq %%mm6, %%mm4              \n\t" /* A3             a3 */\
+        "paddd %%mm2, %%mm6             \n\t" /* A3+B3          a3+b3 */\
+        "psubd %%mm2, %%mm4             \n\t" /* A3-B3          a3-b3 */\
+        "psrad $" #shift ", %%mm6       \n\t"\
+        "packssdw %%mm3, %%mm3          \n\t" /* A2+B2  a2+b2 */\
+        "movd %%mm3, 32+" #dst "        \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "packssdw %%mm6, %%mm6          \n\t" /* A3+B3  a3+b3 */\
+        "movd %%mm6, 48+" #dst "        \n\t"\
+        "packssdw %%mm4, %%mm4          \n\t" /* A3-B3  a3-b3 */\
+        "packssdw %%mm5, %%mm5          \n\t" /* A2-B2  a2-b2 */\
+        "movd %%mm4, 64+" #dst "        \n\t"\
+        "movd %%mm5, 80+" #dst "        \n\t"
+
 
 //IDCT(  src0,   src4,   src1,    src5,    dst, rounder, shift)
 IDCT(    (%1), 64(%1), 32(%1),  96(%1),  0(%0),/nop, 20)
 IDCT(   8(%1), 72(%1), 40(%1), 104(%1),  4(%0),/nop, 20)
 IDCT(  16(%1), 80(%1), 48(%1), 112(%1),  8(%0),/nop, 20)
 IDCT(  24(%1), 88(%1), 56(%1), 120(%1), 12(%0),/nop, 20)
-	"jmp 9f					\n\t"
+        "jmp 9f                         \n\t"
 
 
-	"#.balign 16				\n\t"
-	"7:					\n\t"
+        "#.balign 16                    \n\t"
+        "7:                             \n\t"
 #undef IDCT
 #define IDCT(src0, src4, src1, src5, dst, rounder, shift) \
-	"movq " #src0 ", %%mm0			\n\t" /* R4	R0	r4	r0 */\
-	"movq 16(%2), %%mm4			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm0, %%mm4			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm5			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm5, %%mm0			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	#rounder ", %%mm4			\n\t"\
-	#rounder ", %%mm0			\n\t"\
-	"psrad $" #shift ", %%mm4		\n\t"\
-	"psrad $" #shift ", %%mm0		\n\t"\
-	"movq 8+" #src0 ", %%mm2		\n\t" /* R4	R0	r4	r0 */\
-	"movq 16(%2), %%mm1			\n\t" /* C4	C4	C4	C4 */\
-	"pmaddwd %%mm2, %%mm1			\n\t" /* C4R4+C4R0	C4r4+C4r0 */\
-	"movq 24(%2), %%mm7			\n\t" /* -C4	C4	-C4	C4 */\
-	"pmaddwd %%mm7, %%mm2			\n\t" /* -C4R4+C4R0	-C4r4+C4r0 */\
-	"movq 32(%2), %%mm7			\n\t" /* C6	C2	C6	C2 */\
-	#rounder ", %%mm1			\n\t"\
-	#rounder ", %%mm2			\n\t"\
-	"psrad $" #shift ", %%mm1		\n\t"\
-	"packssdw %%mm1, %%mm4			\n\t" /* A0	a0 */\
-	"movq %%mm4, " #dst "			\n\t"\
-	"psrad $" #shift ", %%mm2		\n\t"\
-	"packssdw %%mm2, %%mm0			\n\t" /* A1	a1 */\
-	"movq %%mm0, 16+" #dst "		\n\t"\
-	"movq %%mm0, 96+" #dst "		\n\t"\
-	"movq %%mm4, 112+" #dst "		\n\t"\
-	"movq %%mm0, 32+" #dst "		\n\t"\
-	"movq %%mm4, 48+" #dst "		\n\t"\
-	"movq %%mm4, 64+" #dst "		\n\t"\
-	"movq %%mm0, 80+" #dst "		\n\t"	
+        "movq " #src0 ", %%mm0          \n\t" /* R4     R0      r4      r0 */\
+        "movq 16(%2), %%mm4             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm0, %%mm4           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm5             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm5, %%mm0           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        #rounder ", %%mm4               \n\t"\
+        #rounder ", %%mm0               \n\t"\
+        "psrad $" #shift ", %%mm4       \n\t"\
+        "psrad $" #shift ", %%mm0       \n\t"\
+        "movq 8+" #src0 ", %%mm2        \n\t" /* R4     R0      r4      r0 */\
+        "movq 16(%2), %%mm1             \n\t" /* C4     C4      C4      C4 */\
+        "pmaddwd %%mm2, %%mm1           \n\t" /* C4R4+C4R0      C4r4+C4r0 */\
+        "movq 24(%2), %%mm7             \n\t" /* -C4    C4      -C4     C4 */\
+        "pmaddwd %%mm7, %%mm2           \n\t" /* -C4R4+C4R0     -C4r4+C4r0 */\
+        "movq 32(%2), %%mm7             \n\t" /* C6     C2      C6      C2 */\
+        #rounder ", %%mm1               \n\t"\
+        #rounder ", %%mm2               \n\t"\
+        "psrad $" #shift ", %%mm1       \n\t"\
+        "packssdw %%mm1, %%mm4          \n\t" /* A0     a0 */\
+        "movq %%mm4, " #dst "           \n\t"\
+        "psrad $" #shift ", %%mm2       \n\t"\
+        "packssdw %%mm2, %%mm0          \n\t" /* A1     a1 */\
+        "movq %%mm0, 16+" #dst "        \n\t"\
+        "movq %%mm0, 96+" #dst "        \n\t"\
+        "movq %%mm4, 112+" #dst "       \n\t"\
+        "movq %%mm0, 32+" #dst "        \n\t"\
+        "movq %%mm4, 48+" #dst "        \n\t"\
+        "movq %%mm4, 64+" #dst "        \n\t"\
+        "movq %%mm0, 80+" #dst "        \n\t"
 
 //IDCT(  src0,   src4,   src1,    src5,    dst, rounder, shift)
 IDCT(   0(%1), 64(%1), 32(%1),  96(%1),  0(%0),/nop, 20)
@@ -1277,7 +1277,7 @@ Input
  12 32 16 36 52 72 56 76
  05 45 07 47 25 65 27 67
  15 35 17 37 55 75 57 77
-  
+
 Temp
  00 04 10 14 20 24 30 34
  40 44 50 54 60 64 70 74
@@ -1290,9 +1290,9 @@ Temp
 */
 
 "9: \n\t"
-		:: "r" (block), "r" (temp), "r" (coeffs)
-		: "%eax"
-	);
+                :: "r" (block), "r" (temp), "r" (coeffs)
+                : "%eax"
+        );
 }
 
 void ff_simple_idct_mmx(int16_t *block)
diff --git a/src/libffmpeg/libavcodec/i386/vp3dsp_mmx.c b/src/libffmpeg/libavcodec/i386/vp3dsp_mmx.c
index 3d220c1d4..0684531ae 100644
--- a/src/libffmpeg/libavcodec/i386/vp3dsp_mmx.c
+++ b/src/libffmpeg/libavcodec/i386/vp3dsp_mmx.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
@@ -208,7 +208,7 @@ static const uint16_t idct_cosine_table[7] = {
     I(1) = d1 c1 b1 a1
     I(2) = d2 c2 b2 a2
     I(3) = d3 c3 b3 a3
-    
+
     J(4) = h0 g0 f0 e0
     J(5) = h1 g1 f1 e1
     J(6) = h2 g2 f2 e2
diff --git a/src/libffmpeg/libavcodec/i386/vp3dsp_sse2.c b/src/libffmpeg/libavcodec/i386/vp3dsp_sse2.c
index ed17891bf..cf822f7d4 100644
--- a/src/libffmpeg/libavcodec/i386/vp3dsp_sse2.c
+++ b/src/libffmpeg/libavcodec/i386/vp3dsp_sse2.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
@@ -24,7 +24,7 @@
 #include "../dsputil.h"
 #include "mmx.h"
 
-static const unsigned short __align16 SSE2_dequant_const[] =
+static DECLARE_ALIGNED_16(const unsigned short, SSE2_dequant_const[]) =
 {
     0,65535,65535,0,0,0,0,0,    // 0x0000 0000 0000 0000 0000 FFFF FFFF 0000
     0,0,0,0,65535,65535,0,0,    // 0x0000 0000 FFFF FFFF 0000 0000 0000 0000
@@ -35,22 +35,22 @@ static const unsigned short __align16 SSE2_dequant_const[] =
     0,0,65535,65535, 0,0,0,0    // 0x0000 0000 0000 0000 FFFF FFFF 0000 0000
 };
 
-static const unsigned int __align16 eight_data[] =
-{ 
-    0x00080008, 
+static DECLARE_ALIGNED_16(const unsigned int, eight_data[]) =
+{
+    0x00080008,
+    0x00080008,
     0x00080008,
-    0x00080008, 
-    0x00080008 
-}; 
+    0x00080008
+};
 
-static const unsigned short __align16 SSE2_idct_data[7 * 8] =
+static DECLARE_ALIGNED_16(const unsigned short, SSE2_idct_data[7 * 8]) =
 {
-    64277,64277,64277,64277,64277,64277,64277,64277, 
-    60547,60547,60547,60547,60547,60547,60547,60547, 
-    54491,54491,54491,54491,54491,54491,54491,54491, 
-    46341,46341,46341,46341,46341,46341,46341,46341, 
-    36410,36410,36410,36410,36410,36410,36410,36410, 
-    25080,25080,25080,25080,25080,25080,25080,25080, 
+    64277,64277,64277,64277,64277,64277,64277,64277,
+    60547,60547,60547,60547,60547,60547,60547,60547,
+    54491,54491,54491,54491,54491,54491,54491,54491,
+    46341,46341,46341,46341,46341,46341,46341,46341,
+    36410,36410,36410,36410,36410,36410,36410,36410,
+    25080,25080,25080,25080,25080,25080,25080,25080,
     12785,12785,12785,12785,12785,12785,12785,12785
 };
 
@@ -820,6 +820,6 @@ void ff_vp3_idct_sse2(int16_t *input_data)
     SSE2_Row_IDCT();
 
     SSE2_Transpose();
-        
+
     SSE2_Column_IDCT();
 }
diff --git a/src/libffmpeg/libavcodec/idcinvideo.c b/src/libffmpeg/libavcodec/idcinvideo.c
index f5df5a49d..7e7e6aab1 100644
--- a/src/libffmpeg/libavcodec/idcinvideo.c
+++ b/src/libffmpeg/libavcodec/idcinvideo.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
diff --git a/src/libffmpeg/libavcodec/imgconvert.c b/src/libffmpeg/libavcodec/imgconvert.c
index afb66fa85..850f9b04f 100644
--- a/src/libffmpeg/libavcodec/imgconvert.c
+++ b/src/libffmpeg/libavcodec/imgconvert.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
@@ -71,7 +71,7 @@ static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 1, 
+        .x_chroma_shift = 1, .y_chroma_shift = 1,
     },
     [PIX_FMT_YUV422P] = {
         .name = "yuv422p",
@@ -79,7 +79,7 @@ static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 0, 
+        .x_chroma_shift = 1, .y_chroma_shift = 0,
     },
     [PIX_FMT_YUV444P] = {
         .name = "yuv444p",
@@ -87,7 +87,7 @@ static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
         .color_type = FF_COLOR_YUV,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0, 
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
     [PIX_FMT_YUV422] = {
         .name = "yuv422",
@@ -129,7 +129,7 @@ static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
         .color_type = FF_COLOR_YUV_JPEG,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 1, 
+        .x_chroma_shift = 1, .y_chroma_shift = 1,
     },
     [PIX_FMT_YUVJ422P] = {
         .name = "yuvj422p",
@@ -137,7 +137,7 @@ static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
         .color_type = FF_COLOR_YUV_JPEG,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 1, .y_chroma_shift = 0, 
+        .x_chroma_shift = 1, .y_chroma_shift = 0,
     },
     [PIX_FMT_YUVJ444P] = {
         .name = "yuvj444p",
@@ -145,7 +145,7 @@ static PixFmtInfo pix_fmt_info[PIX_FMT_NB] = {
         .color_type = FF_COLOR_YUV_JPEG,
         .pixel_type = FF_PIXEL_PLANAR,
         .depth = 8,
-        .x_chroma_shift = 0, .y_chroma_shift = 0, 
+        .x_chroma_shift = 0, .y_chroma_shift = 0,
     },
 
     /* RGB formats */
@@ -253,21 +253,21 @@ const char *avcodec_get_pix_fmt_name(int pix_fmt)
 
 enum PixelFormat avcodec_get_pix_fmt(const char* name)
 {
-    int i; 
-    
+    int i;
+
     for (i=0; i < PIX_FMT_NB; i++)
          if (!strcmp(pix_fmt_info[i].name, name))
-	     break;
+             break;
     return i;
 }
 
 /* Picture field are filled with 'ptr' addresses. Also return size */
 int avpicture_fill(AVPicture *picture, uint8_t *ptr,
-		   int pix_fmt, int width, int height)
+                   int pix_fmt, int width, int height)
 {
     int size, w2, h2, size2;
     PixFmtInfo *pinfo;
-    
+
     if(avcodec_check_dimensions(NULL, width, height))
         goto fail;
 
@@ -361,49 +361,49 @@ int avpicture_layout(const AVPicture* src, int pix_fmt, int width, int height,
 {
     PixFmtInfo* pf = &pix_fmt_info[pix_fmt];
     int i, j, w, h, data_planes;
-    const unsigned char* s; 
+    const unsigned char* s;
     int size = avpicture_get_size(pix_fmt, width, height);
 
     if (size > dest_size || size < 0)
         return -1;
 
     if (pf->pixel_type == FF_PIXEL_PACKED || pf->pixel_type == FF_PIXEL_PALETTE) {
-        if (pix_fmt == PIX_FMT_YUV422 || 
-            pix_fmt == PIX_FMT_UYVY422 || 
+        if (pix_fmt == PIX_FMT_YUV422 ||
+            pix_fmt == PIX_FMT_UYVY422 ||
             pix_fmt == PIX_FMT_RGB565 ||
             pix_fmt == PIX_FMT_RGB555)
             w = width * 2;
-	else if (pix_fmt == PIX_FMT_UYVY411)
-	  w = width + width/2;
-	else if (pix_fmt == PIX_FMT_PAL8)
-	  w = width;
-	else
-	  w = width * (pf->depth * pf->nb_channels / 8);
-	  
-	data_planes = 1;
-	h = height;
+        else if (pix_fmt == PIX_FMT_UYVY411)
+          w = width + width/2;
+        else if (pix_fmt == PIX_FMT_PAL8)
+          w = width;
+        else
+          w = width * (pf->depth * pf->nb_channels / 8);
+
+        data_planes = 1;
+        h = height;
     } else {
         data_planes = pf->nb_channels;
-	w = (width*pf->depth + 7)/8;
-	h = height;
+        w = (width*pf->depth + 7)/8;
+        h = height;
     }
-    
+
     for (i=0; i<data_planes; i++) {
          if (i == 1) {
-	     w = width >> pf->x_chroma_shift;
-	     h = height >> pf->y_chroma_shift;
-	 }
+             w = width >> pf->x_chroma_shift;
+             h = height >> pf->y_chroma_shift;
+         }
          s = src->data[i];
-	 for(j=0; j<h; j++) {
-	     memcpy(dest, s, w);
-	     dest += w;
-	     s += src->linesize[i];
-	 }
+         for(j=0; j<h; j++) {
+             memcpy(dest, s, w);
+             dest += w;
+             s += src->linesize[i];
+         }
     }
-    
+
     if (pf->pixel_type == FF_PIXEL_PALETTE)
-	memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4);
-    
+        memcpy((unsigned char *)(((size_t)dest + 3) & ~3), src->data[1], 256 * 4);
+
     return size;
 }
 
@@ -414,7 +414,7 @@ int avpicture_get_size(int pix_fmt, int width, int height)
 }
 
 /**
- * compute the loss when converting from a pixel format to another 
+ * compute the loss when converting from one pixel format to another
  */
 int avcodec_get_pix_fmt_loss(int dst_pix_fmt, int src_pix_fmt,
                              int has_alpha)
@@ -450,7 +450,7 @@ int avcodec_get_pix_fmt_loss(int dst_pix_fmt, int src_pix_fmt,
         break;
     case FF_COLOR_YUV_JPEG:
         if (ps->color_type != FF_COLOR_YUV_JPEG &&
-            ps->color_type != FF_COLOR_YUV && 
+            ps->color_type != FF_COLOR_YUV &&
             ps->color_type != FF_COLOR_GRAY)
             loss |= FF_LOSS_COLORSPACE;
         break;
@@ -465,7 +465,7 @@ int avcodec_get_pix_fmt_loss(int dst_pix_fmt, int src_pix_fmt,
         loss |= FF_LOSS_CHROMA;
     if (!pf->is_alpha && (ps->is_alpha && has_alpha))
         loss |= FF_LOSS_ALPHA;
-    if (pf->pixel_type == FF_PIXEL_PALETTE && 
+    if (pf->pixel_type == FF_PIXEL_PALETTE &&
         (ps->pixel_type != FF_PIXEL_PALETTE && ps->color_type != FF_COLOR_GRAY))
         loss |= FF_LOSS_COLORQUANT;
     return loss;
@@ -486,9 +486,9 @@ static int avg_bits_per_pixel(int pix_fmt)
         case PIX_FMT_RGB555:
             bits = 16;
             break;
-	case PIX_FMT_UYVY411:
-	    bits = 12;
-	    break;
+        case PIX_FMT_UYVY411:
+            bits = 12;
+            break;
         default:
             bits = pf->depth * pf->nb_channels;
             break;
@@ -498,7 +498,7 @@ static int avg_bits_per_pixel(int pix_fmt)
         if (pf->x_chroma_shift == 0 && pf->y_chroma_shift == 0) {
             bits = pf->depth * pf->nb_channels;
         } else {
-            bits = pf->depth + ((2 * pf->depth) >> 
+            bits = pf->depth + ((2 * pf->depth) >>
                                 (pf->x_chroma_shift + pf->y_chroma_shift));
         }
         break;
@@ -512,7 +512,7 @@ static int avg_bits_per_pixel(int pix_fmt)
     return bits;
 }
 
-static int avcodec_find_best_pix_fmt1(int pix_fmt_mask, 
+static int avcodec_find_best_pix_fmt1(int pix_fmt_mask,
                                       int src_pix_fmt,
                                       int has_alpha,
                                       int loss_mask)
@@ -537,8 +537,8 @@ static int avcodec_find_best_pix_fmt1(int pix_fmt_mask,
     return dst_pix_fmt;
 }
 
-/** 
- * find best pixel format to convert to. Return -1 if none found 
+/**
+ * find best pixel format to convert to. Return -1 if none found
  */
 int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt,
                               int has_alpha, int *loss_ptr)
@@ -558,7 +558,7 @@ int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt,
     i = 0;
     for(;;) {
         loss_mask = loss_mask_order[i++];
-        dst_pix_fmt = avcodec_find_best_pix_fmt1(pix_fmt_mask, src_pix_fmt, 
+        dst_pix_fmt = avcodec_find_best_pix_fmt1(pix_fmt_mask, src_pix_fmt,
                                                  has_alpha, loss_mask);
         if (dst_pix_fmt >= 0)
             goto found;
@@ -572,11 +572,11 @@ int avcodec_find_best_pix_fmt(int pix_fmt_mask, int src_pix_fmt,
     return dst_pix_fmt;
 }
 
-static void img_copy_plane(uint8_t *dst, int dst_wrap, 
+static void img_copy_plane(uint8_t *dst, int dst_wrap,
                            const uint8_t *src, int src_wrap,
                            int width, int height)
 {
-    if((!dst) || (!src)) 
+    if((!dst) || (!src))
         return;
     for(;height > 0; height--) {
         memcpy(dst, src, width);
@@ -593,7 +593,7 @@ void img_copy(AVPicture *dst, const AVPicture *src,
 {
     int bwidth, bits, i;
     PixFmtInfo *pf = &pix_fmt_info[pix_fmt];
-    
+
     pf = &pix_fmt_info[pix_fmt];
     switch(pf->pixel_type) {
     case FF_PIXEL_PACKED:
@@ -604,9 +604,9 @@ void img_copy(AVPicture *dst, const AVPicture *src,
         case PIX_FMT_RGB555:
             bits = 16;
             break;
-	case PIX_FMT_UYVY411:
-	    bits = 12;
-	    break;
+        case PIX_FMT_UYVY411:
+            bits = 12;
+            break;
         default:
             bits = pf->depth * pf->nb_channels;
             break;
@@ -651,7 +651,7 @@ static void yuv422_to_yuv420p(AVPicture *dst, const AVPicture *src,
     const uint8_t *p, *p1;
     uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1;
     int w;
- 
+
     p1 = src->data[0];
     lum1 = dst->data[0];
     cb1 = dst->data[1];
@@ -707,9 +707,9 @@ static void uyvy422_to_yuv420p(AVPicture *dst, const AVPicture *src,
     const uint8_t *p, *p1;
     uint8_t *lum, *cr, *cb, *lum1, *cr1, *cb1;
     int w;
- 
+
     p1 = src->data[0];
-    
+
     lum1 = dst->data[0];
     cb1 = dst->data[1];
     cr1 = dst->data[2];
@@ -910,11 +910,11 @@ static void uyvy411_to_yuv411p(AVPicture *dst, const AVPicture *src,
         cr = cr1;
         for(w = width; w >= 4; w -= 4) {
             cb[0] = p[0];
-	    lum[0] = p[1];
+            lum[0] = p[1];
             lum[1] = p[2];
             cr[0] = p[3];
-	    lum[2] = p[4];
-	    lum[3] = p[5];
+            lum[2] = p[4];
+            lum[3] = p[5];
             p += 6;
             lum += 4;
             cb++;
@@ -936,24 +936,24 @@ static void yuv420p_to_yuv422(AVPicture *dst, const AVPicture *src,
     uint8_t *lum1, *lum2, *lumsrc = src->data[0];
     uint8_t *cb1, *cb2 = src->data[1];
     uint8_t *cr1, *cr2 = src->data[2];
-    
+
     for(h = height / 2; h--;) {
         line1 = linesrc;
         line2 = linesrc + dst->linesize[0];
-        
+
         lum1 = lumsrc;
         lum2 = lumsrc + src->linesize[0];
-        
+
         cb1 = cb2;
         cr1 = cr2;
-        
+
         for(w = width / 2; w--;) {
-                *line1++ = *lum1++; *line2++ = *lum2++;                     
-                *line1++ =          *line2++ = *cb1++;                      
-                *line1++ = *lum1++; *line2++ = *lum2++;                     
+                *line1++ = *lum1++; *line2++ = *lum2++;
+                *line1++ =          *line2++ = *cb1++;
+                *line1++ = *lum1++; *line2++ = *lum2++;
                 *line1++ =          *line2++ = *cr1++;
         }
-        
+
         linesrc += dst->linesize[0] * 2;
         lumsrc += src->linesize[0] * 2;
         cb2 += src->linesize[1];
@@ -969,24 +969,24 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src,
     uint8_t *lum1, *lum2, *lumsrc = src->data[0];
     uint8_t *cb1, *cb2 = src->data[1];
     uint8_t *cr1, *cr2 = src->data[2];
-    
+
     for(h = height / 2; h--;) {
         line1 = linesrc;
         line2 = linesrc + dst->linesize[0];
-        
+
         lum1 = lumsrc;
         lum2 = lumsrc + src->linesize[0];
-        
+
         cb1 = cb2;
         cr1 = cr2;
-        
+
         for(w = width / 2; w--;) {
-                *line1++ =          *line2++ = *cb1++;                      
-                *line1++ = *lum1++; *line2++ = *lum2++;                     
+                *line1++ =          *line2++ = *cb1++;
+                *line1++ = *lum1++; *line2++ = *lum2++;
                 *line1++ =          *line2++ = *cr1++;
-                *line1++ = *lum1++; *line2++ = *lum2++;                     
+                *line1++ = *lum1++; *line2++ = *lum2++;
         }
-        
+
         linesrc += dst->linesize[0] * 2;
         lumsrc += src->linesize[0] * 2;
         cb2 += src->linesize[1];
@@ -996,7 +996,7 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src,
 
 #define SCALEBITS 10
 #define ONE_HALF  (1 << (SCALEBITS - 1))
-#define FIX(x)	  ((int) ((x) * (1<<SCALEBITS) + 0.5))
+#define FIX(x)    ((int) ((x) * (1<<SCALEBITS) + 0.5))
 
 #define YUV_TO_RGB1_CCIR(cb1, cr1)\
 {\
@@ -1046,7 +1046,7 @@ static void yuv420p_to_uyvy422(AVPicture *dst, const AVPicture *src,
 static inline int C_JPEG_TO_CCIR(int y) {
     y = (((y - 128) * FIX(112.0/127.0) + (ONE_HALF + (128 << SCALEBITS))) >> SCALEBITS);
     if (y < 16)
-	y = 16;
+        y = 16;
     return y;
 }
 
@@ -1095,7 +1095,7 @@ static void img_convert_init(void)
 }
 
 /* apply to each pixel the given table */
-static void img_apply_table(uint8_t *dst, int dst_wrap, 
+static void img_apply_table(uint8_t *dst, int dst_wrap,
                             const uint8_t *src, int src_wrap,
                             int width, int height, const uint8_t *table1)
 {
@@ -1133,7 +1133,7 @@ static void img_apply_table(uint8_t *dst, int dst_wrap,
 /* XXX: in most cases, the sampling position is incorrect */
 
 /* 4x1 -> 1x1 */
-static void shrink41(uint8_t *dst, int dst_wrap, 
+static void shrink41(uint8_t *dst, int dst_wrap,
                      const uint8_t *src, int src_wrap,
                      int width, int height)
 {
@@ -1155,7 +1155,7 @@ static void shrink41(uint8_t *dst, int dst_wrap,
 }
 
 /* 2x1 -> 1x1 */
-static void shrink21(uint8_t *dst, int dst_wrap, 
+static void shrink21(uint8_t *dst, int dst_wrap,
                      const uint8_t *src, int src_wrap,
                      int width, int height)
 {
@@ -1177,7 +1177,7 @@ static void shrink21(uint8_t *dst, int dst_wrap,
 }
 
 /* 1x2 -> 1x1 */
-static void shrink12(uint8_t *dst, int dst_wrap, 
+static void shrink12(uint8_t *dst, int dst_wrap,
                      const uint8_t *src, int src_wrap,
                      int width, int height)
 {
@@ -1210,7 +1210,7 @@ static void shrink12(uint8_t *dst, int dst_wrap,
 }
 
 /* 2x2 -> 1x1 */
-static void shrink22(uint8_t *dst, int dst_wrap, 
+static void shrink22(uint8_t *dst, int dst_wrap,
                      const uint8_t *src, int src_wrap,
                      int width, int height)
 {
@@ -1243,7 +1243,7 @@ static void shrink22(uint8_t *dst, int dst_wrap,
 }
 
 /* 4x4 -> 1x1 */
-static void shrink44(uint8_t *dst, int dst_wrap, 
+static void shrink44(uint8_t *dst, int dst_wrap,
                      const uint8_t *src, int src_wrap,
                      int width, int height)
 {
@@ -1371,7 +1371,7 @@ static void grow44(uint8_t *dst, int dst_wrap,
 }
 
 /* 1x2 -> 2x1 */
-static void conv411(uint8_t *dst, int dst_wrap, 
+static void conv411(uint8_t *dst, int dst_wrap,
                     const uint8_t *src, int src_wrap,
                     int width, int height)
 {
@@ -1419,7 +1419,7 @@ static void build_rgb_palette(uint8_t *palette, int has_alpha)
     for(r = 0; r < 6; r++) {
         for(g = 0; g < 6; g++) {
             for(b = 0; b < 6; b++) {
-                pal[i++] = (0xff << 24) | (pal_value[r] << 16) | 
+                pal[i++] = (0xff << 24) | (pal_value[r] << 16) |
                     (pal_value[g] << 8) | pal_value[b];
             }
         }
@@ -1584,7 +1584,7 @@ static void mono_to_gray(AVPicture *dst, const AVPicture *src,
     q = dst->data[0];
     dst_wrap = dst->linesize[0] - width;
     for(y=0;y<height;y++) {
-        w = width; 
+        w = width;
         while (w >= 8) {
             v = *p++ ^ xor_mask;
             q[0] = -(v >> 7);
@@ -1681,14 +1681,14 @@ static void gray_to_monoblack(AVPicture *dst, const AVPicture *src,
 
 typedef struct ConvertEntry {
     void (*convert)(AVPicture *dst,
-		    const AVPicture *src, int width, int height);
+                    const AVPicture *src, int width, int height);
 } ConvertEntry;
 
 /* Add each new convertion function in this table. In order to be able
    to convert from any format to any format, the following constraints
    must be satisfied:
 
-   - all FF_COLOR_RGB formats must convert to and from PIX_FMT_RGB24 
+   - all FF_COLOR_RGB formats must convert to and from PIX_FMT_RGB24
 
    - all FF_COLOR_GRAY formats must convert to and from PIX_FMT_GRAY8
 
@@ -1706,213 +1706,213 @@ static ConvertEntry convert_table[PIX_FMT_NB][PIX_FMT_NB] = {
         [PIX_FMT_YUV422] = {
             .convert = yuv420p_to_yuv422,
         },
-        [PIX_FMT_RGB555] = { 
+        [PIX_FMT_RGB555] = {
             .convert = yuv420p_to_rgb555
         },
-        [PIX_FMT_RGB565] = { 
+        [PIX_FMT_RGB565] = {
             .convert = yuv420p_to_rgb565
         },
-        [PIX_FMT_BGR24] = { 
+        [PIX_FMT_BGR24] = {
             .convert = yuv420p_to_bgr24
         },
-        [PIX_FMT_RGB24] = { 
+        [PIX_FMT_RGB24] = {
             .convert = yuv420p_to_rgb24
         },
-        [PIX_FMT_RGBA32] = { 
+        [PIX_FMT_RGBA32] = {
             .convert = yuv420p_to_rgba32
         },
-	[PIX_FMT_UYVY422] = { 
+        [PIX_FMT_UYVY422] = {
             .convert = yuv420p_to_uyvy422,
         },
     },
-    [PIX_FMT_YUV422P] = { 
-        [PIX_FMT_YUV422] = { 
+    [PIX_FMT_YUV422P] = {
+        [PIX_FMT_YUV422] = {
             .convert = yuv422p_to_yuv422,
         },
-        [PIX_FMT_UYVY422] = { 
+        [PIX_FMT_UYVY422] = {
             .convert = yuv422p_to_uyvy422,
         },
     },
-    [PIX_FMT_YUV444P] = { 
-        [PIX_FMT_RGB24] = { 
+    [PIX_FMT_YUV444P] = {
+        [PIX_FMT_RGB24] = {
             .convert = yuv444p_to_rgb24
         },
     },
     [PIX_FMT_YUVJ420P] = {
-        [PIX_FMT_RGB555] = { 
+        [PIX_FMT_RGB555] = {
             .convert = yuvj420p_to_rgb555
         },
-        [PIX_FMT_RGB565] = { 
+        [PIX_FMT_RGB565] = {
             .convert = yuvj420p_to_rgb565
         },
-        [PIX_FMT_BGR24] = { 
+        [PIX_FMT_BGR24] = {
             .convert = yuvj420p_to_bgr24
         },
-        [PIX_FMT_RGB24] = { 
+        [PIX_FMT_RGB24] = {
             .convert = yuvj420p_to_rgb24
         },
-        [PIX_FMT_RGBA32] = { 
+        [PIX_FMT_RGBA32] = {
             .convert = yuvj420p_to_rgba32
         },
     },
-    [PIX_FMT_YUVJ444P] = { 
-        [PIX_FMT_RGB24] = { 
+    [PIX_FMT_YUVJ444P] = {
+        [PIX_FMT_RGB24] = {
             .convert = yuvj444p_to_rgb24
         },
     },
-    [PIX_FMT_YUV422] = { 
-        [PIX_FMT_YUV420P] = { 
+    [PIX_FMT_YUV422] = {
+        [PIX_FMT_YUV420P] = {
             .convert = yuv422_to_yuv420p,
         },
-        [PIX_FMT_YUV422P] = { 
+        [PIX_FMT_YUV422P] = {
             .convert = yuv422_to_yuv422p,
         },
     },
-    [PIX_FMT_UYVY422] = { 
-        [PIX_FMT_YUV420P] = { 
+    [PIX_FMT_UYVY422] = {
+        [PIX_FMT_YUV420P] = {
             .convert = uyvy422_to_yuv420p,
         },
-        [PIX_FMT_YUV422P] = { 
+        [PIX_FMT_YUV422P] = {
             .convert = uyvy422_to_yuv422p,
         },
     },
     [PIX_FMT_RGB24] = {
-        [PIX_FMT_YUV420P] = { 
+        [PIX_FMT_YUV420P] = {
             .convert = rgb24_to_yuv420p
         },
-        [PIX_FMT_RGB565] = { 
+        [PIX_FMT_RGB565] = {
             .convert = rgb24_to_rgb565
         },
-        [PIX_FMT_RGB555] = { 
+        [PIX_FMT_RGB555] = {
             .convert = rgb24_to_rgb555
         },
-        [PIX_FMT_RGBA32] = { 
+        [PIX_FMT_RGBA32] = {
             .convert = rgb24_to_rgba32
         },
-        [PIX_FMT_BGR24] = { 
+        [PIX_FMT_BGR24] = {
             .convert = rgb24_to_bgr24
         },
-        [PIX_FMT_GRAY8] = { 
+        [PIX_FMT_GRAY8] = {
             .convert = rgb24_to_gray
         },
         [PIX_FMT_PAL8] = {
             .convert = rgb24_to_pal8
         },
-        [PIX_FMT_YUV444P] = { 
+        [PIX_FMT_YUV444P] = {
             .convert = rgb24_to_yuv444p
         },
-        [PIX_FMT_YUVJ420P] = { 
+        [PIX_FMT_YUVJ420P] = {
             .convert = rgb24_to_yuvj420p
         },
-        [PIX_FMT_YUVJ444P] = { 
+        [PIX_FMT_YUVJ444P] = {
             .convert = rgb24_to_yuvj444p
         },
     },
     [PIX_FMT_RGBA32] = {
-        [PIX_FMT_RGB24] = { 
+        [PIX_FMT_RGB24] = {
             .convert = rgba32_to_rgb24
         },
-        [PIX_FMT_RGB555] = { 
+        [PIX_FMT_RGB555] = {
             .convert = rgba32_to_rgb555
         },
-        [PIX_FMT_PAL8] = { 
+        [PIX_FMT_PAL8] = {
             .convert = rgba32_to_pal8
         },
-        [PIX_FMT_YUV420P] = { 
+        [PIX_FMT_YUV420P] = {
             .convert = rgba32_to_yuv420p
         },
-        [PIX_FMT_GRAY8] = { 
+        [PIX_FMT_GRAY8] = {
             .convert = rgba32_to_gray
         },
     },
     [PIX_FMT_BGR24] = {
-        [PIX_FMT_RGB24] = { 
+        [PIX_FMT_RGB24] = {
             .convert = bgr24_to_rgb24
         },
-        [PIX_FMT_YUV420P] = { 
+        [PIX_FMT_YUV420P] = {
             .convert = bgr24_to_yuv420p
         },
-        [PIX_FMT_GRAY8] = { 
+        [PIX_FMT_GRAY8] = {
             .convert = bgr24_to_gray
         },
     },
     [PIX_FMT_RGB555] = {
-        [PIX_FMT_RGB24] = { 
+        [PIX_FMT_RGB24] = {
             .convert = rgb555_to_rgb24
         },
-        [PIX_FMT_RGBA32] = { 
+        [PIX_FMT_RGBA32] = {
             .convert = rgb555_to_rgba32
         },
-        [PIX_FMT_YUV420P] = { 
+        [PIX_FMT_YUV420P] = {
             .convert = rgb555_to_yuv420p
         },
-        [PIX_FMT_GRAY8] = { 
+        [PIX_FMT_GRAY8] = {
             .convert = rgb555_to_gray
         },
     },
     [PIX_FMT_RGB565] = {
-        [PIX_FMT_RGB24] = { 
+        [PIX_FMT_RGB24] = {
             .convert = rgb565_to_rgb24
         },
-        [PIX_FMT_YUV420P] = { 
+        [PIX_FMT_YUV420P] = {
             .convert = rgb565_to_yuv420p
         },
-        [PIX_FMT_GRAY8] = { 
+        [PIX_FMT_GRAY8] = {
             .convert = rgb565_to_gray
         },
     },
     [PIX_FMT_GRAY8] = {
-        [PIX_FMT_RGB555] = { 
+        [PIX_FMT_RGB555] = {
             .convert = gray_to_rgb555
         },
-        [PIX_FMT_RGB565] = { 
+        [PIX_FMT_RGB565] = {
             .convert = gray_to_rgb565
         },
-        [PIX_FMT_RGB24] = { 
+        [PIX_FMT_RGB24] = {
             .convert = gray_to_rgb24
         },
-        [PIX_FMT_BGR24] = { 
+        [PIX_FMT_BGR24] = {
             .convert = gray_to_bgr24
         },
-        [PIX_FMT_RGBA32] = { 
+        [PIX_FMT_RGBA32] = {
             .convert = gray_to_rgba32
         },
-        [PIX_FMT_MONOWHITE] = { 
+        [PIX_FMT_MONOWHITE] = {
             .convert = gray_to_monowhite
         },
-        [PIX_FMT_MONOBLACK] = { 
+        [PIX_FMT_MONOBLACK] = {
             .convert = gray_to_monoblack
         },
     },
     [PIX_FMT_MONOWHITE] = {
-        [PIX_FMT_GRAY8] = { 
+        [PIX_FMT_GRAY8] = {
             .convert = monowhite_to_gray
         },
     },
     [PIX_FMT_MONOBLACK] = {
-        [PIX_FMT_GRAY8] = { 
+        [PIX_FMT_GRAY8] = {
             .convert = monoblack_to_gray
         },
     },
     [PIX_FMT_PAL8] = {
-        [PIX_FMT_RGB555] = { 
+        [PIX_FMT_RGB555] = {
             .convert = pal8_to_rgb555
         },
-        [PIX_FMT_RGB565] = { 
+        [PIX_FMT_RGB565] = {
             .convert = pal8_to_rgb565
         },
-        [PIX_FMT_BGR24] = { 
+        [PIX_FMT_BGR24] = {
             .convert = pal8_to_bgr24
         },
-        [PIX_FMT_RGB24] = { 
+        [PIX_FMT_RGB24] = {
             .convert = pal8_to_rgb24
         },
-        [PIX_FMT_RGBA32] = { 
+        [PIX_FMT_RGBA32] = {
             .convert = pal8_to_rgba32
         },
     },
-    [PIX_FMT_UYVY411] = { 
-        [PIX_FMT_YUV411P] = { 
+    [PIX_FMT_UYVY411] = {
+        [PIX_FMT_YUV411P] = {
             .convert = uyvy411_to_yuv411p,
         },
     },
@@ -1947,13 +1947,13 @@ void avpicture_free(AVPicture *picture)
 static inline int is_yuv_planar(PixFmtInfo *ps)
 {
     return (ps->color_type == FF_COLOR_YUV ||
-            ps->color_type == FF_COLOR_YUV_JPEG) && 
+            ps->color_type == FF_COLOR_YUV_JPEG) &&
         ps->pixel_type == FF_PIXEL_PLANAR;
 }
 
 /* XXX: always use linesize. Return -1 if not supported */
 int img_convert(AVPicture *dst, int dst_pix_fmt,
-                const AVPicture *src, int src_pix_fmt, 
+                const AVPicture *src, int src_pix_fmt,
                 int src_width, int src_height)
 {
     static int inited;
@@ -2023,7 +2023,7 @@ int img_convert(AVPicture *dst, int dst_pix_fmt,
     }
 
     /* YUV to gray */
-    if (is_yuv_planar(src_pix) && 
+    if (is_yuv_planar(src_pix) &&
         dst_pix_fmt == PIX_FMT_GRAY8) {
         if (src_pix->color_type == FF_COLOR_YUV_JPEG) {
             img_copy_plane(dst->data[0], dst->linesize[0],
@@ -2041,7 +2041,7 @@ int img_convert(AVPicture *dst, int dst_pix_fmt,
     /* YUV to YUV planar */
     if (is_yuv_planar(dst_pix) && is_yuv_planar(src_pix)) {
         int x_shift, y_shift, w, h, xy_shift;
-        void (*resize_func)(uint8_t *dst, int dst_wrap, 
+        void (*resize_func)(uint8_t *dst, int dst_wrap,
                             const uint8_t *src, int src_wrap,
                             int width, int height);
 
@@ -2128,7 +2128,7 @@ int img_convert(AVPicture *dst, int dst_pix_fmt,
             for(i = 1;i <= 2; i++)
                 img_apply_table(dst->data[i], dst->linesize[i],
                                 dst->data[i], dst->linesize[i],
-                                dst_width>>dst_pix->x_chroma_shift, 
+                                dst_width>>dst_pix->x_chroma_shift,
                                 dst_height>>dst_pix->y_chroma_shift,
                                 c_table);
         }
@@ -2150,12 +2150,12 @@ int img_convert(AVPicture *dst, int dst_pix_fmt,
         /* specific case: convert to YUV411P first */
         int_pix_fmt = PIX_FMT_YUV411P;
     } else if ((src_pix->color_type == FF_COLOR_GRAY &&
-                src_pix_fmt != PIX_FMT_GRAY8) || 
+                src_pix_fmt != PIX_FMT_GRAY8) ||
                (dst_pix->color_type == FF_COLOR_GRAY &&
                 dst_pix_fmt != PIX_FMT_GRAY8)) {
         /* gray8 is the normalized format */
         int_pix_fmt = PIX_FMT_GRAY8;
-    } else if ((is_yuv_planar(src_pix) && 
+    } else if ((is_yuv_planar(src_pix) &&
                 src_pix_fmt != PIX_FMT_YUV444P &&
                 src_pix_fmt != PIX_FMT_YUVJ444P)) {
         /* yuv444 is the normalized format */
@@ -2163,7 +2163,7 @@ int img_convert(AVPicture *dst, int dst_pix_fmt,
             int_pix_fmt = PIX_FMT_YUVJ444P;
         else
             int_pix_fmt = PIX_FMT_YUV444P;
-    } else if ((is_yuv_planar(dst_pix) && 
+    } else if ((is_yuv_planar(dst_pix) &&
                 dst_pix_fmt != PIX_FMT_YUV444P &&
                 dst_pix_fmt != PIX_FMT_YUVJ444P)) {
         /* yuv444 is the normalized format */
@@ -2200,7 +2200,7 @@ static int get_alpha_info_pal8(const AVPicture *src, int width, int height)
     int src_wrap, ret, x, y;
     unsigned int a;
     uint32_t *palette = (uint32_t *)src->data[1];
-    
+
     p = src->data[0];
     src_wrap = src->linesize[0] - width;
     ret = 0;
@@ -2224,7 +2224,7 @@ static int get_alpha_info_pal8(const AVPicture *src, int width, int height)
  * @return ored mask of FF_ALPHA_xxx constants
  */
 int img_get_alpha_info(const AVPicture *src,
-		       int pix_fmt, int width, int height)
+                       int pix_fmt, int width, int height)
 {
     PixFmtInfo *pf = &pix_fmt_info[pix_fmt];
     int ret;
@@ -2299,11 +2299,11 @@ int img_get_alpha_info(const AVPicture *src,
 #endif
 
 /* filter parameters: [-1 4 2 4 -1] // 8 */
-static void deinterlace_line(uint8_t *dst, 
-			     const uint8_t *lum_m4, const uint8_t *lum_m3, 
-			     const uint8_t *lum_m2, const uint8_t *lum_m1, 
-			     const uint8_t *lum,
-			     int size)
+static void deinterlace_line(uint8_t *dst,
+                             const uint8_t *lum_m4, const uint8_t *lum_m3,
+                             const uint8_t *lum_m2, const uint8_t *lum_m1,
+                             const uint8_t *lum,
+                             int size)
 {
 #ifndef HAVE_MMX
     uint8_t *cm = cropTbl + MAX_NEG_CROP;
@@ -2421,7 +2421,7 @@ static void deinterlace_bottom_field(uint8_t *dst, int dst_wrap,
 }
 
 static void deinterlace_bottom_field_inplace(uint8_t *src1, int src_wrap,
-					     int width, int height)
+                                             int width, int height)
 {
     uint8_t *src_m1, *src_0, *src_p1, *src_p2;
     int y;
@@ -2455,7 +2455,7 @@ int avpicture_deinterlace(AVPicture *dst, const AVPicture *src,
     if (pix_fmt != PIX_FMT_YUV420P &&
         pix_fmt != PIX_FMT_YUV422P &&
         pix_fmt != PIX_FMT_YUV444P &&
-	pix_fmt != PIX_FMT_YUV411P)
+        pix_fmt != PIX_FMT_YUV411P)
         return -1;
     if ((width & 3) != 0 || (height & 3) != 0)
         return -1;
diff --git a/src/libffmpeg/libavcodec/imgconvert_template.h b/src/libffmpeg/libavcodec/imgconvert_template.h
index cd5a7313c..e58b0cae2 100644
--- a/src/libffmpeg/libavcodec/imgconvert_template.h
+++ b/src/libffmpeg/libavcodec/imgconvert_template.h
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef RGB_OUT
@@ -787,7 +787,7 @@ static void glue(RGB_NAME, _to_pal8)(AVPicture *dst, const AVPicture *src,
     q = dst->data[0];
     dst_wrap = dst->linesize[0] - width;
     has_alpha = 0;
-    
+
     for(y=0;y<height;y++) {
         for(x=0;x<width;x++) {
 #ifdef RGBA_IN
@@ -817,11 +817,11 @@ static void glue(RGB_NAME, _to_pal8)(AVPicture *dst, const AVPicture *src,
 }
 
 #endif /* defined(FMT_RGB24) || defined(FMT_RGBA32) */
-        
+
 #ifdef RGBA_IN
 
 static int glue(get_alpha_info_, RGB_NAME)(const AVPicture *src,
-					   int width, int height)
+                                           int width, int height)
 {
     const unsigned char *p;
     int src_wrap, ret, x, y;
diff --git a/src/libffmpeg/libavcodec/imgresample.c b/src/libffmpeg/libavcodec/imgresample.c
index d423f388c..906fde3f2 100644
--- a/src/libffmpeg/libavcodec/imgresample.c
+++ b/src/libffmpeg/libavcodec/imgresample.c
@@ -1,5 +1,5 @@
 /*
- * High quality image resampling with polyphase filters 
+ * High quality image resampling with polyphase filters
  * Copyright (c) 2001 Fabrice Bellard.
  *
  * This library is free software; you can redistribute it and/or
@@ -14,14 +14,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file imgresample.c
  * High quality image resampling with polyphase filters .
  */
- 
+
 #include "avcodec.h"
 #include "dsputil.h"
 
@@ -50,8 +50,8 @@ struct ImgReSampleContext {
     int padtop, padbottom, padleft, padright;
     int pad_owidth, pad_oheight;
     int h_incr, v_incr;
-    int16_t h_filters[NB_PHASES][NB_TAPS] __align8; /* horizontal filters */
-    int16_t v_filters[NB_PHASES][NB_TAPS] __align8; /* vertical filters */
+    DECLARE_ALIGNED_8(int16_t, h_filters[NB_PHASES][NB_TAPS]); /* horizontal filters */
+    DECLARE_ALIGNED_8(int16_t, v_filters[NB_PHASES][NB_TAPS]); /* vertical filters */
     uint8_t *line_buf;
 };
 
@@ -64,8 +64,8 @@ static inline int get_phase(int pos)
 
 /* This function must be optimized */
 static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
-			    int src_width, int src_start, int src_incr,
-			    int16_t *filters)
+                            int src_width, int src_start, int src_incr,
+                            int16_t *filters)
 {
     int src_pos, phase, sum, i;
     const uint8_t *s;
@@ -108,7 +108,7 @@ static void h_resample_fast(uint8_t *dst, int dst_width, const uint8_t *src,
 
 /* This function must be optimized */
 static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
-		       int wrap, int16_t *filter)
+                       int wrap, int16_t *filter)
 {
     int sum, i;
     const uint8_t *s;
@@ -167,14 +167,14 @@ static void v_resample(uint8_t *dst, int dst_width, const uint8_t *src,
 
 /* XXX: do four pixels at a time */
 static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
-				 const uint8_t *src, int src_width,
+                                 const uint8_t *src, int src_width,
                                  int src_start, int src_incr, int16_t *filters)
 {
     int src_pos, phase;
     const uint8_t *s;
     int16_t *filter;
     mmx_t tmp;
-    
+
     src_pos = src_start;
     pxor_r2r(mm7, mm7);
 
@@ -212,13 +212,13 @@ static void h_resample_fast4_mmx(uint8_t *dst, int dst_width,
 }
 
 static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
-			    int wrap, int16_t *filter)
+                            int wrap, int16_t *filter)
 {
     int sum, i, v;
     const uint8_t *s;
     mmx_t tmp;
     mmx_t coefs[4];
-    
+
     for(i=0;i<4;i++) {
         v = filter[i];
         coefs[i].uw[0] = v;
@@ -226,7 +226,7 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
         coefs[i].uw[2] = v;
         coefs[i].uw[3] = v;
     }
-    
+
     pxor_r2r(mm7, mm7);
     s = src;
     while (dst_width >= 4) {
@@ -248,7 +248,7 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
         paddw_r2r(mm3, mm2);
         paddw_r2r(mm2, mm0);
         psraw_i2r(FILTER_BITS, mm0);
-        
+
         packuswb_r2r(mm7, mm0);
         movq_r2m(mm0, tmp);
 
@@ -277,24 +277,24 @@ static void v_resample4_mmx(uint8_t *dst, int dst_width, const uint8_t *src,
 #endif
 
 #ifdef HAVE_ALTIVEC
-typedef	union {
+typedef         union {
     vector unsigned char v;
     unsigned char c[16];
 } vec_uc_t;
 
-typedef	union {
+typedef         union {
     vector signed short v;
     signed short s[8];
 } vec_ss_t;
 
 void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
-			  int wrap, int16_t *filter)
+                          int wrap, int16_t *filter)
 {
     int sum, i;
     const uint8_t *s;
     vector unsigned char *tv, tmp, dstv, zero;
     vec_ss_t srchv[4], srclv[4], fv[4];
-    vector signed short zeros, sumhv, sumlv;    
+    vector signed short zeros, sumhv, sumlv;
     s = src;
 
     for(i=0;i<4;i++)
@@ -308,7 +308,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
         fv[i].s[0] = filter[i] << (15-FILTER_BITS);
         fv[i].v = vec_splat(fv[i].v, 0);
     }
-    
+
     zero = vec_splat_u8(0);
     zeros = vec_splat_s16(0);
 
@@ -334,7 +334,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
         dst_width--;
         i--;
     }
-    
+
     /* Do our altivec resampling on 16 pixels at once. */
     while(dst_width>=16) {
         /*
@@ -371,14 +371,14 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
         srclv[3].v = (vector signed short) vec_mergel(zero, tmp);
         sumhv = vec_madds(srchv[3].v, fv[3].v, sumhv);
         sumlv = vec_madds(srclv[3].v, fv[3].v, sumlv);
-    
+
         /*
            Pack the results into our destination vector,
            and do an aligned write of that back to memory.
         */
         dstv = vec_packsu(sumhv, sumlv) ;
         vec_st(dstv, 0, (vector unsigned char *) dst);
-        
+
         dst+=16;
         s+=16;
         dst_width-=16;
@@ -405,7 +405,7 @@ void v_resample16_altivec(uint8_t *dst, int dst_width, const uint8_t *src,
 
 /* slow version to handle limit cases. Does not need optimisation */
 static void h_resample_slow(uint8_t *dst, int dst_width,
-			    const uint8_t *src, int src_width,
+                            const uint8_t *src, int src_width,
                             int src_start, int src_incr, int16_t *filters)
 {
     int src_pos, phase, sum, j, v, i;
@@ -441,8 +441,8 @@ static void h_resample_slow(uint8_t *dst, int dst_width,
 }
 
 static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
-		       int src_width, int src_start, int src_incr,
-		       int16_t *filters)
+                       int src_width, int src_start, int src_incr,
+                       int16_t *filters)
 {
     int n, src_end;
 
@@ -455,29 +455,29 @@ static void h_resample(uint8_t *dst, int dst_width, const uint8_t *src,
     }
     src_end = src_start + dst_width * src_incr;
     if (src_end > ((src_width - NB_TAPS) << POS_FRAC_BITS)) {
-        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) / 
+        n = (((src_width - NB_TAPS + 1) << POS_FRAC_BITS) - 1 - src_start) /
             src_incr;
     } else {
         n = dst_width;
     }
 #ifdef HAVE_MMX
     if ((mm_flags & MM_MMX) && NB_TAPS == 4)
-        h_resample_fast4_mmx(dst, n, 
+        h_resample_fast4_mmx(dst, n,
                              src, src_width, src_start, src_incr, filters);
     else
 #endif
-        h_resample_fast(dst, n, 
+        h_resample_fast(dst, n,
                         src, src_width, src_start, src_incr, filters);
     if (n < dst_width) {
         dst += n;
         dst_width -= n;
         src_start += n * src_incr;
-        h_resample_slow(dst, dst_width, 
+        h_resample_slow(dst, dst_width,
                         src, src_width, src_start, src_incr, filters);
     }
 }
 
-static void component_resample(ImgReSampleContext *s, 
+static void component_resample(ImgReSampleContext *s,
                                uint8_t *output, int owrap, int owidth, int oheight,
                                uint8_t *input, int iwrap, int iwidth, int iheight)
 {
@@ -486,7 +486,7 @@ static void component_resample(ImgReSampleContext *s,
 
     last_src_y = - FCENTER - 1;
     /* position of the bottom of the filter in the source image */
-    src_y = (last_src_y + NB_TAPS) * POS_FRAC; 
+    src_y = (last_src_y + NB_TAPS) * POS_FRAC;
     ring_y = NB_TAPS; /* position in ring buffer */
     for(y=0;y<oheight;y++) {
         /* apply horizontal filter on new lines from input if needed */
@@ -506,8 +506,8 @@ static void component_resample(ImgReSampleContext *s,
             src_line = input + y1 * iwrap;
             new_line = s->line_buf + ring_y * owidth;
             /* apply filter and handle limit cases correctly */
-            h_resample(new_line, owidth, 
-                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr, 
+            h_resample(new_line, owidth,
+                       src_line, iwidth, - FCENTER * POS_FRAC, s->h_incr,
                        &s->h_filters[0][0]);
             /* handle ring buffer wraping */
             if (ring_y >= LINE_BUF_HEIGHT) {
@@ -520,8 +520,8 @@ static void component_resample(ImgReSampleContext *s,
 #ifdef HAVE_MMX
         /* desactivated MMX because loss of precision */
         if ((mm_flags & MM_MMX) && NB_TAPS == 4 && 0)
-            v_resample4_mmx(output, owidth, 
-                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, 
+            v_resample4_mmx(output, owidth,
+                            s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                             &s->v_filters[phase_y][0]);
         else
 #endif
@@ -532,12 +532,12 @@ static void component_resample(ImgReSampleContext *s,
                                 &s->v_filters[phase_y][0]);
         else
 #endif
-            v_resample(output, owidth, 
-                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth, 
+            v_resample(output, owidth,
+                       s->line_buf + (ring_y - NB_TAPS + 1) * owidth, owidth,
                        &s->v_filters[phase_y][0]);
-            
+
         src_y += s->v_incr;
-        
+
         output += owrap;
     }
 }
@@ -545,7 +545,7 @@ static void component_resample(ImgReSampleContext *s,
 ImgReSampleContext *img_resample_init(int owidth, int oheight,
                                       int iwidth, int iheight)
 {
-    return img_resample_full_init(owidth, oheight, iwidth, iheight, 
+    return img_resample_full_init(owidth, oheight, iwidth, iheight,
             0, 0, 0, 0, 0, 0, 0, 0);
 }
 
@@ -559,7 +559,7 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
     ImgReSampleContext *s;
 
     if (!owidth || !oheight || !iwidth || !iheight)
-	return NULL;
+        return NULL;
 
     s = av_mallocz(sizeof(ImgReSampleContext));
     if (!s)
@@ -567,19 +567,19 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
     if((unsigned)owidth >= UINT_MAX / (LINE_BUF_HEIGHT + NB_TAPS))
         return NULL;
     s->line_buf = av_mallocz(owidth * (LINE_BUF_HEIGHT + NB_TAPS));
-    if (!s->line_buf) 
+    if (!s->line_buf)
         goto fail;
-    
+
     s->owidth = owidth;
     s->oheight = oheight;
     s->iwidth = iwidth;
     s->iheight = iheight;
-  
+
     s->topBand = topBand;
     s->bottomBand = bottomBand;
     s->leftBand = leftBand;
     s->rightBand = rightBand;
-    
+
     s->padtop = padtop;
     s->padbottom = padbottom;
     s->padleft = padleft;
@@ -589,11 +589,11 @@ ImgReSampleContext *img_resample_full_init(int owidth, int oheight,
     s->pad_oheight = oheight - (padtop + padbottom);
 
     s->h_incr = ((iwidth - leftBand - rightBand) * POS_FRAC) / s->pad_owidth;
-    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight; 
+    s->v_incr = ((iheight - topBand - bottomBand) * POS_FRAC) / s->pad_oheight;
 
-    av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth  / 
+    av_build_filter(&s->h_filters[0][0], (float) s->pad_owidth  /
             (float) (iwidth - leftBand - rightBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
-    av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight / 
+    av_build_filter(&s->v_filters[0][0], (float) s->pad_oheight /
             (float) (iheight - topBand - bottomBand), NB_TAPS, NB_PHASES, 1<<FILTER_BITS, 0);
 
     return s;
@@ -602,7 +602,7 @@ fail:
     return NULL;
 }
 
-void img_resample(ImgReSampleContext *s, 
+void img_resample(ImgReSampleContext *s,
                   AVPicture *output, const AVPicture *input)
 {
     int i, shift;
@@ -611,14 +611,14 @@ void img_resample(ImgReSampleContext *s,
     for (i=0;i<3;i++) {
         shift = (i == 0) ? 0 : 1;
 
-        optr = output->data[i] + (((output->linesize[i] * 
+        optr = output->data[i] + (((output->linesize[i] *
                         s->padtop) + s->padleft) >> shift);
 
-        component_resample(s, optr, output->linesize[i], 
+        component_resample(s, optr, output->linesize[i],
                 s->pad_owidth >> shift, s->pad_oheight >> shift,
-                input->data[i] + (input->linesize[i] * 
+                input->data[i] + (input->linesize[i] *
                     (s->topBand >> shift)) + (s->leftBand >> shift),
-                input->linesize[i], ((s->iwidth - s->leftBand - 
+                input->linesize[i], ((s->iwidth - s->leftBand -
                         s->rightBand) >> shift),
                            (s->iheight - s->topBand - s->bottomBand) >> shift);
     }
@@ -690,20 +690,20 @@ int main(int argc, char **argv)
                     else
                         v = 0x00;
                 } else if (x < XSIZE/4) {
-                    if (x & 1) 
+                    if (x & 1)
                         v = 0xff;
-                    else 
+                    else
                         v = 0;
                 } else if (y < XSIZE/4) {
-                    if (y & 1) 
+                    if (y & 1)
                         v = 0xff;
-                    else 
+                    else
                         v = 0;
                 } else {
                     if (y < YSIZE*3/8) {
-                        if ((y+x) & 1) 
+                        if ((y+x) & 1)
                             v = 0xff;
-                        else 
+                        else
                             v = 0;
                     } else {
                         if (((x+3) % 4) <= 1 &&
diff --git a/src/libffmpeg/libavcodec/indeo2.c b/src/libffmpeg/libavcodec/indeo2.c
index 7001beb12..3814e5250 100644
--- a/src/libffmpeg/libavcodec/indeo2.c
+++ b/src/libffmpeg/libavcodec/indeo2.c
@@ -14,10 +14,10 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file indeo2.c
  * Intel Indeo 2 decoder.
@@ -51,7 +51,7 @@ static int ir2_decode_plane(Ir2Context *ctx, int width, int height, uint8_t *dst
     int out = 0;
     int c;
     int t;
-    
+
     if(width&1)
         return -1;
 
@@ -70,7 +70,7 @@ static int ir2_decode_plane(Ir2Context *ctx, int width, int height, uint8_t *dst
         }
     }
     dst += stride;
-    
+
     for (j = 1; j < height; j++){
         out = 0;
         while (out < width){
@@ -133,7 +133,7 @@ static int ir2_decode_plane_inter(Ir2Context *ctx, int width, int height, uint8_
     return 0;
 }
 
-static int ir2_decode_frame(AVCodecContext *avctx, 
+static int ir2_decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -153,9 +153,9 @@ static int ir2_decode_frame(AVCodecContext *avctx,
     }
 
     s->decode_delta = buf[18];
-    
+
     /* decide whether frame uses deltas or not */
-#ifndef ALT_BITSTREAM_READER_LE  
+#ifndef ALT_BITSTREAM_READER_LE
     for (i = 0; i < buf_size; i++)
         buf[i] = ff_reverse[buf[i]];
 #endif
@@ -193,16 +193,16 @@ static int ir2_decode_init(AVCodecContext *avctx){
     ic->avctx = avctx;
 
     avctx->pix_fmt= PIX_FMT_YUV410P;
-    
+
     if (!ir2_vlc.table)
         init_vlc(&ir2_vlc, CODE_VLC_BITS, IR2_CODES,
                  &ir2_codes[0][1], 4, 2,
 #ifdef ALT_BITSTREAM_READER_LE
-                 &ir2_codes[0][0], 4, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);    
+                 &ir2_codes[0][0], 4, 2, INIT_VLC_USE_STATIC | INIT_VLC_LE);
 #else
-                 &ir2_codes[0][0], 4, 2, INIT_VLC_USE_STATIC);    
+                 &ir2_codes[0][0], 4, 2, INIT_VLC_USE_STATIC);
 #endif
-                 
+
     return 0;
 }
 
diff --git a/src/libffmpeg/libavcodec/indeo2data.h b/src/libffmpeg/libavcodec/indeo2data.h
index 58e7e48dc..2430b53c3 100644
--- a/src/libffmpeg/libavcodec/indeo2data.h
+++ b/src/libffmpeg/libavcodec/indeo2data.h
@@ -1,41 +1,41 @@
 #define IR2_CODES 143
 static const uint16_t ir2_codes[IR2_CODES][2] = {
 #ifdef ALT_BITSTREAM_READER_LE
-{0x0000,  3}, {0x0004,  3}, {0x0006,  3}, {0x0001,  5}, 
-{0x0009,  5}, {0x0019,  5}, {0x000D,  5}, {0x001D,  5}, 
-{0x0023,  6}, {0x0013,  6}, {0x0033,  6}, {0x000B,  6}, 
-{0x002B,  6}, {0x001B,  6}, {0x0007,  8}, {0x0087,  8}, 
-{0x0027,  8}, {0x00A7,  8}, {0x0067,  8}, {0x00E7,  8}, 
-{0x0097,  8}, {0x0057,  8}, {0x0037,  8}, {0x00B7,  8}, 
-{0x00F7,  8}, {0x000F,  9}, {0x008F,  9}, {0x018F,  9}, 
-{0x014F,  9}, {0x00CF,  9}, {0x002F,  9}, {0x012F,  9}, 
-{0x01AF,  9}, {0x006F,  9}, {0x00EF,  9}, {0x01EF,  9}, 
-{0x001F, 10}, {0x021F, 10}, {0x011F, 10}, {0x031F, 10}, 
-{0x009F, 10}, {0x029F, 10}, {0x019F, 10}, {0x039F, 10}, 
-{0x005F, 10}, {0x025F, 10}, {0x015F, 10}, {0x035F, 10}, 
-{0x00DF, 10}, {0x02DF, 10}, {0x01DF, 10}, {0x03DF, 10}, 
-{0x003F, 13}, {0x103F, 13}, {0x083F, 13}, {0x183F, 13}, 
-{0x043F, 13}, {0x143F, 13}, {0x0C3F, 13}, {0x1C3F, 13}, 
-{0x023F, 13}, {0x123F, 13}, {0x0A3F, 13}, {0x1A3F, 13}, 
-{0x063F, 13}, {0x163F, 13}, {0x0E3F, 13}, {0x1E3F, 13}, 
-{0x013F, 13}, {0x113F, 13}, {0x093F, 13}, {0x193F, 13}, 
-{0x053F, 13}, {0x153F, 13}, {0x0D3F, 13}, {0x1D3F, 13}, 
-{0x033F, 13}, {0x133F, 13}, {0x0B3F, 13}, {0x1B3F, 13}, 
-{0x073F, 13}, {0x173F, 13}, {0x0F3F, 13}, {0x1F3F, 13}, 
-{0x00BF, 13}, {0x10BF, 13}, {0x08BF, 13}, {0x18BF, 13}, 
-{0x04BF, 13}, {0x14BF, 13}, {0x0CBF, 13}, {0x1CBF, 13}, 
-{0x02BF, 13}, {0x12BF, 13}, {0x0ABF, 13}, {0x1ABF, 13}, 
-{0x06BF, 13}, {0x16BF, 13}, {0x0EBF, 13}, {0x1EBF, 13}, 
-{0x01BF, 13}, {0x11BF, 13}, {0x09BF, 13}, {0x19BF, 13}, 
-{0x05BF, 13}, {0x15BF, 13}, {0x0DBF, 13}, {0x1DBF, 13}, 
-{0x03BF, 13}, {0x13BF, 13}, {0x0BBF, 13}, {0x1BBF, 13}, 
-{0x07BF, 13}, {0x17BF, 13}, {0x0FBF, 13}, {0x1FBF, 13}, 
-{0x007F, 14}, {0x207F, 14}, {0x107F, 14}, {0x307F, 14}, 
-{0x087F, 14}, {0x287F, 14}, {0x187F, 14}, {0x387F, 14}, 
-{0x047F, 14}, {0x247F, 14}, {0x147F, 14}, {0x0002,  3}, 
-{0x0011,  5}, {0x0005,  5}, {0x0015,  5}, {0x0003,  6}, 
-{0x003B,  6}, {0x0047,  8}, {0x00C7,  8}, {0x0017,  8}, 
-{0x00D7,  8}, {0x0077,  8}, {0x010F,  9}, {0x004F,  9}, 
+{0x0000,  3}, {0x0004,  3}, {0x0006,  3}, {0x0001,  5},
+{0x0009,  5}, {0x0019,  5}, {0x000D,  5}, {0x001D,  5},
+{0x0023,  6}, {0x0013,  6}, {0x0033,  6}, {0x000B,  6},
+{0x002B,  6}, {0x001B,  6}, {0x0007,  8}, {0x0087,  8},
+{0x0027,  8}, {0x00A7,  8}, {0x0067,  8}, {0x00E7,  8},
+{0x0097,  8}, {0x0057,  8}, {0x0037,  8}, {0x00B7,  8},
+{0x00F7,  8}, {0x000F,  9}, {0x008F,  9}, {0x018F,  9},
+{0x014F,  9}, {0x00CF,  9}, {0x002F,  9}, {0x012F,  9},
+{0x01AF,  9}, {0x006F,  9}, {0x00EF,  9}, {0x01EF,  9},
+{0x001F, 10}, {0x021F, 10}, {0x011F, 10}, {0x031F, 10},
+{0x009F, 10}, {0x029F, 10}, {0x019F, 10}, {0x039F, 10},
+{0x005F, 10}, {0x025F, 10}, {0x015F, 10}, {0x035F, 10},
+{0x00DF, 10}, {0x02DF, 10}, {0x01DF, 10}, {0x03DF, 10},
+{0x003F, 13}, {0x103F, 13}, {0x083F, 13}, {0x183F, 13},
+{0x043F, 13}, {0x143F, 13}, {0x0C3F, 13}, {0x1C3F, 13},
+{0x023F, 13}, {0x123F, 13}, {0x0A3F, 13}, {0x1A3F, 13},
+{0x063F, 13}, {0x163F, 13}, {0x0E3F, 13}, {0x1E3F, 13},
+{0x013F, 13}, {0x113F, 13}, {0x093F, 13}, {0x193F, 13},
+{0x053F, 13}, {0x153F, 13}, {0x0D3F, 13}, {0x1D3F, 13},
+{0x033F, 13}, {0x133F, 13}, {0x0B3F, 13}, {0x1B3F, 13},
+{0x073F, 13}, {0x173F, 13}, {0x0F3F, 13}, {0x1F3F, 13},
+{0x00BF, 13}, {0x10BF, 13}, {0x08BF, 13}, {0x18BF, 13},
+{0x04BF, 13}, {0x14BF, 13}, {0x0CBF, 13}, {0x1CBF, 13},
+{0x02BF, 13}, {0x12BF, 13}, {0x0ABF, 13}, {0x1ABF, 13},
+{0x06BF, 13}, {0x16BF, 13}, {0x0EBF, 13}, {0x1EBF, 13},
+{0x01BF, 13}, {0x11BF, 13}, {0x09BF, 13}, {0x19BF, 13},
+{0x05BF, 13}, {0x15BF, 13}, {0x0DBF, 13}, {0x1DBF, 13},
+{0x03BF, 13}, {0x13BF, 13}, {0x0BBF, 13}, {0x1BBF, 13},
+{0x07BF, 13}, {0x17BF, 13}, {0x0FBF, 13}, {0x1FBF, 13},
+{0x007F, 14}, {0x207F, 14}, {0x107F, 14}, {0x307F, 14},
+{0x087F, 14}, {0x287F, 14}, {0x187F, 14}, {0x387F, 14},
+{0x047F, 14}, {0x247F, 14}, {0x147F, 14}, {0x0002,  3},
+{0x0011,  5}, {0x0005,  5}, {0x0015,  5}, {0x0003,  6},
+{0x003B,  6}, {0x0047,  8}, {0x00C7,  8}, {0x0017,  8},
+{0x00D7,  8}, {0x0077,  8}, {0x010F,  9}, {0x004F,  9},
 {0x01CF,  9}, {0x00AF,  9}, {0x016F,  9},
 #else
     {0x0000,  3}, {0x0001,  3}, {0x0003,  3}, {0x0010,  5},
@@ -82,32 +82,32 @@ static const uint8_t ir2_luma_table[256] = {
  0x81, 0x7B, 0x85, 0x7F, 0x7B, 0x81, 0x8C, 0x8C,
  0x74, 0x74, 0x83, 0x8D, 0x7D, 0x73, 0x8D, 0x83,
  0x73, 0x7D, 0x77, 0x89, 0x89, 0x77, 0x89, 0x77,
- 0x77, 0x89, 0x8C, 0x95, 0x74, 0x6B, 0x95, 0x8C, 
+ 0x77, 0x89, 0x8C, 0x95, 0x74, 0x6B, 0x95, 0x8C,
  0x6B, 0x74, 0x7C, 0x90, 0x84, 0x70, 0x90, 0x7C,
- 0x70, 0x84, 0x96, 0x96, 0x6A, 0x6A, 0x82, 0x98, 
+ 0x70, 0x84, 0x96, 0x96, 0x6A, 0x6A, 0x82, 0x98,
  0x7E, 0x68, 0x98, 0x82, 0x68, 0x7E, 0x97, 0xA2,
- 0x69, 0x5E, 0xA2, 0x97, 0x5E, 0x69, 0xA2, 0xA2, 
+ 0x69, 0x5E, 0xA2, 0x97, 0x5E, 0x69, 0xA2, 0xA2,
  0x5E, 0x5E, 0x8B, 0xA3, 0x75, 0x5D, 0xA3, 0x8B,
- 0x5D, 0x75, 0x71, 0x95, 0x8F, 0x6B, 0x95, 0x71, 
+ 0x5D, 0x75, 0x71, 0x95, 0x8F, 0x6B, 0x95, 0x71,
  0x6B, 0x8F, 0x78, 0x9D, 0x88, 0x63, 0x9D, 0x78,
- 0x63, 0x88, 0x7F, 0xA7, 0x81, 0x59, 0xA7, 0x7F, 
+ 0x63, 0x88, 0x7F, 0xA7, 0x81, 0x59, 0xA7, 0x7F,
  0x59, 0x81, 0xA4, 0xB1, 0x5C, 0x4F, 0xB1, 0xA4,
- 0x4F, 0x5C, 0x96, 0xB1, 0x6A, 0x4F, 0xB1, 0x96, 
+ 0x4F, 0x5C, 0x96, 0xB1, 0x6A, 0x4F, 0xB1, 0x96,
  0x4F, 0x6A, 0xB2, 0xB2, 0x4E, 0x4E, 0x65, 0x9B,
- 0x9B, 0x65, 0x9B, 0x65, 0x65, 0x9B, 0x89, 0xB4, 
+ 0x9B, 0x65, 0x9B, 0x65, 0x65, 0x9B, 0x89, 0xB4,
  0x77, 0x4C, 0xB4, 0x89, 0x4C, 0x77, 0x6A, 0xA3,
- 0x96, 0x5D, 0xA3, 0x6A, 0x5D, 0x96, 0x73, 0xAC, 
+ 0x96, 0x5D, 0xA3, 0x6A, 0x5D, 0x96, 0x73, 0xAC,
  0x8D, 0x54, 0xAC, 0x73, 0x54, 0x8D, 0xB4, 0xC3,
- 0x4C, 0x3D, 0xC3, 0xB4, 0x3D, 0x4C, 0xA4, 0xC3, 
+ 0x4C, 0x3D, 0xC3, 0xB4, 0x3D, 0x4C, 0xA4, 0xC3,
  0x5C, 0x3D, 0xC3, 0xA4, 0x3D, 0x5C, 0xC4, 0xC4,
- 0x3C, 0x3C, 0x96, 0xC6, 0x6A, 0x3A, 0xC6, 0x96, 
+ 0x3C, 0x3C, 0x96, 0xC6, 0x6A, 0x3A, 0xC6, 0x96,
  0x3A, 0x6A, 0x7C, 0xBA, 0x84, 0x46, 0xBA, 0x7C,
- 0x46, 0x84, 0x5B, 0xAB, 0xA5, 0x55, 0xAB, 0x5B, 
+ 0x46, 0x84, 0x5B, 0xAB, 0xA5, 0x55, 0xAB, 0x5B,
  0x55, 0xA5, 0x63, 0xB4, 0x9D, 0x4C, 0xB4, 0x63,
- 0x4C, 0x9D, 0x86, 0xCA, 0x7A, 0x36, 0xCA, 0x86, 
+ 0x4C, 0x9D, 0x86, 0xCA, 0x7A, 0x36, 0xCA, 0x86,
  0x36, 0x7A, 0xB6, 0xD7, 0x4A, 0x29, 0xD7, 0xB6,
- 0x29, 0x4A, 0xC8, 0xD7, 0x38, 0x29, 0xD7, 0xC8, 
+ 0x29, 0x4A, 0xC8, 0xD7, 0x38, 0x29, 0xD7, 0xC8,
  0x29, 0x38, 0xA4, 0xD8, 0x5C, 0x28, 0xD8, 0xA4,
- 0x28, 0x5C, 0x6C, 0xC1, 0x94, 0x3F, 0xC1, 0x6C, 
+ 0x28, 0x5C, 0x6C, 0xC1, 0x94, 0x3F, 0xC1, 0x6C,
  0x3F, 0x94, 0xD9, 0xD9, 0x27, 0x27, 0x80, 0x80
 };
diff --git a/src/libffmpeg/libavcodec/indeo3.c b/src/libffmpeg/libavcodec/indeo3.c
index 351af2191..90eb37150 100644
--- a/src/libffmpeg/libavcodec/indeo3.c
+++ b/src/libffmpeg/libavcodec/indeo3.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <stdio.h>
@@ -61,7 +61,7 @@ static int corrector_type_0[24] = {
 
 static int corrector_type_2[8] = { 9, 7, 6, 8, 5, 4, 3, 2 };
 
-static void build_modpred(Indeo3DecodeContext *s) 
+static void build_modpred(Indeo3DecodeContext *s)
 {
   int i, j;
 
@@ -70,13 +70,13 @@ static void build_modpred(Indeo3DecodeContext *s)
   for (i=0; i < 128; ++i) {
     s->ModPred[i+0*128] = (i > 126) ? 254 : 2*((i + 1) - ((i + 1) % 2));
     s->ModPred[i+1*128] = (i == 7)  ?  20 : ((i == 119 || i == 120)
-				 ? 236 : 2*((i + 2) - ((i + 1) % 3)));
+                                 ? 236 : 2*((i + 2) - ((i + 1) % 3)));
     s->ModPred[i+2*128] = (i > 125) ? 248 : 2*((i + 2) - ((i + 2) % 4));
-    s->ModPred[i+3*128] =			 2*((i + 1) - ((i - 3) % 5));
+    s->ModPred[i+3*128] =                        2*((i + 1) - ((i - 3) % 5));
     s->ModPred[i+4*128] = (i == 8)  ?  20 : 2*((i + 1) - ((i - 3) % 6));
-    s->ModPred[i+5*128] =			 2*((i + 4) - ((i + 3) % 7));
+    s->ModPred[i+5*128] =                        2*((i + 4) - ((i + 3) % 7));
     s->ModPred[i+6*128] = (i > 123) ? 240 : 2*((i + 4) - ((i + 4) % 8));
-    s->ModPred[i+7*128] =			 2*((i + 5) - ((i + 4) % 9));
+    s->ModPred[i+7*128] =                        2*((i + 5) - ((i + 4) % 9));
   }
 
   s->corrector_type = (unsigned short *) av_malloc (24 * 256 * sizeof(unsigned short));
@@ -84,14 +84,14 @@ static void build_modpred(Indeo3DecodeContext *s)
   for (i=0; i < 24; ++i) {
     for (j=0; j < 256; ++j) {
       s->corrector_type[i*256+j] = (j < corrector_type_0[i])
-				? 1 : ((j < 248 || (i == 16 && j == 248))
-				       ? 0 : corrector_type_2[j - 248]);
+                                ? 1 : ((j < 248 || (i == 16 && j == 248))
+                                       ? 0 : corrector_type_2[j - 248]);
     }
   }
 }
 
-static void iv_Decode_Chunk(Indeo3DecodeContext *s, unsigned char *cur, 
-  unsigned char *ref, int width, int height, unsigned char *buf1, 
+static void iv_Decode_Chunk(Indeo3DecodeContext *s, unsigned char *cur,
+  unsigned char *ref, int width, int height, unsigned char *buf1,
   long fflags2, unsigned char *hdr,
   unsigned char *buf2, int min_width_160);
 
@@ -100,7 +100,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s, unsigned char *cur,
 #endif
 
 /* ---------------------------------------------------------------------- */
-static void iv_alloc_frames(Indeo3DecodeContext *s) 
+static void iv_alloc_frames(Indeo3DecodeContext *s)
 {
   int luma_width, luma_height, luma_pixels, chroma_width, chroma_height,
       chroma_pixels, i;
@@ -109,9 +109,9 @@ static void iv_alloc_frames(Indeo3DecodeContext *s)
   luma_width   = (s->width  + 3) & (~3);
   luma_height  = (s->height + 3) & (~3);
 
-  s->iv_frame[0].y_w = s->iv_frame[0].y_h = 
+  s->iv_frame[0].y_w = s->iv_frame[0].y_h =
     s->iv_frame[0].the_buf_size = 0;
-  s->iv_frame[1].y_w = s->iv_frame[1].y_h = 
+  s->iv_frame[1].y_w = s->iv_frame[1].y_h =
     s->iv_frame[1].the_buf_size = 0;
   s->iv_frame[1].the_buf = NULL;
 
@@ -120,11 +120,11 @@ static void iv_alloc_frames(Indeo3DecodeContext *s)
   luma_pixels = luma_width * luma_height;
   chroma_pixels = chroma_width * chroma_height;
 
-  bufsize = luma_pixels * 2 + luma_width * 3 + 
+  bufsize = luma_pixels * 2 + luma_width * 3 +
     (chroma_pixels + chroma_width) * 4;
 
-  if((s->iv_frame[0].the_buf = 
-    (s->iv_frame[0].the_buf_size == 0 ? av_malloc(bufsize) : 
+  if((s->iv_frame[0].the_buf =
+    (s->iv_frame[0].the_buf_size == 0 ? av_malloc(bufsize) :
       av_realloc(s->iv_frame[0].the_buf, bufsize))) == NULL)
     return;
   s->iv_frame[0].y_w = s->iv_frame[1].y_w = luma_width;
@@ -146,7 +146,7 @@ static void iv_alloc_frames(Indeo3DecodeContext *s)
   s->iv_frame[1].Vbuf = s->iv_frame[0].the_buf + i;
 
   for(i = 1; i <= luma_width; i++)
-    s->iv_frame[0].Ybuf[-i] = s->iv_frame[1].Ybuf[-i] = 
+    s->iv_frame[0].Ybuf[-i] = s->iv_frame[1].Ybuf[-i] =
       s->iv_frame[0].Ubuf[-i] = 0x80;
 
   for(i = 1; i <= chroma_width; i++) {
@@ -158,14 +158,14 @@ static void iv_alloc_frames(Indeo3DecodeContext *s)
 }
 
 /* ---------------------------------------------------------------------- */
-static void iv_free_func(Indeo3DecodeContext *s) 
+static void iv_free_func(Indeo3DecodeContext *s)
 {
   int i;
 
   for(i = 0 ; i < 2 ; i++) {
-    if(s->iv_frame[i].the_buf != NULL) 
+    if(s->iv_frame[i].the_buf != NULL)
       av_free(s->iv_frame[i].the_buf);
-    s->iv_frame[i].Ybuf = s->iv_frame[i].Ubuf = 
+    s->iv_frame[i].Ybuf = s->iv_frame[i].Ubuf =
       s->iv_frame[i].Vbuf = NULL;
     s->iv_frame[i].the_buf = NULL;
     s->iv_frame[i].the_buf_size = 0;
@@ -178,8 +178,8 @@ static void iv_free_func(Indeo3DecodeContext *s)
 }
 
 /* ---------------------------------------------------------------------- */
-static unsigned long iv_decode_frame(Indeo3DecodeContext *s, 
-                                     unsigned char *buf, int buf_size) 
+static unsigned long iv_decode_frame(Indeo3DecodeContext *s,
+                                     unsigned char *buf, int buf_size)
 {
   unsigned int hdr_width, hdr_height,
     chroma_width, chroma_height;
@@ -198,10 +198,10 @@ static unsigned long iv_decode_frame(Indeo3DecodeContext *s,
   hdr_height = le2me_16(*(uint16_t *)buf_pos);
   buf_pos += 2;
   hdr_width = le2me_16(*(uint16_t *)buf_pos);
-  
+
   if(avcodec_check_dimensions(NULL, hdr_width, hdr_height))
       return -1;
-  
+
   buf_pos += 2;
   chroma_height = ((hdr_height >> 2) + 3) & 0x7ffc;
   chroma_width = ((hdr_width >> 2) + 3) & 0x7ffc;
@@ -226,8 +226,8 @@ static unsigned long iv_decode_frame(Indeo3DecodeContext *s,
   offs = le2me_32(*(uint32_t *)buf_pos);
   buf_pos += 4;
 
-  iv_Decode_Chunk(s, s->cur_frame->Ybuf, s->ref_frame->Ybuf, hdr_width, 
-    hdr_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos, 
+  iv_Decode_Chunk(s, s->cur_frame->Ybuf, s->ref_frame->Ybuf, hdr_width,
+    hdr_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos,
     min(hdr_width, 160));
 
   if (!(s->avctx->flags & CODEC_FLAG_GRAY))
@@ -237,16 +237,16 @@ static unsigned long iv_decode_frame(Indeo3DecodeContext *s,
   offs = le2me_32(*(uint32_t *)buf_pos);
   buf_pos += 4;
 
-  iv_Decode_Chunk(s, s->cur_frame->Vbuf, s->ref_frame->Vbuf, chroma_width, 
-    chroma_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos, 
+  iv_Decode_Chunk(s, s->cur_frame->Vbuf, s->ref_frame->Vbuf, chroma_width,
+    chroma_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos,
     min(chroma_width, 40));
 
   buf_pos = buf + 16 + offs3;
   offs = le2me_32(*(uint32_t *)buf_pos);
   buf_pos += 4;
 
-  iv_Decode_Chunk(s, s->cur_frame->Ubuf, s->ref_frame->Ubuf, chroma_width, 
-    chroma_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos, 
+  iv_Decode_Chunk(s, s->cur_frame->Ubuf, s->ref_frame->Ubuf, chroma_width,
+    chroma_height, buf_pos + offs * 2, fflags2, hdr_pos, buf_pos,
     min(chroma_width, 40));
 
   }
@@ -309,7 +309,7 @@ typedef struct {
   lp2 = 4;
 
 static void iv_Decode_Chunk(Indeo3DecodeContext *s,
-  unsigned char *cur, unsigned char *ref, int width, int height, 
+  unsigned char *cur, unsigned char *ref, int width, int height,
   unsigned char *buf1, long fflags2, unsigned char *hdr,
   unsigned char *buf2, int min_width_160)
 {
@@ -331,7 +331,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
 
   width_tbl = width_tbl_arr + 1;
   i = (width < 0 ? width + 3 : width)/4;
-  for(j = -1; j < 8; j++) 
+  for(j = -1; j < 8; j++)
     width_tbl[j] = i * j;
 
   strip = strip_tbl;
@@ -389,7 +389,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
 
     cur_frm_pos = cur + width * strip->ypos + strip->xpos;
 
-    if((blks_width = strip->width) < 0) 
+    if((blks_width = strip->width) < 0)
       blks_width += 3;
     blks_width >>= 2;
     blks_height = strip->height;
@@ -397,7 +397,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
     if(ref_vectors != NULL) {
       ref_frm_pos = ref + (ref_vectors[0] + strip->ypos) * width +
         ref_vectors[1] + strip->xpos;
-    } else 
+    } else
       ref_frm_pos = cur_frm_pos - width_tbl[4];
 
     if(cmd == 2) {
@@ -416,7 +416,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
           cur_frm_pos += 4;
           ref_frm_pos += 4;
         }
-      } else if(cmd != 1) 
+      } else if(cmd != 1)
         return;
     } else {
       k = *buf1 >> 4;
@@ -427,9 +427,9 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
       if((lv - 8) <= 7 && (k == 0 || k == 3 || k == 10)) {
         cp2 = s->ModPred + ((lv - 8) << 7);
         cp = ref_frm_pos;
-        for(i = 0; i < blks_width << 2; i++) { 
+        for(i = 0; i < blks_width << 2; i++) {
             int v = *cp >> 1;
-            *(cp++) = cp2[v]; 
+            *(cp++) = cp2[v];
         }
       }
 
@@ -508,7 +508,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
                     break;
 
                   case 7:
-                    if(rle_v3 != 0) 
+                    if(rle_v3 != 0)
                       rle_v3 = 0;
                     else {
                       buf1--;
@@ -532,7 +532,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
 
                     LV1_CHECK(buf1,rle_v3,lv1,lp2)
                     break;
-                  default: 
+                  default:
                     return;
                 }
               }
@@ -548,7 +548,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
 
         case 4:
         case 3:                    /********** CASE 3 **********/
-          if(ref_vectors != NULL) 
+          if(ref_vectors != NULL)
             return;
           flag1 = 1;
 
@@ -605,7 +605,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
                     break;
 
                   case 7:
-                    if(rle_v3 != 0) 
+                    if(rle_v3 != 0)
                       rle_v3 = 0;
                     else {
                       buf1--;
@@ -650,7 +650,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
                     LV1_CHECK(buf1,rle_v3,lv1,lp2)
                     break;
 
-                  default: 
+                  default:
                     return;
                 }
               }
@@ -804,7 +804,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
 
                     case 7:
                       if(lp2 == 0) {
-                        if(rle_v3 != 0) 
+                        if(rle_v3 != 0)
                           rle_v3 = 0;
                         else {
                           buf1--;
@@ -825,7 +825,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
                       LV1_CHECK(buf1,rle_v3,lv1,lp2)
                       break;
 
-                    default: 
+                    default:
                       return;
                   }
                 }
@@ -921,7 +921,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
                       LV1_CHECK(buf1,rle_v3,lv1,lp2)
                       break;
 
-                    default: 
+                    default:
                       return;
                   }
                 }
@@ -937,7 +937,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
           break;
 
         case 11:                    /********** CASE 11 **********/
-          if(ref_vectors == NULL) 
+          if(ref_vectors == NULL)
             return;
 
           for( ; blks_height > 0; blks_height -= 8) {
@@ -1018,7 +1018,7 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
                   LV1_CHECK(buf1,rle_v3,lv1,lp2)
                   break;
 
-                  default: 
+                  default:
                     return;
                 }
               }
@@ -1032,12 +1032,12 @@ static void iv_Decode_Chunk(Indeo3DecodeContext *s,
           }
           break;
 
-        default: 
+        default:
           return;
       }
     }
 
-    if(strip < strip_tbl) 
+    if(strip < strip_tbl)
       return;
 
     for( ; strip >= strip_tbl; strip--) {
diff --git a/src/libffmpeg/libavcodec/indeo3data.h b/src/libffmpeg/libavcodec/indeo3data.h
index 47549268d..77bbc07ba 100644
--- a/src/libffmpeg/libavcodec/indeo3data.h
+++ b/src/libffmpeg/libavcodec/indeo3data.h
@@ -1,2315 +1,2315 @@
 
 static const uint32_t correction[] = {
-	0x00000000, 0x00000202, 0xfffffdfe, 0x000002ff, 0xfffffd01, 0xffffff03, 0x000000fd, 0x00000404, 
-	0xfffffbfc, 0x00000501, 0xfffffaff, 0x00000105, 0xfffffefb, 0x000003fc, 0xfffffc04, 0x000005fe, 
-	0xfffffa02, 0xfffffe06, 0x000001fa, 0x00000904, 0xfffff6fc, 0x00000409, 0xfffffbf7, 0x00000909, 
-	0xfffff6f7, 0x00000a01, 0xfffff5ff, 0x0000010a, 0xfffffef6, 0x000007fb, 0xfffff805, 0xfffffb08, 
-	0x000004f8, 0x00000f09, 0xfffff0f7, 0x0000090f, 0xfffff6f1, 0x00000bfd, 0xfffff403, 0xfffffd0c, 
-	0x000002f4, 0x00001004, 0xffffeffc, 0x00000410, 0xfffffbf0, 0x00001010, 0xffffeff0, 0x00001200, 
-	0xffffee00, 0x00000012, 0xffffffee, 0x00000bf4, 0xfffff40c, 0x00000ff7, 0xfffff009, 0xfffff710, 
-	0x000008f0, 0x00001b0b, 0xffffe4f5, 0x00000b1b, 0xfffff4e5, 0x00001c13, 0xffffe3ed, 0x0000131c, 
-	0xffffece4, 0x000015fa, 0xffffea06, 0xfffffa16, 0x000005ea, 0x00001d04, 0xffffe2fc, 0x0000041d, 
-	0xfffffbe3, 0x00001e1e, 0xffffe1e2, 0x000020fe, 0xffffdf02, 0xfffffe21, 0x000001df, 0x000016ee, 
-	0xffffe912, 0xffffee17, 0x000011e9, 0x00001df1, 0xffffe20f, 0xfffff11e, 0x00000ee2, 0x00002e16, 
-	0xffffd1ea, 0x0000162e, 0xffffe9d2, 0x00002f0d, 0xffffd0f3, 0x00000d2f, 0xfffff2d1, 0x00003123, 
-	0xffffcedd, 0x00002331, 0xffffdccf, 0x000028f5, 0xffffd70b, 0xfffff529, 0x00000ad7, 0x00003304, 
-	0xffffccfc, 0x00000433, 0xfffffbcd, 0x00003636, 0xffffc9ca, 0x000021de, 0xffffde22, 0x000029e3, 
-	0xffffd61d, 0xffffe32a, 0x00001cd6, 0x00003bfa, 0xffffc406, 0xfffffa3c, 0x000005c4, 0x00004c1b, 
-	0xffffb3e5, 0x00001b4c, 0xffffe4b4, 0x00004d2b, 0xffffb2d5, 0x00002b4d, 0xffffd4b3, 0x000036e8, 
-	0xffffc918, 0xffffe837, 0x000017c9, 0x00004f0e, 0xffffb0f2, 0x00000e4f, 0xfffff1b1, 0x0000533f, 
-	0xffffacc1, 0x00003f53, 0xffffc0ad, 0x000049ec, 0xffffb614, 0xffffec4a, 0x000013b6, 0x00005802, 
-	0xffffa7fe, 0x00000258, 0xfffffda8, 0x00005d5d, 0xffffa2a3, 0x00003ccc, 0xffffc334, 0xffffcc3d, 
-	0x000033c3, 0x00007834, 0xffff87cc, 0x00003478, 0xffffcb88, 0x00004ad3, 0xffffb52d, 0xffffd34b, 
-	0x00002cb5, 0x00007d4b, 0xffff82b5, 0x00004b7d, 0xffffb483, 0x00007a21, 0xffff85df, 0x0000217a, 
-	0xffffde86, 0x000066f3, 0xffff990d, 0xfffff367, 0x00000c99, 0x00005fd8, 0xffffa028, 0xffffd860, 
-	0x000027a0, 0x00007ede, 0xffff8122, 0xffffde7f, 0x00002181, 0x000058a7, 0xffffa759, 0x000068b2, 
-	0xffff974e, 0xffffb269, 0x00004d97, 0x00000c0c, 0xfffff3f4, 0x00001717, 0xffffe8e9, 0x00002a2a, 
-	0xffffd5d6, 0x00004949, 0xffffb6b7, 0x00000000, 0x02020000, 0xfdfe0000, 0x02ff0000, 0xfd010000, 
-	0xff030000, 0x00fd0000, 0x00000202, 0x02020202, 0xfdfe0202, 0x02ff0202, 0xfd010202, 0xff030202, 
-	0x00fd0202, 0xfffffdfe, 0x0201fdfe, 0xfdfdfdfe, 0x02fefdfe, 0xfd00fdfe, 0xff02fdfe, 0x00fcfdfe, 
-	0x000002ff, 0x020202ff, 0xfdfe02ff, 0x02ff02ff, 0xfd0102ff, 0xff0302ff, 0x00fd02ff, 0xfffffd01, 
-	0x0201fd01, 0xfdfdfd01, 0x02fefd01, 0xfd00fd01, 0xff02fd01, 0x00fcfd01, 0xffffff03, 0x0201ff03, 
-	0xfdfdff03, 0x02feff03, 0xfd00ff03, 0xff02ff03, 0x00fcff03, 0x000000fd, 0x020200fd, 0xfdfe00fd, 
-	0x02ff00fd, 0xfd0100fd, 0xff0300fd, 0x00fd00fd, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000303, 0xfffffcfd, 0x000003ff, 0xfffffc01, 0xffffff04, 0x000000fc, 0x00000707, 
-	0xfffff8f9, 0x00000802, 0xfffff7fe, 0x00000208, 0xfffffdf8, 0x000008fe, 0xfffff702, 0xfffffe09, 
-	0x000001f7, 0x000005fa, 0xfffffa06, 0x00000d06, 0xfffff2fa, 0x0000060d, 0xfffff9f3, 0x00000d0d, 
-	0xfffff2f3, 0x00000e01, 0xfffff1ff, 0x0000010e, 0xfffffef2, 0x00000bf8, 0xfffff408, 0xfffff80c, 
-	0x000007f4, 0x0000170e, 0xffffe8f2, 0x00000e17, 0xfffff1e9, 0x000011fb, 0xffffee05, 0xfffffb12, 
-	0x000004ee, 0x00001806, 0xffffe7fa, 0x00000618, 0xfffff9e8, 0x00001818, 0xffffe7e8, 0x00001aff, 
-	0xffffe501, 0xffffff1b, 0x000000e5, 0x000010ef, 0xffffef11, 0x000016f3, 0xffffe90d, 0xfffff317, 
-	0x00000ce9, 0x00002810, 0xffffd7f0, 0x00001028, 0xffffefd8, 0x0000291c, 0xffffd6e4, 0x00001c29, 
-	0xffffe3d7, 0x000020f7, 0xffffdf09, 0xfffff721, 0x000008df, 0x00002b06, 0xffffd4fa, 0x0000062b, 
-	0xfffff9d5, 0x00002e2e, 0xffffd1d2, 0x000031fc, 0xffffce04, 0xfffffc32, 0x000003ce, 0x000021e5, 
-	0xffffde1b, 0xffffe522, 0x00001ade, 0x00002cea, 0xffffd316, 0xffffea2d, 0x000015d3, 0x00004522, 
-	0xffffbade, 0x00002245, 0xffffddbb, 0x00004613, 0xffffb9ed, 0x00001346, 0xffffecba, 0x00004935, 
-	0xffffb6cb, 0x00003549, 0xffffcab7, 0x00003def, 0xffffc211, 0xffffef3e, 0x000010c2, 0x00004d05, 
-	0xffffb2fb, 0x0000054d, 0xfffffab3, 0x00005252, 0xffffadae, 0x000032cd, 0xffffcd33, 0x00003fd5, 
-	0xffffc02b, 0xffffd540, 0x00002ac0, 0x000059f6, 0xffffa60a, 0xfffff65a, 0x000009a6, 0x00007229, 
-	0xffff8dd7, 0x00002972, 0xffffd68e, 0x00007440, 0xffff8bc0, 0x00004074, 0xffffbf8c, 0x000051db, 
-	0xffffae25, 0xffffdb52, 0x000024ae, 0x00007716, 0xffff88ea, 0x00001677, 0xffffe989, 0x00007c5f, 
-	0xffff83a1, 0x00005f7c, 0xffffa084, 0x00006ee2, 0xffff911e, 0xffffe26f, 0x00001d91, 0x00005bb2, 
-	0xffffa44e, 0xffffb25c, 0x00004da4, 0x000070bc, 0xffff8f44, 0xffffbc71, 0x0000438f, 0x00001212, 
-	0xffffedee, 0x00002222, 0xffffddde, 0x00003f3f, 0xffffc0c1, 0x00006d6d, 0xffff9293, 0x00000000, 
-	0x03030000, 0xfcfd0000, 0x03ff0000, 0xfc010000, 0xff040000, 0x00fc0000, 0x07070000, 0xf8f90000, 
-	0x00000303, 0x03030303, 0xfcfd0303, 0x03ff0303, 0xfc010303, 0xff040303, 0x00fc0303, 0x07070303, 
-	0xf8f90303, 0xfffffcfd, 0x0302fcfd, 0xfcfcfcfd, 0x03fefcfd, 0xfc00fcfd, 0xff03fcfd, 0x00fbfcfd, 
-	0x0706fcfd, 0xf8f8fcfd, 0x000003ff, 0x030303ff, 0xfcfd03ff, 0x03ff03ff, 0xfc0103ff, 0xff0403ff, 
-	0x00fc03ff, 0x070703ff, 0xf8f903ff, 0xfffffc01, 0x0302fc01, 0xfcfcfc01, 0x03fefc01, 0xfc00fc01, 
-	0xff03fc01, 0x00fbfc01, 0x0706fc01, 0xf8f8fc01, 0xffffff04, 0x0302ff04, 0xfcfcff04, 0x03feff04, 
-	0xfc00ff04, 0xff03ff04, 0x00fbff04, 0x0706ff04, 0xf8f8ff04, 0x000000fc, 0x030300fc, 0xfcfd00fc, 
-	0x03ff00fc, 0xfc0100fc, 0xff0400fc, 0x00fc00fc, 0x070700fc, 0xf8f900fc, 0x00000707, 0x03030707, 
-	0xfcfd0707, 0x03ff0707, 0xfc010707, 0xff040707, 0x00fc0707, 0x07070707, 0xf8f90707, 0xfffff8f9, 
-	0x0302f8f9, 0xfcfcf8f9, 0x03fef8f9, 0xfc00f8f9, 0xff03f8f9, 0x00fbf8f9, 0x0706f8f9, 0xf8f8f8f9, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000404, 0xfffffbfc, 0x000004ff, 0xfffffb01, 0xffffff05, 0x000000fb, 0x00000a03, 
-	0xfffff5fd, 0x0000030a, 0xfffffcf6, 0x00000909, 0xfffff6f7, 0x000006f9, 0xfffff907, 0x00000bfd, 
-	0xfffff403, 0xfffffd0c, 0x000002f4, 0x00001108, 0xffffeef8, 0x00000811, 0xfffff7ef, 0x00001111, 
-	0xffffeeef, 0x00001301, 0xffffecff, 0x00000113, 0xfffffeed, 0x00000ff5, 0xfffff00b, 0xfffff510, 
-	0x00000af0, 0x000016fa, 0xffffe906, 0xfffffa17, 0x000005e9, 0x00001f12, 0xffffe0ee, 0x0000121f, 
-	0xffffede1, 0x00002008, 0xffffdff8, 0x00000820, 0xfffff7e0, 0x00002121, 0xffffdedf, 0x000023ff, 
-	0xffffdc01, 0xffffff24, 0x000000dc, 0x000016e9, 0xffffe917, 0x00001eef, 0xffffe111, 0xffffef1f, 
-	0x000010e1, 0x00003615, 0xffffc9eb, 0x00001536, 0xffffeaca, 0x00003725, 0xffffc8db, 0x00002537, 
-	0xffffdac9, 0x00002bf4, 0xffffd40c, 0xfffff42c, 0x00000bd4, 0x00003908, 0xffffc6f8, 0x00000839, 
-	0xfffff7c7, 0x00003d3d, 0xffffc2c3, 0x000041fb, 0xffffbe05, 0xfffffb42, 0x000004be, 0x00002cdc, 
-	0xffffd324, 0xffffdc2d, 0x000023d3, 0x00003be3, 0xffffc41d, 0xffffe33c, 0x00001cc4, 0x00005c2d, 
-	0xffffa3d3, 0x00002d5c, 0xffffd2a4, 0x00005d19, 0xffffa2e7, 0x0000195d, 0xffffe6a3, 0x00006147, 
-	0xffff9eb9, 0x00004761, 0xffffb89f, 0x000052ea, 0xffffad16, 0xffffea53, 0x000015ad, 0x00006607, 
-	0xffff99f9, 0x00000766, 0xfffff89a, 0x00006d6d, 0xffff9293, 0x000043bc, 0xffffbc44, 0x000054c7, 
-	0xffffab39, 0xffffc755, 0x000038ab, 0x000077f3, 0xffff880d, 0xfffff378, 0x00000c88, 0x00006dcf, 
-	0xffff9231, 0xffffcf6e, 0x00003092, 0x00007a98, 0xffff8568, 0xffff987b, 0x00006785, 0x00001818, 
-	0xffffe7e8, 0x00002e2e, 0xffffd1d2, 0x00005454, 0xffffabac, 0x00000000, 0x04040000, 0xfbfc0000, 
-	0x04ff0000, 0xfb010000, 0xff050000, 0x00fb0000, 0x0a030000, 0xf5fd0000, 0x030a0000, 0x00000404, 
-	0x04040404, 0xfbfc0404, 0x04ff0404, 0xfb010404, 0xff050404, 0x00fb0404, 0x0a030404, 0xf5fd0404, 
-	0x030a0404, 0xfffffbfc, 0x0403fbfc, 0xfbfbfbfc, 0x04fefbfc, 0xfb00fbfc, 0xff04fbfc, 0x00fafbfc, 
-	0x0a02fbfc, 0xf5fcfbfc, 0x0309fbfc, 0x000004ff, 0x040404ff, 0xfbfc04ff, 0x04ff04ff, 0xfb0104ff, 
-	0xff0504ff, 0x00fb04ff, 0x0a0304ff, 0xf5fd04ff, 0x030a04ff, 0xfffffb01, 0x0403fb01, 0xfbfbfb01, 
-	0x04fefb01, 0xfb00fb01, 0xff04fb01, 0x00fafb01, 0x0a02fb01, 0xf5fcfb01, 0x0309fb01, 0xffffff05, 
-	0x0403ff05, 0xfbfbff05, 0x04feff05, 0xfb00ff05, 0xff04ff05, 0x00faff05, 0x0a02ff05, 0xf5fcff05, 
-	0x0309ff05, 0x000000fb, 0x040400fb, 0xfbfc00fb, 0x04ff00fb, 0xfb0100fb, 0xff0500fb, 0x00fb00fb, 
-	0x0a0300fb, 0xf5fd00fb, 0x030a00fb, 0x00000a03, 0x04040a03, 0xfbfc0a03, 0x04ff0a03, 0xfb010a03, 
-	0xff050a03, 0x00fb0a03, 0x0a030a03, 0xf5fd0a03, 0x030a0a03, 0xfffff5fd, 0x0403f5fd, 0xfbfbf5fd, 
-	0x04fef5fd, 0xfb00f5fd, 0xff04f5fd, 0x00faf5fd, 0x0a02f5fd, 0xf5fcf5fd, 0x0309f5fd, 0x0000030a, 
-	0x0404030a, 0xfbfc030a, 0x04ff030a, 0xfb01030a, 0xff05030a, 0x00fb030a, 0x0a03030a, 0xf5fd030a, 
-	0x030a030a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000505, 0xfffffafb, 0x000006fe, 0xfffff902, 0xfffffe07, 0x000001f9, 0x00000b0b, 
-	0xfffff4f5, 0x00000d03, 0xfffff2fd, 0x0000030d, 0xfffffcf3, 0x000008f7, 0xfffff709, 0x00000efc, 
-	0xfffff104, 0xfffffc0f, 0x000003f1, 0x0000160b, 0xffffe9f5, 0x00000b16, 0xfffff4ea, 0x00001515, 
-	0xffffeaeb, 0x00001802, 0xffffe7fe, 0x00000218, 0xfffffde8, 0x000013f2, 0xffffec0e, 0xfffff214, 
-	0x00000dec, 0x00002617, 0xffffd9e9, 0x00001726, 0xffffe8da, 0x00001cf8, 0xffffe308, 0xfffff81d, 
-	0x000007e3, 0x0000270b, 0xffffd8f5, 0x00000b27, 0xfffff4d9, 0x00002929, 0xffffd6d7, 0x00002cff, 
-	0xffffd301, 0xffffff2d, 0x000000d3, 0x00001ce3, 0xffffe31d, 0x000026ea, 0xffffd916, 0xffffea27, 
-	0x000015d9, 0x0000431b, 0xffffbce5, 0x00001b43, 0xffffe4bd, 0x0000452f, 0xffffbad1, 0x00002f45, 
-	0xffffd0bb, 0x000037f1, 0xffffc80f, 0xfffff138, 0x00000ec8, 0x0000470b, 0xffffb8f5, 0x00000b47, 
-	0xfffff4b9, 0x00004c4c, 0xffffb3b4, 0x000052fa, 0xffffad06, 0xfffffa53, 0x000005ad, 0x000038d3, 
-	0xffffc72d, 0xffffd339, 0x00002cc7, 0x00004adc, 0xffffb524, 0xffffdc4b, 0x000023b5, 0x00007338, 
-	0xffff8cc8, 0x00003873, 0xffffc78d, 0x0000751f, 0xffff8ae1, 0x00001f75, 0xffffe08b, 0x00007a58, 
-	0xffff85a8, 0x0000587a, 0xffffa786, 0x000067e4, 0xffff981c, 0xffffe468, 0x00001b98, 0x000054ab, 
-	0xffffab55, 0x000069b8, 0xffff9648, 0xffffb86a, 0x00004796, 0x00001e1e, 0xffffe1e2, 0x00003a3a, 
-	0xffffc5c6, 0x00006969, 0xffff9697, 0x00000000, 0x05050000, 0xfafb0000, 0x06fe0000, 0xf9020000, 
-	0xfe070000, 0x01f90000, 0x0b0b0000, 0xf4f50000, 0x0d030000, 0xf2fd0000, 0x00000505, 0x05050505, 
-	0xfafb0505, 0x06fe0505, 0xf9020505, 0xfe070505, 0x01f90505, 0x0b0b0505, 0xf4f50505, 0x0d030505, 
-	0xf2fd0505, 0xfffffafb, 0x0504fafb, 0xfafafafb, 0x06fdfafb, 0xf901fafb, 0xfe06fafb, 0x01f8fafb, 
-	0x0b0afafb, 0xf4f4fafb, 0x0d02fafb, 0xf2fcfafb, 0x000006fe, 0x050506fe, 0xfafb06fe, 0x06fe06fe, 
-	0xf90206fe, 0xfe0706fe, 0x01f906fe, 0x0b0b06fe, 0xf4f506fe, 0x0d0306fe, 0xf2fd06fe, 0xfffff902, 
-	0x0504f902, 0xfafaf902, 0x06fdf902, 0xf901f902, 0xfe06f902, 0x01f8f902, 0x0b0af902, 0xf4f4f902, 
-	0x0d02f902, 0xf2fcf902, 0xfffffe07, 0x0504fe07, 0xfafafe07, 0x06fdfe07, 0xf901fe07, 0xfe06fe07, 
-	0x01f8fe07, 0x0b0afe07, 0xf4f4fe07, 0x0d02fe07, 0xf2fcfe07, 0x000001f9, 0x050501f9, 0xfafb01f9, 
-	0x06fe01f9, 0xf90201f9, 0xfe0701f9, 0x01f901f9, 0x0b0b01f9, 0xf4f501f9, 0x0d0301f9, 0xf2fd01f9, 
-	0x00000b0b, 0x05050b0b, 0xfafb0b0b, 0x06fe0b0b, 0xf9020b0b, 0xfe070b0b, 0x01f90b0b, 0x0b0b0b0b, 
-	0xf4f50b0b, 0x0d030b0b, 0xf2fd0b0b, 0xfffff4f5, 0x0504f4f5, 0xfafaf4f5, 0x06fdf4f5, 0xf901f4f5, 
-	0xfe06f4f5, 0x01f8f4f5, 0x0b0af4f5, 0xf4f4f4f5, 0x0d02f4f5, 0xf2fcf4f5, 0x00000d03, 0x05050d03, 
-	0xfafb0d03, 0x06fe0d03, 0xf9020d03, 0xfe070d03, 0x01f90d03, 0x0b0b0d03, 0xf4f50d03, 0x0d030d03, 
-	0xf2fd0d03, 0xfffff2fd, 0x0504f2fd, 0xfafaf2fd, 0x06fdf2fd, 0xf901f2fd, 0xfe06f2fd, 0x01f8f2fd, 
-	0x0b0af2fd, 0xf4f4f2fd, 0x0d02f2fd, 0xf2fcf2fd, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000606, 0xfffff9fa, 0x000007fe, 0xfffff802, 0xfffffe08, 0x000001f8, 0x00000d0d, 
-	0xfffff2f3, 0x00000f04, 0xfffff0fc, 0x0000040f, 0xfffffbf1, 0x00000af5, 0xfffff50b, 0x000011fb, 
-	0xffffee05, 0xfffffb12, 0x000004ee, 0x00001a0d, 0xffffe5f3, 0x00000d1a, 0xfffff2e6, 0x00001a1a, 
-	0xffffe5e6, 0x00001d02, 0xffffe2fe, 0x0000021d, 0xfffffde3, 0x000017f0, 0xffffe810, 0xfffff018, 
-	0x00000fe8, 0x00002e1c, 0xffffd1e4, 0x00001c2e, 0xffffe3d2, 0x000022f7, 0xffffdd09, 0xfffff723, 
-	0x000008dd, 0x00002f0d, 0xffffd0f3, 0x00000d2f, 0xfffff2d1, 0x00003131, 0xffffcecf, 0x000035ff, 
-	0xffffca01, 0xffffff36, 0x000000ca, 0x000022dd, 0xffffdd23, 0x00002ee6, 0xffffd11a, 0xffffe62f, 
-	0x000019d1, 0x00005120, 0xffffaee0, 0x00002051, 0xffffdfaf, 0x00005338, 0xffffacc8, 0x00003853, 
-	0xffffc7ad, 0x000042ee, 0xffffbd12, 0xffffee43, 0x000011bd, 0x0000560d, 0xffffa9f3, 0x00000d56, 
-	0xfffff2aa, 0x00005b5b, 0xffffa4a5, 0x000062f9, 0xffff9d07, 0xfffff963, 0x0000069d, 0x000043ca, 
-	0xffffbc36, 0xffffca44, 0x000035bc, 0x000059d4, 0xffffa62c, 0xffffd45a, 0x00002ba6, 0x00007bdf, 
-	0xffff8421, 0xffffdf7c, 0x00002084, 0x00006699, 0xffff9967, 0x00007eaa, 0xffff8156, 0xffffaa7f, 
-	0x00005581, 0x00002525, 0xffffdadb, 0x00004545, 0xffffbabb, 0x00000000, 0x06060000, 0xf9fa0000, 
-	0x07fe0000, 0xf8020000, 0xfe080000, 0x01f80000, 0x0d0d0000, 0xf2f30000, 0x0f040000, 0xf0fc0000, 
-	0x040f0000, 0x00000606, 0x06060606, 0xf9fa0606, 0x07fe0606, 0xf8020606, 0xfe080606, 0x01f80606, 
-	0x0d0d0606, 0xf2f30606, 0x0f040606, 0xf0fc0606, 0x040f0606, 0xfffff9fa, 0x0605f9fa, 0xf9f9f9fa, 
-	0x07fdf9fa, 0xf801f9fa, 0xfe07f9fa, 0x01f7f9fa, 0x0d0cf9fa, 0xf2f2f9fa, 0x0f03f9fa, 0xf0fbf9fa, 
-	0x040ef9fa, 0x000007fe, 0x060607fe, 0xf9fa07fe, 0x07fe07fe, 0xf80207fe, 0xfe0807fe, 0x01f807fe, 
-	0x0d0d07fe, 0xf2f307fe, 0x0f0407fe, 0xf0fc07fe, 0x040f07fe, 0xfffff802, 0x0605f802, 0xf9f9f802, 
-	0x07fdf802, 0xf801f802, 0xfe07f802, 0x01f7f802, 0x0d0cf802, 0xf2f2f802, 0x0f03f802, 0xf0fbf802, 
-	0x040ef802, 0xfffffe08, 0x0605fe08, 0xf9f9fe08, 0x07fdfe08, 0xf801fe08, 0xfe07fe08, 0x01f7fe08, 
-	0x0d0cfe08, 0xf2f2fe08, 0x0f03fe08, 0xf0fbfe08, 0x040efe08, 0x000001f8, 0x060601f8, 0xf9fa01f8, 
-	0x07fe01f8, 0xf80201f8, 0xfe0801f8, 0x01f801f8, 0x0d0d01f8, 0xf2f301f8, 0x0f0401f8, 0xf0fc01f8, 
-	0x040f01f8, 0x00000d0d, 0x06060d0d, 0xf9fa0d0d, 0x07fe0d0d, 0xf8020d0d, 0xfe080d0d, 0x01f80d0d, 
-	0x0d0d0d0d, 0xf2f30d0d, 0x0f040d0d, 0xf0fc0d0d, 0x040f0d0d, 0xfffff2f3, 0x0605f2f3, 0xf9f9f2f3, 
-	0x07fdf2f3, 0xf801f2f3, 0xfe07f2f3, 0x01f7f2f3, 0x0d0cf2f3, 0xf2f2f2f3, 0x0f03f2f3, 0xf0fbf2f3, 
-	0x040ef2f3, 0x00000f04, 0x06060f04, 0xf9fa0f04, 0x07fe0f04, 0xf8020f04, 0xfe080f04, 0x01f80f04, 
-	0x0d0d0f04, 0xf2f30f04, 0x0f040f04, 0xf0fc0f04, 0x040f0f04, 0xfffff0fc, 0x0605f0fc, 0xf9f9f0fc, 
-	0x07fdf0fc, 0xf801f0fc, 0xfe07f0fc, 0x01f7f0fc, 0x0d0cf0fc, 0xf2f2f0fc, 0x0f03f0fc, 0xf0fbf0fc, 
-	0x040ef0fc, 0x0000040f, 0x0606040f, 0xf9fa040f, 0x07fe040f, 0xf802040f, 0xfe08040f, 0x01f8040f, 
-	0x0d0d040f, 0xf2f3040f, 0x0f04040f, 0xf0fc040f, 0x040f040f, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000707, 0xfffff8f9, 0x000009fd, 0xfffff603, 0xfffffd0a, 0x000002f6, 0x00001010, 
-	0xffffeff0, 0x00001205, 0xffffedfb, 0x00000512, 0xfffffaee, 0x00000cf3, 0xfffff30d, 0x000014fa, 
-	0xffffeb06, 0xfffffa15, 0x000005eb, 0x00001e0f, 0xffffe1f1, 0x00000f1e, 0xfffff0e2, 0x00001e1e, 
-	0xffffe1e2, 0x00002202, 0xffffddfe, 0x00000222, 0xfffffdde, 0x00001bed, 0xffffe413, 0xffffed1c, 
-	0x000012e4, 0x00003620, 0xffffc9e0, 0x00002036, 0xffffdfca, 0x000028f5, 0xffffd70b, 0xfffff529, 
-	0x00000ad7, 0x0000370f, 0xffffc8f1, 0x00000f37, 0xfffff0c9, 0x00003939, 0xffffc6c7, 0x00003eff, 
-	0xffffc101, 0xffffff3f, 0x000000c1, 0x000027d8, 0xffffd828, 0x000036e2, 0xffffc91e, 0xffffe237, 
-	0x00001dc9, 0x00005e25, 0xffffa1db, 0x0000255e, 0xffffdaa2, 0x00006041, 0xffff9fbf, 0x00004160, 
-	0xffffbea0, 0x00004deb, 0xffffb215, 0xffffeb4e, 0x000014b2, 0x0000640f, 0xffff9bf1, 0x00000f64, 
-	0xfffff09c, 0x00006a6a, 0xffff9596, 0x000073f8, 0xffff8c08, 0xfffff874, 0x0000078c, 0x00004ec1, 
-	0xffffb13f, 0xffffc14f, 0x00003eb1, 0x000068cd, 0xffff9733, 0xffffcd69, 0x00003297, 0x00007788, 
-	0xffff8878, 0x00002b2b, 0xffffd4d5, 0x00005050, 0xffffafb0, 0x00000000, 0x07070000, 0xf8f90000, 
-	0x09fd0000, 0xf6030000, 0xfd0a0000, 0x02f60000, 0x10100000, 0xeff00000, 0x12050000, 0xedfb0000, 
-	0x05120000, 0x00000707, 0x07070707, 0xf8f90707, 0x09fd0707, 0xf6030707, 0xfd0a0707, 0x02f60707, 
-	0x10100707, 0xeff00707, 0x12050707, 0xedfb0707, 0x05120707, 0xfffff8f9, 0x0706f8f9, 0xf8f8f8f9, 
-	0x09fcf8f9, 0xf602f8f9, 0xfd09f8f9, 0x02f5f8f9, 0x100ff8f9, 0xefeff8f9, 0x1204f8f9, 0xedfaf8f9, 
-	0x0511f8f9, 0x000009fd, 0x070709fd, 0xf8f909fd, 0x09fd09fd, 0xf60309fd, 0xfd0a09fd, 0x02f609fd, 
-	0x101009fd, 0xeff009fd, 0x120509fd, 0xedfb09fd, 0x051209fd, 0xfffff603, 0x0706f603, 0xf8f8f603, 
-	0x09fcf603, 0xf602f603, 0xfd09f603, 0x02f5f603, 0x100ff603, 0xefeff603, 0x1204f603, 0xedfaf603, 
-	0x0511f603, 0xfffffd0a, 0x0706fd0a, 0xf8f8fd0a, 0x09fcfd0a, 0xf602fd0a, 0xfd09fd0a, 0x02f5fd0a, 
-	0x100ffd0a, 0xefeffd0a, 0x1204fd0a, 0xedfafd0a, 0x0511fd0a, 0x000002f6, 0x070702f6, 0xf8f902f6, 
-	0x09fd02f6, 0xf60302f6, 0xfd0a02f6, 0x02f602f6, 0x101002f6, 0xeff002f6, 0x120502f6, 0xedfb02f6, 
-	0x051202f6, 0x00001010, 0x07071010, 0xf8f91010, 0x09fd1010, 0xf6031010, 0xfd0a1010, 0x02f61010, 
-	0x10101010, 0xeff01010, 0x12051010, 0xedfb1010, 0x05121010, 0xffffeff0, 0x0706eff0, 0xf8f8eff0, 
-	0x09fceff0, 0xf602eff0, 0xfd09eff0, 0x02f5eff0, 0x100feff0, 0xefefeff0, 0x1204eff0, 0xedfaeff0, 
-	0x0511eff0, 0x00001205, 0x07071205, 0xf8f91205, 0x09fd1205, 0xf6031205, 0xfd0a1205, 0x02f61205, 
-	0x10101205, 0xeff01205, 0x12051205, 0xedfb1205, 0x05121205, 0xffffedfb, 0x0706edfb, 0xf8f8edfb, 
-	0x09fcedfb, 0xf602edfb, 0xfd09edfb, 0x02f5edfb, 0x100fedfb, 0xefefedfb, 0x1204edfb, 0xedfaedfb, 
-	0x0511edfb, 0x00000512, 0x07070512, 0xf8f90512, 0x09fd0512, 0xf6030512, 0xfd0a0512, 0x02f60512, 
-	0x10100512, 0xeff00512, 0x12050512, 0xedfb0512, 0x05120512, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000808, 0xfffff7f8, 0x00000afd, 0xfffff503, 0xfffffd0b, 0x000002f5, 0x00001212, 
-	0xffffedee, 0x00001405, 0xffffebfb, 0x00000514, 0xfffffaec, 0x00000ef1, 0xfffff10f, 0x000017f9, 
-	0xffffe807, 0xfffff918, 0x000006e8, 0x00002311, 0xffffdcef, 0x00001123, 0xffffeedd, 0x00002222, 
-	0xffffddde, 0x00002603, 0xffffd9fd, 0x00000326, 0xfffffcda, 0x00001fea, 0xffffe016, 0xffffea20, 
-	0x000015e0, 0x00003d25, 0xffffc2db, 0x0000253d, 0xffffdac3, 0x00002ef3, 0xffffd10d, 0xfffff32f, 
-	0x00000cd1, 0x00003f11, 0xffffc0ef, 0x0000113f, 0xffffeec1, 0x00004141, 0xffffbebf, 0x000047ff, 
-	0xffffb801, 0xffffff48, 0x000000b8, 0x00002dd2, 0xffffd22e, 0x00003edd, 0xffffc123, 0xffffdd3f, 
-	0x000022c1, 0x00006b2b, 0xffff94d5, 0x00002b6b, 0xffffd495, 0x00006e4b, 0xffff91b5, 0x00004b6e, 
-	0xffffb492, 0x000058e8, 0xffffa718, 0xffffe859, 0x000017a7, 0x00007211, 0xffff8def, 0x00001172, 
-	0xffffee8e, 0x00007979, 0xffff8687, 0x00005ab8, 0xffffa548, 0xffffb85b, 0x000047a5, 0x000077c6, 
-	0xffff883a, 0xffffc678, 0x00003988, 0x00003131, 0xffffcecf, 0x00005c5c, 0xffffa3a4, 0x00000000, 
-	0x08080000, 0xf7f80000, 0x0afd0000, 0xf5030000, 0xfd0b0000, 0x02f50000, 0x12120000, 0xedee0000, 
-	0x14050000, 0xebfb0000, 0x05140000, 0x00000808, 0x08080808, 0xf7f80808, 0x0afd0808, 0xf5030808, 
-	0xfd0b0808, 0x02f50808, 0x12120808, 0xedee0808, 0x14050808, 0xebfb0808, 0x05140808, 0xfffff7f8, 
-	0x0807f7f8, 0xf7f7f7f8, 0x0afcf7f8, 0xf502f7f8, 0xfd0af7f8, 0x02f4f7f8, 0x1211f7f8, 0xededf7f8, 
-	0x1404f7f8, 0xebfaf7f8, 0x0513f7f8, 0x00000afd, 0x08080afd, 0xf7f80afd, 0x0afd0afd, 0xf5030afd, 
-	0xfd0b0afd, 0x02f50afd, 0x12120afd, 0xedee0afd, 0x14050afd, 0xebfb0afd, 0x05140afd, 0xfffff503, 
-	0x0807f503, 0xf7f7f503, 0x0afcf503, 0xf502f503, 0xfd0af503, 0x02f4f503, 0x1211f503, 0xededf503, 
-	0x1404f503, 0xebfaf503, 0x0513f503, 0xfffffd0b, 0x0807fd0b, 0xf7f7fd0b, 0x0afcfd0b, 0xf502fd0b, 
-	0xfd0afd0b, 0x02f4fd0b, 0x1211fd0b, 0xededfd0b, 0x1404fd0b, 0xebfafd0b, 0x0513fd0b, 0x000002f5, 
-	0x080802f5, 0xf7f802f5, 0x0afd02f5, 0xf50302f5, 0xfd0b02f5, 0x02f502f5, 0x121202f5, 0xedee02f5, 
-	0x140502f5, 0xebfb02f5, 0x051402f5, 0x00001212, 0x08081212, 0xf7f81212, 0x0afd1212, 0xf5031212, 
-	0xfd0b1212, 0x02f51212, 0x12121212, 0xedee1212, 0x14051212, 0xebfb1212, 0x05141212, 0xffffedee, 
-	0x0807edee, 0xf7f7edee, 0x0afcedee, 0xf502edee, 0xfd0aedee, 0x02f4edee, 0x1211edee, 0xedededee, 
-	0x1404edee, 0xebfaedee, 0x0513edee, 0x00001405, 0x08081405, 0xf7f81405, 0x0afd1405, 0xf5031405, 
-	0xfd0b1405, 0x02f51405, 0x12121405, 0xedee1405, 0x14051405, 0xebfb1405, 0x05141405, 0xffffebfb, 
-	0x0807ebfb, 0xf7f7ebfb, 0x0afcebfb, 0xf502ebfb, 0xfd0aebfb, 0x02f4ebfb, 0x1211ebfb, 0xededebfb, 
-	0x1404ebfb, 0xebfaebfb, 0x0513ebfb, 0x00000514, 0x08080514, 0xf7f80514, 0x0afd0514, 0xf5030514, 
-	0xfd0b0514, 0x02f50514, 0x12120514, 0xedee0514, 0x14050514, 0xebfb0514, 0x05140514, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000909, 0xfffff6f7, 0x00000bfd, 0xfffff403, 0xfffffd0c, 0x000002f4, 0x00001414, 
-	0xffffebec, 0x00001706, 0xffffe8fa, 0x00000617, 0xfffff9e9, 0x000010ef, 0xffffef11, 0x00001af9, 
-	0xffffe507, 0xfffff91b, 0x000006e5, 0x00002713, 0xffffd8ed, 0x00001327, 0xffffecd9, 0x00002727, 
-	0xffffd8d9, 0x00002b03, 0xffffd4fd, 0x0000032b, 0xfffffcd5, 0x000023e8, 0xffffdc18, 0xffffe824, 
-	0x000017dc, 0x0000452a, 0xffffbad6, 0x00002a45, 0xffffd5bb, 0x000034f2, 0xffffcb0e, 0xfffff235, 
-	0x00000dcb, 0x00004713, 0xffffb8ed, 0x00001347, 0xffffecb9, 0x00004949, 0xffffb6b7, 0x00004ffe, 
-	0xffffb002, 0xfffffe50, 0x000001b0, 0x000033cc, 0xffffcc34, 0x000045d9, 0xffffba27, 0xffffd946, 
-	0x000026ba, 0x00007930, 0xffff86d0, 0x00003079, 0xffffcf87, 0x00007c54, 0xffff83ac, 0x0000547c, 
-	0xffffab84, 0x000063e5, 0xffff9c1b, 0xffffe564, 0x00001a9c, 0x000065af, 0xffff9a51, 0xffffaf66, 
-	0x0000509a, 0x00003737, 0xffffc8c9, 0x00006868, 0xffff9798, 0x00000000, 0x09090000, 0xf6f70000, 
-	0x0bfd0000, 0xf4030000, 0xfd0c0000, 0x02f40000, 0x14140000, 0xebec0000, 0x17060000, 0xe8fa0000, 
-	0x06170000, 0xf9e90000, 0x00000909, 0x09090909, 0xf6f70909, 0x0bfd0909, 0xf4030909, 0xfd0c0909, 
-	0x02f40909, 0x14140909, 0xebec0909, 0x17060909, 0xe8fa0909, 0x06170909, 0xf9e90909, 0xfffff6f7, 
-	0x0908f6f7, 0xf6f6f6f7, 0x0bfcf6f7, 0xf402f6f7, 0xfd0bf6f7, 0x02f3f6f7, 0x1413f6f7, 0xebebf6f7, 
-	0x1705f6f7, 0xe8f9f6f7, 0x0616f6f7, 0xf9e8f6f7, 0x00000bfd, 0x09090bfd, 0xf6f70bfd, 0x0bfd0bfd, 
-	0xf4030bfd, 0xfd0c0bfd, 0x02f40bfd, 0x14140bfd, 0xebec0bfd, 0x17060bfd, 0xe8fa0bfd, 0x06170bfd, 
-	0xf9e90bfd, 0xfffff403, 0x0908f403, 0xf6f6f403, 0x0bfcf403, 0xf402f403, 0xfd0bf403, 0x02f3f403, 
-	0x1413f403, 0xebebf403, 0x1705f403, 0xe8f9f403, 0x0616f403, 0xf9e8f403, 0xfffffd0c, 0x0908fd0c, 
-	0xf6f6fd0c, 0x0bfcfd0c, 0xf402fd0c, 0xfd0bfd0c, 0x02f3fd0c, 0x1413fd0c, 0xebebfd0c, 0x1705fd0c, 
-	0xe8f9fd0c, 0x0616fd0c, 0xf9e8fd0c, 0x000002f4, 0x090902f4, 0xf6f702f4, 0x0bfd02f4, 0xf40302f4, 
-	0xfd0c02f4, 0x02f402f4, 0x141402f4, 0xebec02f4, 0x170602f4, 0xe8fa02f4, 0x061702f4, 0xf9e902f4, 
-	0x00001414, 0x09091414, 0xf6f71414, 0x0bfd1414, 0xf4031414, 0xfd0c1414, 0x02f41414, 0x14141414, 
-	0xebec1414, 0x17061414, 0xe8fa1414, 0x06171414, 0xf9e91414, 0xffffebec, 0x0908ebec, 0xf6f6ebec, 
-	0x0bfcebec, 0xf402ebec, 0xfd0bebec, 0x02f3ebec, 0x1413ebec, 0xebebebec, 0x1705ebec, 0xe8f9ebec, 
-	0x0616ebec, 0xf9e8ebec, 0x00001706, 0x09091706, 0xf6f71706, 0x0bfd1706, 0xf4031706, 0xfd0c1706, 
-	0x02f41706, 0x14141706, 0xebec1706, 0x17061706, 0xe8fa1706, 0x06171706, 0xf9e91706, 0xffffe8fa, 
-	0x0908e8fa, 0xf6f6e8fa, 0x0bfce8fa, 0xf402e8fa, 0xfd0be8fa, 0x02f3e8fa, 0x1413e8fa, 0xebebe8fa, 
-	0x1705e8fa, 0xe8f9e8fa, 0x0616e8fa, 0xf9e8e8fa, 0x00000617, 0x09090617, 0xf6f70617, 0x0bfd0617, 
-	0xf4030617, 0xfd0c0617, 0x02f40617, 0x14140617, 0xebec0617, 0x17060617, 0xe8fa0617, 0x06170617, 
-	0xf9e90617, 0xfffff9e9, 0x0908f9e9, 0xf6f6f9e9, 0x0bfcf9e9, 0xf402f9e9, 0xfd0bf9e9, 0x02f3f9e9, 
-	0x1413f9e9, 0xebebf9e9, 0x1705f9e9, 0xe8f9f9e9, 0x0616f9e9, 0xf9e8f9e9, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000202, 0xfffffdfe, 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000404, 
-	0xfffffbfc, 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x000003fc, 0xfffffc04, 0x000005fe, 
-	0xfffffa02, 0xfffffe06, 0x000001fa, 0x00000804, 0xfffff7fc, 0x00000408, 0xfffffbf8, 0x00000808, 
-	0xfffff7f8, 0x00000a00, 0xfffff600, 0x0000000a, 0xfffffff6, 0x000007fc, 0xfffff804, 0xfffffc08, 
-	0x000003f8, 0x00000e08, 0xfffff1f8, 0x0000080e, 0xfffff7f2, 0x00000bfe, 0xfffff402, 0xfffffe0c, 
-	0x000001f4, 0x00001004, 0xffffeffc, 0x00000410, 0xfffffbf0, 0x00001010, 0xffffeff0, 0x00001200, 
-	0xffffee00, 0x00000012, 0xffffffee, 0x00000bf4, 0xfffff40c, 0x00000ff8, 0xfffff008, 0xfffff810, 
-	0x000007f0, 0x00001a0a, 0xffffe5f6, 0x00000a1a, 0xfffff5e6, 0x00001c12, 0xffffe3ee, 0x0000121c, 
-	0xffffede4, 0x000015fa, 0xffffea06, 0xfffffa16, 0x000005ea, 0x00001c04, 0xffffe3fc, 0x0000041c, 
-	0xfffffbe4, 0x00001e1e, 0xffffe1e2, 0x00001ffe, 0xffffe002, 0xfffffe20, 0x000001e0, 0x000015ee, 
-	0xffffea12, 0xffffee16, 0x000011ea, 0x00001df2, 0xffffe20e, 0xfffff21e, 0x00000de2, 0x00002e16, 
-	0xffffd1ea, 0x0000162e, 0xffffe9d2, 0x00002e0c, 0xffffd1f4, 0x00000c2e, 0xfffff3d2, 0x00003022, 
-	0xffffcfde, 0x00002230, 0xffffddd0, 0x000027f6, 0xffffd80a, 0xfffff628, 0x000009d8, 0x00003204, 
-	0xffffcdfc, 0x00000432, 0xfffffbce, 0x00003636, 0xffffc9ca, 0x000021de, 0xffffde22, 0x000029e4, 
-	0xffffd61c, 0xffffe42a, 0x00001bd6, 0x00003bfa, 0xffffc406, 0xfffffa3c, 0x000005c4, 0x00004c1a, 
-	0xffffb3e6, 0x00001a4c, 0xffffe5b4, 0x00004c2a, 0xffffb3d6, 0x00002a4c, 0xffffd5b4, 0x000035e8, 
-	0xffffca18, 0xffffe836, 0x000017ca, 0x00004e0e, 0xffffb1f2, 0x00000e4e, 0xfffff1b2, 0x0000523e, 
-	0xffffadc2, 0x00003e52, 0xffffc1ae, 0x000049ec, 0xffffb614, 0xffffec4a, 0x000013b6, 0x00005802, 
-	0xffffa7fe, 0x00000258, 0xfffffda8, 0x00005c5c, 0xffffa3a4, 0x00003bcc, 0xffffc434, 0xffffcc3c, 
-	0x000033c4, 0x00007634, 0xffff89cc, 0x00003476, 0xffffcb8a, 0x000049d4, 0xffffb62c, 0xffffd44a, 
-	0x00002bb6, 0x0000764a, 0xffff89b6, 0x00004a76, 0xffffb58a, 0x00007620, 0xffff89e0, 0x00002076, 
-	0xffffdf8a, 0x000065f4, 0xffff9a0c, 0xfffff466, 0x00000b9a, 0x00005fd8, 0xffffa028, 0xffffd860, 
-	0x000027a0, 0x000075de, 0xffff8a22, 0xffffde76, 0x0000218a, 0x000057a8, 0xffffa858, 0x000067b2, 
-	0xffff984e, 0xffffb268, 0x00004d98, 0x00000c0c, 0xfffff3f4, 0x00001616, 0xffffe9ea, 0x00002a2a, 
-	0xffffd5d6, 0x00004848, 0xffffb7b8, 0x00000000, 0x02020000, 0xfdfe0000, 0x02000000, 0xfe000000, 
-	0x00020000, 0xfffe0000, 0x00000202, 0x02020202, 0xfdfe0202, 0x02000202, 0xfe000202, 0x00020202, 
-	0xfffe0202, 0xfffffdfe, 0x0201fdfe, 0xfdfdfdfe, 0x01fffdfe, 0xfdfffdfe, 0x0001fdfe, 0xfffdfdfe, 
-	0x00000200, 0x02020200, 0xfdfe0200, 0x02000200, 0xfe000200, 0x00020200, 0xfffe0200, 0xfffffe00, 
-	0x0201fe00, 0xfdfdfe00, 0x01fffe00, 0xfdfffe00, 0x0001fe00, 0xfffdfe00, 0x00000002, 0x02020002, 
-	0xfdfe0002, 0x02000002, 0xfe000002, 0x00020002, 0xfffe0002, 0xfffffffe, 0x0201fffe, 0xfdfdfffe, 
-	0x01fffffe, 0xfdfffffe, 0x0001fffe, 0xfffdfffe, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000303, 0xfffffcfd, 0x00000300, 0xfffffd00, 0x00000003, 0xfffffffd, 0x00000606, 
-	0xfffff9fa, 0x00000903, 0xfffff6fd, 0x00000309, 0xfffffcf7, 0x000008fd, 0xfffff703, 0xfffffd09, 
-	0x000002f7, 0x000005fa, 0xfffffa06, 0x00000c06, 0xfffff3fa, 0x0000060c, 0xfffff9f4, 0x00000c0c, 
-	0xfffff3f4, 0x00000f00, 0xfffff100, 0x0000000f, 0xfffffff1, 0x00000bf7, 0xfffff409, 0xfffff70c, 
-	0x000008f4, 0x0000180f, 0xffffe7f1, 0x00000f18, 0xfffff0e8, 0x000011fa, 0xffffee06, 0xfffffa12, 
-	0x000005ee, 0x00001806, 0xffffe7fa, 0x00000618, 0xfffff9e8, 0x00001818, 0xffffe7e8, 0x00001b00, 
-	0xffffe500, 0x0000001b, 0xffffffe5, 0x000011ee, 0xffffee12, 0x000017f4, 0xffffe80c, 0xfffff418, 
-	0x00000be8, 0x0000270f, 0xffffd8f1, 0x00000f27, 0xfffff0d9, 0x00002a1b, 0xffffd5e5, 0x00001b2a, 
-	0xffffe4d6, 0x000020f7, 0xffffdf09, 0xfffff721, 0x000008df, 0x00002a06, 0xffffd5fa, 0x0000062a, 
-	0xfffff9d6, 0x00002d2d, 0xffffd2d3, 0x000032fd, 0xffffcd03, 0xfffffd33, 0x000002cd, 0x000020e5, 
-	0xffffdf1b, 0xffffe521, 0x00001adf, 0x00002ceb, 0xffffd315, 0xffffeb2d, 0x000014d3, 0x00004521, 
-	0xffffbadf, 0x00002145, 0xffffdebb, 0x00004512, 0xffffbaee, 0x00001245, 0xffffedbb, 0x00004836, 
-	0xffffb7ca, 0x00003648, 0xffffc9b8, 0x00003eee, 0xffffc112, 0xffffee3f, 0x000011c1, 0x00004e06, 
-	0xffffb1fa, 0x0000064e, 0xfffff9b2, 0x00005151, 0xffffaeaf, 0x000032cd, 0xffffcd33, 0x00003ed6, 
-	0xffffc12a, 0xffffd63f, 0x000029c1, 0x000059f7, 0xffffa609, 0xfffff75a, 0x000008a6, 0x0000722a, 
-	0xffff8dd6, 0x00002a72, 0xffffd58e, 0x0000753f, 0xffff8ac1, 0x00003f75, 0xffffc08b, 0x000050dc, 
-	0xffffaf24, 0xffffdc51, 0x000023af, 0x00007815, 0xffff87eb, 0x00001578, 0xffffea88, 0x00007b60, 
-	0xffff84a0, 0x0000607b, 0xffff9f85, 0x00006ee2, 0xffff911e, 0xffffe26f, 0x00001d91, 0x00005cb2, 
-	0xffffa34e, 0xffffb25d, 0x00004da3, 0x000071bb, 0xffff8e45, 0xffffbb72, 0x0000448e, 0x00001212, 
-	0xffffedee, 0x00002121, 0xffffdedf, 0x00003f3f, 0xffffc0c1, 0x00006c6c, 0xffff9394, 0x00000000, 
-	0x03030000, 0xfcfd0000, 0x03000000, 0xfd000000, 0x00030000, 0xfffd0000, 0x06060000, 0xf9fa0000, 
-	0x00000303, 0x03030303, 0xfcfd0303, 0x03000303, 0xfd000303, 0x00030303, 0xfffd0303, 0x06060303, 
-	0xf9fa0303, 0xfffffcfd, 0x0302fcfd, 0xfcfcfcfd, 0x02fffcfd, 0xfcfffcfd, 0x0002fcfd, 0xfffcfcfd, 
-	0x0605fcfd, 0xf9f9fcfd, 0x00000300, 0x03030300, 0xfcfd0300, 0x03000300, 0xfd000300, 0x00030300, 
-	0xfffd0300, 0x06060300, 0xf9fa0300, 0xfffffd00, 0x0302fd00, 0xfcfcfd00, 0x02fffd00, 0xfcfffd00, 
-	0x0002fd00, 0xfffcfd00, 0x0605fd00, 0xf9f9fd00, 0x00000003, 0x03030003, 0xfcfd0003, 0x03000003, 
-	0xfd000003, 0x00030003, 0xfffd0003, 0x06060003, 0xf9fa0003, 0xfffffffd, 0x0302fffd, 0xfcfcfffd, 
-	0x02fffffd, 0xfcfffffd, 0x0002fffd, 0xfffcfffd, 0x0605fffd, 0xf9f9fffd, 0x00000606, 0x03030606, 
-	0xfcfd0606, 0x03000606, 0xfd000606, 0x00030606, 0xfffd0606, 0x06060606, 0xf9fa0606, 0xfffff9fa, 
-	0x0302f9fa, 0xfcfcf9fa, 0x02fff9fa, 0xfcfff9fa, 0x0002f9fa, 0xfffcf9fa, 0x0605f9fa, 0xf9f9f9fa, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000404, 0xfffffbfc, 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x00000804, 
-	0xfffff7fc, 0x00000408, 0xfffffbf8, 0x00000808, 0xfffff7f8, 0x000007f8, 0xfffff808, 0x00000bfc, 
-	0xfffff404, 0xfffffc0c, 0x000003f4, 0x00001008, 0xffffeff8, 0x00000810, 0xfffff7f0, 0x00001010, 
-	0xffffeff0, 0x00001400, 0xffffec00, 0x00000014, 0xffffffec, 0x00000ff4, 0xfffff00c, 0xfffff410, 
-	0x00000bf0, 0x000017fc, 0xffffe804, 0xfffffc18, 0x000003e8, 0x00002010, 0xffffdff0, 0x00001020, 
-	0xffffefe0, 0x00002008, 0xffffdff8, 0x00000820, 0xfffff7e0, 0x00002020, 0xffffdfe0, 0x00002400, 
-	0xffffdc00, 0x00000024, 0xffffffdc, 0x000017e8, 0xffffe818, 0x00001ff0, 0xffffe010, 0xfffff020, 
-	0x00000fe0, 0x00003414, 0xffffcbec, 0x00001434, 0xffffebcc, 0x00003824, 0xffffc7dc, 0x00002438, 
-	0xffffdbc8, 0x00002bf4, 0xffffd40c, 0xfffff42c, 0x00000bd4, 0x00003808, 0xffffc7f8, 0x00000838, 
-	0xfffff7c8, 0x00003c3c, 0xffffc3c4, 0x00003ffc, 0xffffc004, 0xfffffc40, 0x000003c0, 0x00002bdc, 
-	0xffffd424, 0xffffdc2c, 0x000023d4, 0x00003be4, 0xffffc41c, 0xffffe43c, 0x00001bc4, 0x00005c2c, 
-	0xffffa3d4, 0x00002c5c, 0xffffd3a4, 0x00005c18, 0xffffa3e8, 0x0000185c, 0xffffe7a4, 0x00006048, 
-	0xffff9fb8, 0x00004860, 0xffffb7a0, 0x000053ec, 0xffffac14, 0xffffec54, 0x000013ac, 0x00006408, 
-	0xffff9bf8, 0x00000864, 0xfffff79c, 0x00006c6c, 0xffff9394, 0x000043bc, 0xffffbc44, 0x000053c8, 
-	0xffffac38, 0xffffc854, 0x000037ac, 0x000077f4, 0xffff880c, 0xfffff478, 0x00000b88, 0x00006bd0, 
-	0xffff9430, 0xffffd06c, 0x00002f94, 0x00007b98, 0xffff8468, 0xffff987c, 0x00006784, 0x00001818, 
-	0xffffe7e8, 0x00002c2c, 0xffffd3d4, 0x00005454, 0xffffabac, 0x00000000, 0x04040000, 0xfbfc0000, 
-	0x04000000, 0xfc000000, 0x00040000, 0xfffc0000, 0x08040000, 0xf7fc0000, 0x04080000, 0x00000404, 
-	0x04040404, 0xfbfc0404, 0x04000404, 0xfc000404, 0x00040404, 0xfffc0404, 0x08040404, 0xf7fc0404, 
-	0x04080404, 0xfffffbfc, 0x0403fbfc, 0xfbfbfbfc, 0x03fffbfc, 0xfbfffbfc, 0x0003fbfc, 0xfffbfbfc, 
-	0x0803fbfc, 0xf7fbfbfc, 0x0407fbfc, 0x00000400, 0x04040400, 0xfbfc0400, 0x04000400, 0xfc000400, 
-	0x00040400, 0xfffc0400, 0x08040400, 0xf7fc0400, 0x04080400, 0xfffffc00, 0x0403fc00, 0xfbfbfc00, 
-	0x03fffc00, 0xfbfffc00, 0x0003fc00, 0xfffbfc00, 0x0803fc00, 0xf7fbfc00, 0x0407fc00, 0x00000004, 
-	0x04040004, 0xfbfc0004, 0x04000004, 0xfc000004, 0x00040004, 0xfffc0004, 0x08040004, 0xf7fc0004, 
-	0x04080004, 0xfffffffc, 0x0403fffc, 0xfbfbfffc, 0x03fffffc, 0xfbfffffc, 0x0003fffc, 0xfffbfffc, 
-	0x0803fffc, 0xf7fbfffc, 0x0407fffc, 0x00000804, 0x04040804, 0xfbfc0804, 0x04000804, 0xfc000804, 
-	0x00040804, 0xfffc0804, 0x08040804, 0xf7fc0804, 0x04080804, 0xfffff7fc, 0x0403f7fc, 0xfbfbf7fc, 
-	0x03fff7fc, 0xfbfff7fc, 0x0003f7fc, 0xfffbf7fc, 0x0803f7fc, 0xf7fbf7fc, 0x0407f7fc, 0x00000408, 
-	0x04040408, 0xfbfc0408, 0x04000408, 0xfc000408, 0x00040408, 0xfffc0408, 0x08040408, 0xf7fc0408, 
-	0x04080408, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000505, 0xfffffafb, 0x00000500, 0xfffffb00, 0x00000005, 0xfffffffb, 0x00000a0a, 
-	0xfffff5f6, 0x00000f05, 0xfffff0fb, 0x0000050f, 0xfffffaf1, 0x000009f6, 0xfffff60a, 0x00000efb, 
-	0xfffff105, 0xfffffb0f, 0x000004f1, 0x0000140a, 0xffffebf6, 0x00000a14, 0xfffff5ec, 0x00001414, 
-	0xffffebec, 0x00001900, 0xffffe700, 0x00000019, 0xffffffe7, 0x000013f1, 0xffffec0f, 0xfffff114, 
-	0x00000eec, 0x00002819, 0xffffd7e7, 0x00001928, 0xffffe6d8, 0x00001df6, 0xffffe20a, 0xfffff61e, 
-	0x000009e2, 0x0000280a, 0xffffd7f6, 0x00000a28, 0xfffff5d8, 0x00002828, 0xffffd7d8, 0x00002d00, 
-	0xffffd300, 0x0000002d, 0xffffffd3, 0x00001de2, 0xffffe21e, 0x000027ec, 0xffffd814, 0xffffec28, 
-	0x000013d8, 0x00004119, 0xffffbee7, 0x00001941, 0xffffe6bf, 0x0000462d, 0xffffb9d3, 0x00002d46, 
-	0xffffd2ba, 0x000036f1, 0xffffc90f, 0xfffff137, 0x00000ec9, 0x0000460a, 0xffffb9f6, 0x00000a46, 
-	0xfffff5ba, 0x00004b4b, 0xffffb4b5, 0x000054fb, 0xffffab05, 0xfffffb55, 0x000004ab, 0x000036d3, 
-	0xffffc92d, 0xffffd337, 0x00002cc9, 0x00004add, 0xffffb523, 0xffffdd4b, 0x000022b5, 0x00007337, 
-	0xffff8cc9, 0x00003773, 0xffffc88d, 0x0000731e, 0xffff8ce2, 0x00001e73, 0xffffe18d, 0x0000785a, 
-	0xffff87a6, 0x00005a78, 0xffffa588, 0x000068e2, 0xffff971e, 0xffffe269, 0x00001d97, 0x000054ab, 
-	0xffffab55, 0x000068ba, 0xffff9746, 0xffffba69, 0x00004597, 0x00001e1e, 0xffffe1e2, 0x00003c3c, 
-	0xffffc3c4, 0x00006969, 0xffff9697, 0x00000000, 0x05050000, 0xfafb0000, 0x05000000, 0xfb000000, 
-	0x00050000, 0xfffb0000, 0x0a0a0000, 0xf5f60000, 0x0f050000, 0xf0fb0000, 0x00000505, 0x05050505, 
-	0xfafb0505, 0x05000505, 0xfb000505, 0x00050505, 0xfffb0505, 0x0a0a0505, 0xf5f60505, 0x0f050505, 
-	0xf0fb0505, 0xfffffafb, 0x0504fafb, 0xfafafafb, 0x04fffafb, 0xfafffafb, 0x0004fafb, 0xfffafafb, 
-	0x0a09fafb, 0xf5f5fafb, 0x0f04fafb, 0xf0fafafb, 0x00000500, 0x05050500, 0xfafb0500, 0x05000500, 
-	0xfb000500, 0x00050500, 0xfffb0500, 0x0a0a0500, 0xf5f60500, 0x0f050500, 0xf0fb0500, 0xfffffb00, 
-	0x0504fb00, 0xfafafb00, 0x04fffb00, 0xfafffb00, 0x0004fb00, 0xfffafb00, 0x0a09fb00, 0xf5f5fb00, 
-	0x0f04fb00, 0xf0fafb00, 0x00000005, 0x05050005, 0xfafb0005, 0x05000005, 0xfb000005, 0x00050005, 
-	0xfffb0005, 0x0a0a0005, 0xf5f60005, 0x0f050005, 0xf0fb0005, 0xfffffffb, 0x0504fffb, 0xfafafffb, 
-	0x04fffffb, 0xfafffffb, 0x0004fffb, 0xfffafffb, 0x0a09fffb, 0xf5f5fffb, 0x0f04fffb, 0xf0fafffb, 
-	0x00000a0a, 0x05050a0a, 0xfafb0a0a, 0x05000a0a, 0xfb000a0a, 0x00050a0a, 0xfffb0a0a, 0x0a0a0a0a, 
-	0xf5f60a0a, 0x0f050a0a, 0xf0fb0a0a, 0xfffff5f6, 0x0504f5f6, 0xfafaf5f6, 0x04fff5f6, 0xfafff5f6, 
-	0x0004f5f6, 0xfffaf5f6, 0x0a09f5f6, 0xf5f5f5f6, 0x0f04f5f6, 0xf0faf5f6, 0x00000f05, 0x05050f05, 
-	0xfafb0f05, 0x05000f05, 0xfb000f05, 0x00050f05, 0xfffb0f05, 0x0a0a0f05, 0xf5f60f05, 0x0f050f05, 
-	0xf0fb0f05, 0xfffff0fb, 0x0504f0fb, 0xfafaf0fb, 0x04fff0fb, 0xfafff0fb, 0x0004f0fb, 0xfffaf0fb, 
-	0x0a09f0fb, 0xf5f5f0fb, 0x0f04f0fb, 0xf0faf0fb, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000606, 0xfffff9fa, 0x00000600, 0xfffffa00, 0x00000006, 0xfffffffa, 0x00000c0c, 
-	0xfffff3f4, 0x00000c06, 0xfffff3fa, 0x0000060c, 0xfffff9f4, 0x00000bf4, 0xfffff40c, 0x000011fa, 
-	0xffffee06, 0xfffffa12, 0x000005ee, 0x0000180c, 0xffffe7f4, 0x00000c18, 0xfffff3e8, 0x00001818, 
-	0xffffe7e8, 0x00001e00, 0xffffe200, 0x0000001e, 0xffffffe2, 0x000017ee, 0xffffe812, 0xffffee18, 
-	0x000011e8, 0x0000301e, 0xffffcfe2, 0x00001e30, 0xffffe1d0, 0x000023fa, 0xffffdc06, 0xfffffa24, 
-	0x000005dc, 0x0000300c, 0xffffcff4, 0x00000c30, 0xfffff3d0, 0x00003030, 0xffffcfd0, 0x00003600, 
-	0xffffca00, 0x00000036, 0xffffffca, 0x000023dc, 0xffffdc24, 0x00002fe8, 0xffffd018, 0xffffe830, 
-	0x000017d0, 0x00004e1e, 0xffffb1e2, 0x00001e4e, 0xffffe1b2, 0x00005436, 0xffffabca, 0x00003654, 
-	0xffffc9ac, 0x000041ee, 0xffffbe12, 0xffffee42, 0x000011be, 0x0000540c, 0xffffabf4, 0x00000c54, 
-	0xfffff3ac, 0x00005a5a, 0xffffa5a6, 0x00005ffa, 0xffffa006, 0xfffffa60, 0x000005a0, 0x000041ca, 
-	0xffffbe36, 0xffffca42, 0x000035be, 0x000059d6, 0xffffa62a, 0xffffd65a, 0x000029a6, 0x00007de2, 
-	0xffff821e, 0xffffe27e, 0x00001d82, 0x0000659a, 0xffff9a66, 0x00007dac, 0xffff8254, 0xffffac7e, 
-	0x00005382, 0x00002424, 0xffffdbdc, 0x00004242, 0xffffbdbe, 0x00000000, 0x06060000, 0xf9fa0000, 
-	0x06000000, 0xfa000000, 0x00060000, 0xfffa0000, 0x0c0c0000, 0xf3f40000, 0x0c060000, 0xf3fa0000, 
-	0x060c0000, 0x00000606, 0x06060606, 0xf9fa0606, 0x06000606, 0xfa000606, 0x00060606, 0xfffa0606, 
-	0x0c0c0606, 0xf3f40606, 0x0c060606, 0xf3fa0606, 0x060c0606, 0xfffff9fa, 0x0605f9fa, 0xf9f9f9fa, 
-	0x05fff9fa, 0xf9fff9fa, 0x0005f9fa, 0xfff9f9fa, 0x0c0bf9fa, 0xf3f3f9fa, 0x0c05f9fa, 0xf3f9f9fa, 
-	0x060bf9fa, 0x00000600, 0x06060600, 0xf9fa0600, 0x06000600, 0xfa000600, 0x00060600, 0xfffa0600, 
-	0x0c0c0600, 0xf3f40600, 0x0c060600, 0xf3fa0600, 0x060c0600, 0xfffffa00, 0x0605fa00, 0xf9f9fa00, 
-	0x05fffa00, 0xf9fffa00, 0x0005fa00, 0xfff9fa00, 0x0c0bfa00, 0xf3f3fa00, 0x0c05fa00, 0xf3f9fa00, 
-	0x060bfa00, 0x00000006, 0x06060006, 0xf9fa0006, 0x06000006, 0xfa000006, 0x00060006, 0xfffa0006, 
-	0x0c0c0006, 0xf3f40006, 0x0c060006, 0xf3fa0006, 0x060c0006, 0xfffffffa, 0x0605fffa, 0xf9f9fffa, 
-	0x05fffffa, 0xf9fffffa, 0x0005fffa, 0xfff9fffa, 0x0c0bfffa, 0xf3f3fffa, 0x0c05fffa, 0xf3f9fffa, 
-	0x060bfffa, 0x00000c0c, 0x06060c0c, 0xf9fa0c0c, 0x06000c0c, 0xfa000c0c, 0x00060c0c, 0xfffa0c0c, 
-	0x0c0c0c0c, 0xf3f40c0c, 0x0c060c0c, 0xf3fa0c0c, 0x060c0c0c, 0xfffff3f4, 0x0605f3f4, 0xf9f9f3f4, 
-	0x05fff3f4, 0xf9fff3f4, 0x0005f3f4, 0xfff9f3f4, 0x0c0bf3f4, 0xf3f3f3f4, 0x0c05f3f4, 0xf3f9f3f4, 
-	0x060bf3f4, 0x00000c06, 0x06060c06, 0xf9fa0c06, 0x06000c06, 0xfa000c06, 0x00060c06, 0xfffa0c06, 
-	0x0c0c0c06, 0xf3f40c06, 0x0c060c06, 0xf3fa0c06, 0x060c0c06, 0xfffff3fa, 0x0605f3fa, 0xf9f9f3fa, 
-	0x05fff3fa, 0xf9fff3fa, 0x0005f3fa, 0xfff9f3fa, 0x0c0bf3fa, 0xf3f3f3fa, 0x0c05f3fa, 0xf3f9f3fa, 
-	0x060bf3fa, 0x0000060c, 0x0606060c, 0xf9fa060c, 0x0600060c, 0xfa00060c, 0x0006060c, 0xfffa060c, 
-	0x0c0c060c, 0xf3f4060c, 0x0c06060c, 0xf3fa060c, 0x060c060c, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000707, 0xfffff8f9, 0x00000700, 0xfffff900, 0x00000007, 0xfffffff9, 0x00000e0e, 
-	0xfffff1f2, 0x00001507, 0xffffeaf9, 0x00000715, 0xfffff8eb, 0x00000df2, 0xfffff20e, 0x000014f9, 
-	0xffffeb07, 0xfffff915, 0x000006eb, 0x00001c0e, 0xffffe3f2, 0x00000e1c, 0xfffff1e4, 0x00001c1c, 
-	0xffffe3e4, 0x00002300, 0xffffdd00, 0x00000023, 0xffffffdd, 0x00001beb, 0xffffe415, 0xffffeb1c, 
-	0x000014e4, 0x00003823, 0xffffc7dd, 0x00002338, 0xffffdcc8, 0x000029f2, 0xffffd60e, 0xfffff22a, 
-	0x00000dd6, 0x0000380e, 0xffffc7f2, 0x00000e38, 0xfffff1c8, 0x00003838, 0xffffc7c8, 0x00003f00, 
-	0xffffc100, 0x0000003f, 0xffffffc1, 0x000029d6, 0xffffd62a, 0x000037e4, 0xffffc81c, 0xffffe438, 
-	0x00001bc8, 0x00005b23, 0xffffa4dd, 0x0000235b, 0xffffdca5, 0x0000623f, 0xffff9dc1, 0x00003f62, 
-	0xffffc09e, 0x00004ceb, 0xffffb315, 0xffffeb4d, 0x000014b3, 0x0000620e, 0xffff9df2, 0x00000e62, 
-	0xfffff19e, 0x00006969, 0xffff9697, 0x000076f9, 0xffff8907, 0xfffff977, 0x00000689, 0x00004cc1, 
-	0xffffb33f, 0xffffc14d, 0x00003eb3, 0x000068cf, 0xffff9731, 0xffffcf69, 0x00003097, 0x00007689, 
-	0xffff8977, 0x00002a2a, 0xffffd5d6, 0x00004d4d, 0xffffb2b3, 0x00000000, 0x07070000, 0xf8f90000, 
-	0x07000000, 0xf9000000, 0x00070000, 0xfff90000, 0x0e0e0000, 0xf1f20000, 0x15070000, 0xeaf90000, 
-	0x07150000, 0x00000707, 0x07070707, 0xf8f90707, 0x07000707, 0xf9000707, 0x00070707, 0xfff90707, 
-	0x0e0e0707, 0xf1f20707, 0x15070707, 0xeaf90707, 0x07150707, 0xfffff8f9, 0x0706f8f9, 0xf8f8f8f9, 
-	0x06fff8f9, 0xf8fff8f9, 0x0006f8f9, 0xfff8f8f9, 0x0e0df8f9, 0xf1f1f8f9, 0x1506f8f9, 0xeaf8f8f9, 
-	0x0714f8f9, 0x00000700, 0x07070700, 0xf8f90700, 0x07000700, 0xf9000700, 0x00070700, 0xfff90700, 
-	0x0e0e0700, 0xf1f20700, 0x15070700, 0xeaf90700, 0x07150700, 0xfffff900, 0x0706f900, 0xf8f8f900, 
-	0x06fff900, 0xf8fff900, 0x0006f900, 0xfff8f900, 0x0e0df900, 0xf1f1f900, 0x1506f900, 0xeaf8f900, 
-	0x0714f900, 0x00000007, 0x07070007, 0xf8f90007, 0x07000007, 0xf9000007, 0x00070007, 0xfff90007, 
-	0x0e0e0007, 0xf1f20007, 0x15070007, 0xeaf90007, 0x07150007, 0xfffffff9, 0x0706fff9, 0xf8f8fff9, 
-	0x06fffff9, 0xf8fffff9, 0x0006fff9, 0xfff8fff9, 0x0e0dfff9, 0xf1f1fff9, 0x1506fff9, 0xeaf8fff9, 
-	0x0714fff9, 0x00000e0e, 0x07070e0e, 0xf8f90e0e, 0x07000e0e, 0xf9000e0e, 0x00070e0e, 0xfff90e0e, 
-	0x0e0e0e0e, 0xf1f20e0e, 0x15070e0e, 0xeaf90e0e, 0x07150e0e, 0xfffff1f2, 0x0706f1f2, 0xf8f8f1f2, 
-	0x06fff1f2, 0xf8fff1f2, 0x0006f1f2, 0xfff8f1f2, 0x0e0df1f2, 0xf1f1f1f2, 0x1506f1f2, 0xeaf8f1f2, 
-	0x0714f1f2, 0x00001507, 0x07071507, 0xf8f91507, 0x07001507, 0xf9001507, 0x00071507, 0xfff91507, 
-	0x0e0e1507, 0xf1f21507, 0x15071507, 0xeaf91507, 0x07151507, 0xffffeaf9, 0x0706eaf9, 0xf8f8eaf9, 
-	0x06ffeaf9, 0xf8ffeaf9, 0x0006eaf9, 0xfff8eaf9, 0x0e0deaf9, 0xf1f1eaf9, 0x1506eaf9, 0xeaf8eaf9, 
-	0x0714eaf9, 0x00000715, 0x07070715, 0xf8f90715, 0x07000715, 0xf9000715, 0x00070715, 0xfff90715, 
-	0x0e0e0715, 0xf1f20715, 0x15070715, 0xeaf90715, 0x07150715, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000808, 0xfffff7f8, 0x00000800, 0xfffff800, 0x00000008, 0xfffffff8, 0x00001010, 
-	0xffffeff0, 0x00001008, 0xffffeff8, 0x00000810, 0xfffff7f0, 0x00000ff0, 0xfffff010, 0x000017f8, 
-	0xffffe808, 0xfffff818, 0x000007e8, 0x00002010, 0xffffdff0, 0x00001020, 0xffffefe0, 0x00002020, 
-	0xffffdfe0, 0x00002800, 0xffffd800, 0x00000028, 0xffffffd8, 0x00001fe8, 0xffffe018, 0xffffe820, 
-	0x000017e0, 0x00004028, 0xffffbfd8, 0x00002840, 0xffffd7c0, 0x00002ff0, 0xffffd010, 0xfffff030, 
-	0x00000fd0, 0x00004010, 0xffffbff0, 0x00001040, 0xffffefc0, 0x00004040, 0xffffbfc0, 0x00004800, 
-	0xffffb800, 0x00000048, 0xffffffb8, 0x00002fd0, 0xffffd030, 0x00003fe0, 0xffffc020, 0xffffe040, 
-	0x00001fc0, 0x00006828, 0xffff97d8, 0x00002868, 0xffffd798, 0x00007048, 0xffff8fb8, 0x00004870, 
-	0xffffb790, 0x000057e8, 0xffffa818, 0xffffe858, 0x000017a8, 0x00007010, 0xffff8ff0, 0x00001070, 
-	0xffffef90, 0x00007878, 0xffff8788, 0x000057b8, 0xffffa848, 0xffffb858, 0x000047a8, 0x000077c8, 
-	0xffff8838, 0xffffc878, 0x00003788, 0x00003030, 0xffffcfd0, 0x00005858, 0xffffa7a8, 0x00000000, 
-	0x08080000, 0xf7f80000, 0x08000000, 0xf8000000, 0x00080000, 0xfff80000, 0x10100000, 0xeff00000, 
-	0x10080000, 0xeff80000, 0x08100000, 0x00000808, 0x08080808, 0xf7f80808, 0x08000808, 0xf8000808, 
-	0x00080808, 0xfff80808, 0x10100808, 0xeff00808, 0x10080808, 0xeff80808, 0x08100808, 0xfffff7f8, 
-	0x0807f7f8, 0xf7f7f7f8, 0x07fff7f8, 0xf7fff7f8, 0x0007f7f8, 0xfff7f7f8, 0x100ff7f8, 0xefeff7f8, 
-	0x1007f7f8, 0xeff7f7f8, 0x080ff7f8, 0x00000800, 0x08080800, 0xf7f80800, 0x08000800, 0xf8000800, 
-	0x00080800, 0xfff80800, 0x10100800, 0xeff00800, 0x10080800, 0xeff80800, 0x08100800, 0xfffff800, 
-	0x0807f800, 0xf7f7f800, 0x07fff800, 0xf7fff800, 0x0007f800, 0xfff7f800, 0x100ff800, 0xefeff800, 
-	0x1007f800, 0xeff7f800, 0x080ff800, 0x00000008, 0x08080008, 0xf7f80008, 0x08000008, 0xf8000008, 
-	0x00080008, 0xfff80008, 0x10100008, 0xeff00008, 0x10080008, 0xeff80008, 0x08100008, 0xfffffff8, 
-	0x0807fff8, 0xf7f7fff8, 0x07fffff8, 0xf7fffff8, 0x0007fff8, 0xfff7fff8, 0x100ffff8, 0xefeffff8, 
-	0x1007fff8, 0xeff7fff8, 0x080ffff8, 0x00001010, 0x08081010, 0xf7f81010, 0x08001010, 0xf8001010, 
-	0x00081010, 0xfff81010, 0x10101010, 0xeff01010, 0x10081010, 0xeff81010, 0x08101010, 0xffffeff0, 
-	0x0807eff0, 0xf7f7eff0, 0x07ffeff0, 0xf7ffeff0, 0x0007eff0, 0xfff7eff0, 0x100feff0, 0xefefeff0, 
-	0x1007eff0, 0xeff7eff0, 0x080feff0, 0x00001008, 0x08081008, 0xf7f81008, 0x08001008, 0xf8001008, 
-	0x00081008, 0xfff81008, 0x10101008, 0xeff01008, 0x10081008, 0xeff81008, 0x08101008, 0xffffeff8, 
-	0x0807eff8, 0xf7f7eff8, 0x07ffeff8, 0xf7ffeff8, 0x0007eff8, 0xfff7eff8, 0x100feff8, 0xefefeff8, 
-	0x1007eff8, 0xeff7eff8, 0x080feff8, 0x00000810, 0x08080810, 0xf7f80810, 0x08000810, 0xf8000810, 
-	0x00080810, 0xfff80810, 0x10100810, 0xeff00810, 0x10080810, 0xeff80810, 0x08100810, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000909, 0xfffff6f7, 0x00000900, 0xfffff700, 0x00000009, 0xfffffff7, 0x00001212, 
-	0xffffedee, 0x00001b09, 0xffffe4f7, 0x0000091b, 0xfffff6e5, 0x000011ee, 0xffffee12, 0x00001af7, 
-	0xffffe509, 0xfffff71b, 0x000008e5, 0x00002412, 0xffffdbee, 0x00001224, 0xffffeddc, 0x00002424, 
-	0xffffdbdc, 0x00002d00, 0xffffd300, 0x0000002d, 0xffffffd3, 0x000023e5, 0xffffdc1b, 0xffffe524, 
-	0x00001adc, 0x0000482d, 0xffffb7d3, 0x00002d48, 0xffffd2b8, 0x000035ee, 0xffffca12, 0xffffee36, 
-	0x000011ca, 0x00004812, 0xffffb7ee, 0x00001248, 0xffffedb8, 0x00004848, 0xffffb7b8, 0x00005100, 
-	0xffffaf00, 0x00000051, 0xffffffaf, 0x000035ca, 0xffffca36, 0x000047dc, 0xffffb824, 0xffffdc48, 
-	0x000023b8, 0x0000752d, 0xffff8ad3, 0x00002d75, 0xffffd28b, 0x00007e51, 0xffff81af, 0x0000517e, 
-	0xffffae82, 0x000062e5, 0xffff9d1b, 0xffffe563, 0x00001a9d, 0x000062af, 0xffff9d51, 0xffffaf63, 
-	0x0000509d, 0x00003636, 0xffffc9ca, 0x00006c6c, 0xffff9394, 0x00000000, 0x09090000, 0xf6f70000, 
-	0x09000000, 0xf7000000, 0x00090000, 0xfff70000, 0x12120000, 0xedee0000, 0x1b090000, 0xe4f70000, 
-	0x091b0000, 0xf6e50000, 0x00000909, 0x09090909, 0xf6f70909, 0x09000909, 0xf7000909, 0x00090909, 
-	0xfff70909, 0x12120909, 0xedee0909, 0x1b090909, 0xe4f70909, 0x091b0909, 0xf6e50909, 0xfffff6f7, 
-	0x0908f6f7, 0xf6f6f6f7, 0x08fff6f7, 0xf6fff6f7, 0x0008f6f7, 0xfff6f6f7, 0x1211f6f7, 0xededf6f7, 
-	0x1b08f6f7, 0xe4f6f6f7, 0x091af6f7, 0xf6e4f6f7, 0x00000900, 0x09090900, 0xf6f70900, 0x09000900, 
-	0xf7000900, 0x00090900, 0xfff70900, 0x12120900, 0xedee0900, 0x1b090900, 0xe4f70900, 0x091b0900, 
-	0xf6e50900, 0xfffff700, 0x0908f700, 0xf6f6f700, 0x08fff700, 0xf6fff700, 0x0008f700, 0xfff6f700, 
-	0x1211f700, 0xededf700, 0x1b08f700, 0xe4f6f700, 0x091af700, 0xf6e4f700, 0x00000009, 0x09090009, 
-	0xf6f70009, 0x09000009, 0xf7000009, 0x00090009, 0xfff70009, 0x12120009, 0xedee0009, 0x1b090009, 
-	0xe4f70009, 0x091b0009, 0xf6e50009, 0xfffffff7, 0x0908fff7, 0xf6f6fff7, 0x08fffff7, 0xf6fffff7, 
-	0x0008fff7, 0xfff6fff7, 0x1211fff7, 0xededfff7, 0x1b08fff7, 0xe4f6fff7, 0x091afff7, 0xf6e4fff7, 
-	0x00001212, 0x09091212, 0xf6f71212, 0x09001212, 0xf7001212, 0x00091212, 0xfff71212, 0x12121212, 
-	0xedee1212, 0x1b091212, 0xe4f71212, 0x091b1212, 0xf6e51212, 0xffffedee, 0x0908edee, 0xf6f6edee, 
-	0x08ffedee, 0xf6ffedee, 0x0008edee, 0xfff6edee, 0x1211edee, 0xedededee, 0x1b08edee, 0xe4f6edee, 
-	0x091aedee, 0xf6e4edee, 0x00001b09, 0x09091b09, 0xf6f71b09, 0x09001b09, 0xf7001b09, 0x00091b09, 
-	0xfff71b09, 0x12121b09, 0xedee1b09, 0x1b091b09, 0xe4f71b09, 0x091b1b09, 0xf6e51b09, 0xffffe4f7, 
-	0x0908e4f7, 0xf6f6e4f7, 0x08ffe4f7, 0xf6ffe4f7, 0x0008e4f7, 0xfff6e4f7, 0x1211e4f7, 0xedede4f7, 
-	0x1b08e4f7, 0xe4f6e4f7, 0x091ae4f7, 0xf6e4e4f7, 0x0000091b, 0x0909091b, 0xf6f7091b, 0x0900091b, 
-	0xf700091b, 0x0009091b, 0xfff7091b, 0x1212091b, 0xedee091b, 0x1b09091b, 0xe4f7091b, 0x091b091b, 
-	0xf6e5091b, 0xfffff6e5, 0x0908f6e5, 0xf6f6f6e5, 0x08fff6e5, 0xf6fff6e5, 0x0008f6e5, 0xfff6f6e5, 
-	0x1211f6e5, 0xededf6e5, 0x1b08f6e5, 0xe4f6f6e5, 0x091af6e5, 0xf6e4f6e5, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000202, 0xfffffdfe, 0x00000300, 0xfffffd00, 0x00000003, 0xfffffffd, 0x00000606, 
-	0xfffff9fa, 0x00000700, 0xfffff900, 0x00000007, 0xfffffff9, 0x000004fb, 0xfffffb05, 0xfffffb05, 
-	0x000004fb, 0x00000b06, 0xfffff4fa, 0x0000060b, 0xfffff9f5, 0x00000800, 0xfffff800, 0x00000008, 
-	0xfffffff8, 0x00000b0b, 0xfffff4f5, 0x00000c00, 0xfffff400, 0x0000000c, 0xfffffff4, 0x0000110c, 
-	0xffffeef4, 0x00000c11, 0xfffff3ef, 0x00001111, 0xffffeeef, 0x00001206, 0xffffedfa, 0x00000612, 
-	0xfffff9ee, 0x00000af8, 0xfffff508, 0xfffff80b, 0x000007f5, 0x00000f00, 0xfffff100, 0x0000000f, 
-	0xfffffff1, 0x00001400, 0xffffec00, 0x00000014, 0xffffffec, 0x00001912, 0xffffe6ee, 0x00001219, 
-	0xffffede7, 0x0000190b, 0xffffe6f5, 0x00000b19, 0xfffff4e7, 0x00001919, 0xffffe6e7, 0x00000df2, 
-	0xfffff20e, 0xfffff20e, 0x00000df2, 0x00001a00, 0xffffe600, 0x0000001a, 0xffffffe6, 0x000011f5, 
-	0xffffee0b, 0xfffff512, 0x00000aee, 0x000015f9, 0xffffea07, 0xfffff916, 0x000006ea, 0x0000221a, 
-	0xffffdde6, 0x00001a22, 0xffffe5de, 0x00002212, 0xffffddee, 0x00001222, 0xffffedde, 0x00002222, 
-	0xffffddde, 0x0000230b, 0xffffdcf5, 0x00000b23, 0xfffff4dd, 0x00001d00, 0xffffe300, 0x0000001d, 
-	0xffffffe3, 0x000015ed, 0xffffea13, 0xffffed16, 0x000012ea, 0x000019f1, 0xffffe60f, 0xfffff11a, 
-	0x00000ee6, 0x00002500, 0xffffdb00, 0x00000025, 0xffffffdb, 0x00002c1b, 0xffffd3e5, 0x00001b2c, 
-	0xffffe4d4, 0x00002c24, 0xffffd3dc, 0x0000242c, 0xffffdbd4, 0x00002c12, 0xffffd3ee, 0x0000122c, 
-	0xffffedd4, 0x000020f6, 0xffffdf0a, 0xfffff621, 0x000009df, 0x00002d2d, 0xffffd2d3, 0x00000000, 
-	0x00000000, 0x00000202, 0xfffffdfe, 0x00000300, 0xfffffd00, 0x00000003, 0xfffffffd, 0x00000606, 
-	0xfffff9fa, 0x00000700, 0xfffff900, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020300, 0x0201fd00, 
-	0x02020003, 0x0201fffd, 0x02020606, 0x0201f9fa, 0x02020700, 0x0201f900, 0xfdfe0000, 0xfdfe0202, 
-	0xfdfdfdfe, 0xfdfe0300, 0xfdfdfd00, 0xfdfe0003, 0xfdfdfffd, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0700, 
-	0xfdfdf900, 0x03000000, 0x03000202, 0x02fffdfe, 0x03000300, 0x02fffd00, 0x03000003, 0x02fffffd, 
-	0x03000606, 0x02fff9fa, 0x03000700, 0x02fff900, 0xfd000000, 0xfd000202, 0xfcfffdfe, 0xfd000300, 
-	0xfcfffd00, 0xfd000003, 0xfcfffffd, 0xfd000606, 0xfcfff9fa, 0xfd000700, 0xfcfff900, 0x00030000, 
-	0x00030202, 0x0002fdfe, 0x00030300, 0x0002fd00, 0x00030003, 0x0002fffd, 0x00030606, 0x0002f9fa, 
-	0x00030700, 0x0002f900, 0xfffd0000, 0xfffd0202, 0xfffcfdfe, 0xfffd0300, 0xfffcfd00, 0xfffd0003, 
-	0xfffcfffd, 0xfffd0606, 0xfffcf9fa, 0xfffd0700, 0xfffcf900, 0x06060000, 0x06060202, 0x0605fdfe, 
-	0x06060300, 0x0605fd00, 0x06060003, 0x0605fffd, 0x06060606, 0x0605f9fa, 0x06060700, 0x0605f900, 
-	0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0300, 0xf9f9fd00, 0xf9fa0003, 0xf9f9fffd, 0xf9fa0606, 
-	0xf9f9f9fa, 0xf9fa0700, 0xf9f9f900, 0x07000000, 0x07000202, 0x06fffdfe, 0x07000300, 0x06fffd00, 
-	0x07000003, 0x06fffffd, 0x07000606, 0x06fff9fa, 0x07000700, 0x06fff900, 0xf9000000, 0xf9000202, 
-	0xf8fffdfe, 0xf9000300, 0xf8fffd00, 0xf9000003, 0xf8fffffd, 0xf9000606, 0xf8fff9fa, 0xf9000700, 
-	0xf8fff900, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000202, 0xfffffdfe, 0x00000606, 
-	0xfffff9fa, 0x00000600, 0xfffffa00, 0x00000006, 0xfffffffa, 0x000003fc, 0xfffffc04, 0xfffffa0a, 
-	0x000005f6, 0xfffff400, 0x00000c00, 0xfffff3fa, 0xfffff406, 0x00000bfa, 0x00000c06, 0xfffffff2, 
-	0x0000000e, 0x00000c0c, 0xfffff3f4, 0xffffee00, 0x00001200, 0xfffff40e, 0x00000bf2, 0xfffff9ee, 
-	0xfffffa12, 0x000005ee, 0x00000612, 0xffffedf6, 0xffffee0a, 0x000011f6, 0x0000120a, 0xffffffea, 
-	0x00000016, 0xffffe800, 0x00001800, 0xfffff3ea, 0xfffff416, 0x00000bea, 0x00000c16, 0xffffe7f8, 
-	0xffffe808, 0x000017f8, 0x00001808, 0xfffff9e6, 0xfffffa1a, 0x000005e6, 0x0000061a, 0xffffffe4, 
-	0x0000001c, 0x00001414, 0xffffebec, 0xffffe5f2, 0x00001a0e, 0xfffff3e2, 0x00000c1e, 0xffffdff6, 
-	0x0000200a, 0xffffdfee, 0x00002012, 0xffffe5e6, 0x00001a1a, 0xffffebde, 0x00001422, 0xfffff3da, 
-	0x00000c26, 0xffffdfe0, 0x00002020, 0x00002020, 0xffffd7ea, 0xffffddde, 0x00002222, 0x00000000, 
-	0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 
-	0x00000600, 0xfffffa00, 0x00000006, 0xfffffffa, 0x02000000, 0x02000200, 0x01fffe00, 0x02000002, 
-	0x01fffffe, 0x02000202, 0x01fffdfe, 0x02000606, 0x01fff9fa, 0x02000600, 0x01fffa00, 0x02000006, 
-	0x01fffffa, 0xfe000000, 0xfe000200, 0xfdfffe00, 0xfe000002, 0xfdfffffe, 0xfe000202, 0xfdfffdfe, 
-	0xfe000606, 0xfdfff9fa, 0xfe000600, 0xfdfffa00, 0xfe000006, 0xfdfffffa, 0x00020000, 0x00020200, 
-	0x0001fe00, 0x00020002, 0x0001fffe, 0x00020202, 0x0001fdfe, 0x00020606, 0x0001f9fa, 0x00020600, 
-	0x0001fa00, 0x00020006, 0x0001fffa, 0xfffe0000, 0xfffe0200, 0xfffdfe00, 0xfffe0002, 0xfffdfffe, 
-	0xfffe0202, 0xfffdfdfe, 0xfffe0606, 0xfffdf9fa, 0xfffe0600, 0xfffdfa00, 0xfffe0006, 0xfffdfffa, 
-	0x02020000, 0x02020200, 0x0201fe00, 0x02020002, 0x0201fffe, 0x02020202, 0x0201fdfe, 0x02020606, 
-	0x0201f9fa, 0x02020600, 0x0201fa00, 0x02020006, 0x0201fffa, 0xfdfe0000, 0xfdfe0200, 0xfdfdfe00, 
-	0xfdfe0002, 0xfdfdfffe, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0600, 0xfdfdfa00, 
-	0xfdfe0006, 0xfdfdfffa, 0x06060000, 0x06060200, 0x0605fe00, 0x06060002, 0x0605fffe, 0x06060202, 
-	0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060600, 0x0605fa00, 0x06060006, 0x0605fffa, 0xf9fa0000, 
-	0xf9fa0200, 0xf9f9fe00, 0xf9fa0002, 0xf9f9fffe, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa, 
-	0xf9fa0600, 0xf9f9fa00, 0xf9fa0006, 0xf9f9fffa, 0x06000000, 0x06000200, 0x05fffe00, 0x06000002, 
-	0x05fffffe, 0x06000202, 0x05fffdfe, 0x06000606, 0x05fff9fa, 0x06000600, 0x05fffa00, 0x06000006, 
-	0x05fffffa, 0xfa000000, 0xfa000200, 0xf9fffe00, 0xfa000002, 0xf9fffffe, 0xfa000202, 0xf9fffdfe, 
-	0xfa000606, 0xf9fff9fa, 0xfa000600, 0xf9fffa00, 0xfa000006, 0xf9fffffa, 0x00060000, 0x00060200, 
-	0x0005fe00, 0x00060002, 0x0005fffe, 0x00060202, 0x0005fdfe, 0x00060606, 0x0005f9fa, 0x00060600, 
-	0x0005fa00, 0x00060006, 0x0005fffa, 0xfffa0000, 0xfffa0200, 0xfff9fe00, 0xfffa0002, 0xfff9fffe, 
-	0xfffa0202, 0xfff9fdfe, 0xfffa0606, 0xfff9f9fa, 0xfffa0600, 0xfff9fa00, 0xfffa0006, 0xfff9fffa, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000404, 0xfffffbfc, 0x00000a0a, 
-	0xfffff5f6, 0x00000a00, 0xfffff600, 0x0000000a, 0xfffffff6, 0x000005fa, 0xfffffa06, 0xfffff80e, 
-	0x000007f2, 0xffffffee, 0x00000012, 0xfffff00a, 0x00000ff6, 0xffffe800, 0x00001800, 0xfffff7e8, 
-	0xfffff818, 0x000007e8, 0x00000818, 0x00001212, 0xffffedee, 0xfffff014, 0x00000fec, 0xffffe5f2, 
-	0xffffe60e, 0x000019f2, 0x00001a0e, 0xffffffe2, 0x0000001e, 0xffffde00, 0x00002200, 0xfffff7de, 
-	0xfffff822, 0x000007de, 0x00000822, 0xffffede2, 0xffffee1e, 0x000011e2, 0x0000121e, 0xffffddf6, 
-	0xffffde0a, 0x000021f6, 0x0000220a, 0xffffddec, 0x00002214, 0xffffffd8, 0x00000028, 0x00001e1e, 
-	0xffffe1e2, 0xffffedd8, 0x00001228, 0xffffd400, 0x00002c00, 0xffffd3f0, 0x00002c10, 0xffffdbdc, 
-	0xffffdbdc, 0x00002424, 0xffffd3e6, 0x00002c1a, 0xffffe5d2, 0x00001a2e, 0xffffedcc, 0x00001234, 
-	0xffffc9ec, 0xffffd3d4, 0x00002c2c, 0xffffc9e0, 0xffffd1d2, 0xffffd1d2, 0x00002e2e, 0x00000000, 
-	0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000404, 0xfffffbfc, 0x00000a0a, 0xfffff5f6, 
-	0x00000a00, 0xfffff600, 0x0000000a, 0xfffffff6, 0x02000000, 0x02000200, 0x01fffe00, 0x02000002, 
-	0x01fffffe, 0x02000404, 0x01fffbfc, 0x02000a0a, 0x01fff5f6, 0x02000a00, 0x01fff600, 0x0200000a, 
-	0x01fffff6, 0xfe000000, 0xfe000200, 0xfdfffe00, 0xfe000002, 0xfdfffffe, 0xfe000404, 0xfdfffbfc, 
-	0xfe000a0a, 0xfdfff5f6, 0xfe000a00, 0xfdfff600, 0xfe00000a, 0xfdfffff6, 0x00020000, 0x00020200, 
-	0x0001fe00, 0x00020002, 0x0001fffe, 0x00020404, 0x0001fbfc, 0x00020a0a, 0x0001f5f6, 0x00020a00, 
-	0x0001f600, 0x0002000a, 0x0001fff6, 0xfffe0000, 0xfffe0200, 0xfffdfe00, 0xfffe0002, 0xfffdfffe, 
-	0xfffe0404, 0xfffdfbfc, 0xfffe0a0a, 0xfffdf5f6, 0xfffe0a00, 0xfffdf600, 0xfffe000a, 0xfffdfff6, 
-	0x04040000, 0x04040200, 0x0403fe00, 0x04040002, 0x0403fffe, 0x04040404, 0x0403fbfc, 0x04040a0a, 
-	0x0403f5f6, 0x04040a00, 0x0403f600, 0x0404000a, 0x0403fff6, 0xfbfc0000, 0xfbfc0200, 0xfbfbfe00, 
-	0xfbfc0002, 0xfbfbfffe, 0xfbfc0404, 0xfbfbfbfc, 0xfbfc0a0a, 0xfbfbf5f6, 0xfbfc0a00, 0xfbfbf600, 
-	0xfbfc000a, 0xfbfbfff6, 0x0a0a0000, 0x0a0a0200, 0x0a09fe00, 0x0a0a0002, 0x0a09fffe, 0x0a0a0404, 
-	0x0a09fbfc, 0x0a0a0a0a, 0x0a09f5f6, 0x0a0a0a00, 0x0a09f600, 0x0a0a000a, 0x0a09fff6, 0xf5f60000, 
-	0xf5f60200, 0xf5f5fe00, 0xf5f60002, 0xf5f5fffe, 0xf5f60404, 0xf5f5fbfc, 0xf5f60a0a, 0xf5f5f5f6, 
-	0xf5f60a00, 0xf5f5f600, 0xf5f6000a, 0xf5f5fff6, 0x0a000000, 0x0a000200, 0x09fffe00, 0x0a000002, 
-	0x09fffffe, 0x0a000404, 0x09fffbfc, 0x0a000a0a, 0x09fff5f6, 0x0a000a00, 0x09fff600, 0x0a00000a, 
-	0x09fffff6, 0xf6000000, 0xf6000200, 0xf5fffe00, 0xf6000002, 0xf5fffffe, 0xf6000404, 0xf5fffbfc, 
-	0xf6000a0a, 0xf5fff5f6, 0xf6000a00, 0xf5fff600, 0xf600000a, 0xf5fffff6, 0x000a0000, 0x000a0200, 
-	0x0009fe00, 0x000a0002, 0x0009fffe, 0x000a0404, 0x0009fbfc, 0x000a0a0a, 0x0009f5f6, 0x000a0a00, 
-	0x0009f600, 0x000a000a, 0x0009fff6, 0xfff60000, 0xfff60200, 0xfff5fe00, 0xfff60002, 0xfff5fffe, 
-	0xfff60404, 0xfff5fbfc, 0xfff60a0a, 0xfff5f5f6, 0xfff60a00, 0xfff5f600, 0xfff6000a, 0xfff5fff6, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x00000404, 0xfffffbfc, 0x00000c0c, 
-	0xfffff3f4, 0x00000c00, 0xfffff400, 0x0000000c, 0xfffffff4, 0x000007f8, 0xfffff808, 0xfffff008, 
-	0x00000ff8, 0xffffe800, 0x00001800, 0xfffff7e8, 0xfffff818, 0x000007e8, 0x00000818, 0xfffff014, 
-	0x00000fec, 0xffffffe4, 0x0000001c, 0xffffe7f0, 0xffffe810, 0x000017f0, 0x00001810, 0xffffe000, 
-	0x00002000, 0xffffefe4, 0xfffff01c, 0x00000fe4, 0x0000101c, 0xffffdff8, 0xffffe008, 0xfffff7e0, 
-	0xfffff820, 0x000007e0, 0x00000820, 0x00001ff8, 0x00002008, 0x00001818, 0xffffe7e8, 0xffffe818, 
-	0x000017e8, 0xffffdfec, 0x00002014, 0xffffffd8, 0x00000028, 0xffffefd8, 0x00001028, 0xffffd400, 
-	0xffffd400, 0xffffffd4, 0x0000002c, 0x00002c00, 0x00002c00, 0xffffdfe0, 0x00002020, 0xffffd3f0, 
-	0x00002c10, 0xffffd3e8, 0xffffe7d4, 0x0000182c, 0x00002c18, 0xffffefd0, 0x00001030, 0xffffdbdc, 
-	0xffffdbdc, 0x00002424, 0x00002424, 0xffffcbec, 0x00002828, 0xffffd7d8, 0xffffcbe0, 0x00000000, 
-	0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x00000404, 0xfffffbfc, 0x00000c0c, 0xfffff3f4, 
-	0x00000c00, 0xfffff400, 0x0000000c, 0xfffffff4, 0x04000000, 0x04000400, 0x03fffc00, 0x04000004, 
-	0x03fffffc, 0x04000404, 0x03fffbfc, 0x04000c0c, 0x03fff3f4, 0x04000c00, 0x03fff400, 0x0400000c, 
-	0x03fffff4, 0xfc000000, 0xfc000400, 0xfbfffc00, 0xfc000004, 0xfbfffffc, 0xfc000404, 0xfbfffbfc, 
-	0xfc000c0c, 0xfbfff3f4, 0xfc000c00, 0xfbfff400, 0xfc00000c, 0xfbfffff4, 0x00040000, 0x00040400, 
-	0x0003fc00, 0x00040004, 0x0003fffc, 0x00040404, 0x0003fbfc, 0x00040c0c, 0x0003f3f4, 0x00040c00, 
-	0x0003f400, 0x0004000c, 0x0003fff4, 0xfffc0000, 0xfffc0400, 0xfffbfc00, 0xfffc0004, 0xfffbfffc, 
-	0xfffc0404, 0xfffbfbfc, 0xfffc0c0c, 0xfffbf3f4, 0xfffc0c00, 0xfffbf400, 0xfffc000c, 0xfffbfff4, 
-	0x04040000, 0x04040400, 0x0403fc00, 0x04040004, 0x0403fffc, 0x04040404, 0x0403fbfc, 0x04040c0c, 
-	0x0403f3f4, 0x04040c00, 0x0403f400, 0x0404000c, 0x0403fff4, 0xfbfc0000, 0xfbfc0400, 0xfbfbfc00, 
-	0xfbfc0004, 0xfbfbfffc, 0xfbfc0404, 0xfbfbfbfc, 0xfbfc0c0c, 0xfbfbf3f4, 0xfbfc0c00, 0xfbfbf400, 
-	0xfbfc000c, 0xfbfbfff4, 0x0c0c0000, 0x0c0c0400, 0x0c0bfc00, 0x0c0c0004, 0x0c0bfffc, 0x0c0c0404, 
-	0x0c0bfbfc, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c0c00, 0x0c0bf400, 0x0c0c000c, 0x0c0bfff4, 0xf3f40000, 
-	0xf3f40400, 0xf3f3fc00, 0xf3f40004, 0xf3f3fffc, 0xf3f40404, 0xf3f3fbfc, 0xf3f40c0c, 0xf3f3f3f4, 
-	0xf3f40c00, 0xf3f3f400, 0xf3f4000c, 0xf3f3fff4, 0x0c000000, 0x0c000400, 0x0bfffc00, 0x0c000004, 
-	0x0bfffffc, 0x0c000404, 0x0bfffbfc, 0x0c000c0c, 0x0bfff3f4, 0x0c000c00, 0x0bfff400, 0x0c00000c, 
-	0x0bfffff4, 0xf4000000, 0xf4000400, 0xf3fffc00, 0xf4000004, 0xf3fffffc, 0xf4000404, 0xf3fffbfc, 
-	0xf4000c0c, 0xf3fff3f4, 0xf4000c00, 0xf3fff400, 0xf400000c, 0xf3fffff4, 0x000c0000, 0x000c0400, 
-	0x000bfc00, 0x000c0004, 0x000bfffc, 0x000c0404, 0x000bfbfc, 0x000c0c0c, 0x000bf3f4, 0x000c0c00, 
-	0x000bf400, 0x000c000c, 0x000bfff4, 0xfff40000, 0xfff40400, 0xfff3fc00, 0xfff40004, 0xfff3fffc, 
-	0xfff40404, 0xfff3fbfc, 0xfff40c0c, 0xfff3f3f4, 0xfff40c00, 0xfff3f400, 0xfff4000c, 0xfff3fff4, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 
-	0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec, 
-	0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606, 
-	0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e, 
-	0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4, 
-	0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202, 
-	0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020, 
-	0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa, 
-	0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2, 
-	0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414, 
-	0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe, 
-	0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0, 
-	0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c, 
-	0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000, 
-	0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec, 
-	0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606, 
-	0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e, 
-	0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4, 
-	0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202, 
-	0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020, 
-	0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa, 
-	0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 
-	0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec, 
-	0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606, 
-	0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e, 
-	0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4, 
-	0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202, 
-	0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020, 
-	0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa, 
-	0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2, 
-	0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414, 
-	0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe, 
-	0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0, 
-	0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c, 
-	0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000, 
-	0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec, 
-	0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606, 
-	0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e, 
-	0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4, 
-	0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202, 
-	0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020, 
-	0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa, 
-	0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 
-	0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec, 
-	0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606, 
-	0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e, 
-	0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4, 
-	0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202, 
-	0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020, 
-	0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa, 
-	0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2, 
-	0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414, 
-	0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe, 
-	0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0, 
-	0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c, 
-	0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000, 
-	0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec, 
-	0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606, 
-	0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e, 
-	0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4, 
-	0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202, 
-	0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020, 
-	0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa, 
-	0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 
-	0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec, 
-	0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606, 
-	0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e, 
-	0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4, 
-	0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202, 
-	0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020, 
-	0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa, 
-	0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2, 
-	0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414, 
-	0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe, 
-	0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0, 
-	0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c, 
-	0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000, 
-	0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec, 
-	0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606, 
-	0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e, 
-	0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4, 
-	0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202, 
-	0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020, 
-	0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa, 
-	0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 
+        0x00000000, 0x00000202, 0xfffffdfe, 0x000002ff, 0xfffffd01, 0xffffff03, 0x000000fd, 0x00000404,
+        0xfffffbfc, 0x00000501, 0xfffffaff, 0x00000105, 0xfffffefb, 0x000003fc, 0xfffffc04, 0x000005fe,
+        0xfffffa02, 0xfffffe06, 0x000001fa, 0x00000904, 0xfffff6fc, 0x00000409, 0xfffffbf7, 0x00000909,
+        0xfffff6f7, 0x00000a01, 0xfffff5ff, 0x0000010a, 0xfffffef6, 0x000007fb, 0xfffff805, 0xfffffb08,
+        0x000004f8, 0x00000f09, 0xfffff0f7, 0x0000090f, 0xfffff6f1, 0x00000bfd, 0xfffff403, 0xfffffd0c,
+        0x000002f4, 0x00001004, 0xffffeffc, 0x00000410, 0xfffffbf0, 0x00001010, 0xffffeff0, 0x00001200,
+        0xffffee00, 0x00000012, 0xffffffee, 0x00000bf4, 0xfffff40c, 0x00000ff7, 0xfffff009, 0xfffff710,
+        0x000008f0, 0x00001b0b, 0xffffe4f5, 0x00000b1b, 0xfffff4e5, 0x00001c13, 0xffffe3ed, 0x0000131c,
+        0xffffece4, 0x000015fa, 0xffffea06, 0xfffffa16, 0x000005ea, 0x00001d04, 0xffffe2fc, 0x0000041d,
+        0xfffffbe3, 0x00001e1e, 0xffffe1e2, 0x000020fe, 0xffffdf02, 0xfffffe21, 0x000001df, 0x000016ee,
+        0xffffe912, 0xffffee17, 0x000011e9, 0x00001df1, 0xffffe20f, 0xfffff11e, 0x00000ee2, 0x00002e16,
+        0xffffd1ea, 0x0000162e, 0xffffe9d2, 0x00002f0d, 0xffffd0f3, 0x00000d2f, 0xfffff2d1, 0x00003123,
+        0xffffcedd, 0x00002331, 0xffffdccf, 0x000028f5, 0xffffd70b, 0xfffff529, 0x00000ad7, 0x00003304,
+        0xffffccfc, 0x00000433, 0xfffffbcd, 0x00003636, 0xffffc9ca, 0x000021de, 0xffffde22, 0x000029e3,
+        0xffffd61d, 0xffffe32a, 0x00001cd6, 0x00003bfa, 0xffffc406, 0xfffffa3c, 0x000005c4, 0x00004c1b,
+        0xffffb3e5, 0x00001b4c, 0xffffe4b4, 0x00004d2b, 0xffffb2d5, 0x00002b4d, 0xffffd4b3, 0x000036e8,
+        0xffffc918, 0xffffe837, 0x000017c9, 0x00004f0e, 0xffffb0f2, 0x00000e4f, 0xfffff1b1, 0x0000533f,
+        0xffffacc1, 0x00003f53, 0xffffc0ad, 0x000049ec, 0xffffb614, 0xffffec4a, 0x000013b6, 0x00005802,
+        0xffffa7fe, 0x00000258, 0xfffffda8, 0x00005d5d, 0xffffa2a3, 0x00003ccc, 0xffffc334, 0xffffcc3d,
+        0x000033c3, 0x00007834, 0xffff87cc, 0x00003478, 0xffffcb88, 0x00004ad3, 0xffffb52d, 0xffffd34b,
+        0x00002cb5, 0x00007d4b, 0xffff82b5, 0x00004b7d, 0xffffb483, 0x00007a21, 0xffff85df, 0x0000217a,
+        0xffffde86, 0x000066f3, 0xffff990d, 0xfffff367, 0x00000c99, 0x00005fd8, 0xffffa028, 0xffffd860,
+        0x000027a0, 0x00007ede, 0xffff8122, 0xffffde7f, 0x00002181, 0x000058a7, 0xffffa759, 0x000068b2,
+        0xffff974e, 0xffffb269, 0x00004d97, 0x00000c0c, 0xfffff3f4, 0x00001717, 0xffffe8e9, 0x00002a2a,
+        0xffffd5d6, 0x00004949, 0xffffb6b7, 0x00000000, 0x02020000, 0xfdfe0000, 0x02ff0000, 0xfd010000,
+        0xff030000, 0x00fd0000, 0x00000202, 0x02020202, 0xfdfe0202, 0x02ff0202, 0xfd010202, 0xff030202,
+        0x00fd0202, 0xfffffdfe, 0x0201fdfe, 0xfdfdfdfe, 0x02fefdfe, 0xfd00fdfe, 0xff02fdfe, 0x00fcfdfe,
+        0x000002ff, 0x020202ff, 0xfdfe02ff, 0x02ff02ff, 0xfd0102ff, 0xff0302ff, 0x00fd02ff, 0xfffffd01,
+        0x0201fd01, 0xfdfdfd01, 0x02fefd01, 0xfd00fd01, 0xff02fd01, 0x00fcfd01, 0xffffff03, 0x0201ff03,
+        0xfdfdff03, 0x02feff03, 0xfd00ff03, 0xff02ff03, 0x00fcff03, 0x000000fd, 0x020200fd, 0xfdfe00fd,
+        0x02ff00fd, 0xfd0100fd, 0xff0300fd, 0x00fd00fd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000303, 0xfffffcfd, 0x000003ff, 0xfffffc01, 0xffffff04, 0x000000fc, 0x00000707,
+        0xfffff8f9, 0x00000802, 0xfffff7fe, 0x00000208, 0xfffffdf8, 0x000008fe, 0xfffff702, 0xfffffe09,
+        0x000001f7, 0x000005fa, 0xfffffa06, 0x00000d06, 0xfffff2fa, 0x0000060d, 0xfffff9f3, 0x00000d0d,
+        0xfffff2f3, 0x00000e01, 0xfffff1ff, 0x0000010e, 0xfffffef2, 0x00000bf8, 0xfffff408, 0xfffff80c,
+        0x000007f4, 0x0000170e, 0xffffe8f2, 0x00000e17, 0xfffff1e9, 0x000011fb, 0xffffee05, 0xfffffb12,
+        0x000004ee, 0x00001806, 0xffffe7fa, 0x00000618, 0xfffff9e8, 0x00001818, 0xffffe7e8, 0x00001aff,
+        0xffffe501, 0xffffff1b, 0x000000e5, 0x000010ef, 0xffffef11, 0x000016f3, 0xffffe90d, 0xfffff317,
+        0x00000ce9, 0x00002810, 0xffffd7f0, 0x00001028, 0xffffefd8, 0x0000291c, 0xffffd6e4, 0x00001c29,
+        0xffffe3d7, 0x000020f7, 0xffffdf09, 0xfffff721, 0x000008df, 0x00002b06, 0xffffd4fa, 0x0000062b,
+        0xfffff9d5, 0x00002e2e, 0xffffd1d2, 0x000031fc, 0xffffce04, 0xfffffc32, 0x000003ce, 0x000021e5,
+        0xffffde1b, 0xffffe522, 0x00001ade, 0x00002cea, 0xffffd316, 0xffffea2d, 0x000015d3, 0x00004522,
+        0xffffbade, 0x00002245, 0xffffddbb, 0x00004613, 0xffffb9ed, 0x00001346, 0xffffecba, 0x00004935,
+        0xffffb6cb, 0x00003549, 0xffffcab7, 0x00003def, 0xffffc211, 0xffffef3e, 0x000010c2, 0x00004d05,
+        0xffffb2fb, 0x0000054d, 0xfffffab3, 0x00005252, 0xffffadae, 0x000032cd, 0xffffcd33, 0x00003fd5,
+        0xffffc02b, 0xffffd540, 0x00002ac0, 0x000059f6, 0xffffa60a, 0xfffff65a, 0x000009a6, 0x00007229,
+        0xffff8dd7, 0x00002972, 0xffffd68e, 0x00007440, 0xffff8bc0, 0x00004074, 0xffffbf8c, 0x000051db,
+        0xffffae25, 0xffffdb52, 0x000024ae, 0x00007716, 0xffff88ea, 0x00001677, 0xffffe989, 0x00007c5f,
+        0xffff83a1, 0x00005f7c, 0xffffa084, 0x00006ee2, 0xffff911e, 0xffffe26f, 0x00001d91, 0x00005bb2,
+        0xffffa44e, 0xffffb25c, 0x00004da4, 0x000070bc, 0xffff8f44, 0xffffbc71, 0x0000438f, 0x00001212,
+        0xffffedee, 0x00002222, 0xffffddde, 0x00003f3f, 0xffffc0c1, 0x00006d6d, 0xffff9293, 0x00000000,
+        0x03030000, 0xfcfd0000, 0x03ff0000, 0xfc010000, 0xff040000, 0x00fc0000, 0x07070000, 0xf8f90000,
+        0x00000303, 0x03030303, 0xfcfd0303, 0x03ff0303, 0xfc010303, 0xff040303, 0x00fc0303, 0x07070303,
+        0xf8f90303, 0xfffffcfd, 0x0302fcfd, 0xfcfcfcfd, 0x03fefcfd, 0xfc00fcfd, 0xff03fcfd, 0x00fbfcfd,
+        0x0706fcfd, 0xf8f8fcfd, 0x000003ff, 0x030303ff, 0xfcfd03ff, 0x03ff03ff, 0xfc0103ff, 0xff0403ff,
+        0x00fc03ff, 0x070703ff, 0xf8f903ff, 0xfffffc01, 0x0302fc01, 0xfcfcfc01, 0x03fefc01, 0xfc00fc01,
+        0xff03fc01, 0x00fbfc01, 0x0706fc01, 0xf8f8fc01, 0xffffff04, 0x0302ff04, 0xfcfcff04, 0x03feff04,
+        0xfc00ff04, 0xff03ff04, 0x00fbff04, 0x0706ff04, 0xf8f8ff04, 0x000000fc, 0x030300fc, 0xfcfd00fc,
+        0x03ff00fc, 0xfc0100fc, 0xff0400fc, 0x00fc00fc, 0x070700fc, 0xf8f900fc, 0x00000707, 0x03030707,
+        0xfcfd0707, 0x03ff0707, 0xfc010707, 0xff040707, 0x00fc0707, 0x07070707, 0xf8f90707, 0xfffff8f9,
+        0x0302f8f9, 0xfcfcf8f9, 0x03fef8f9, 0xfc00f8f9, 0xff03f8f9, 0x00fbf8f9, 0x0706f8f9, 0xf8f8f8f9,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000404, 0xfffffbfc, 0x000004ff, 0xfffffb01, 0xffffff05, 0x000000fb, 0x00000a03,
+        0xfffff5fd, 0x0000030a, 0xfffffcf6, 0x00000909, 0xfffff6f7, 0x000006f9, 0xfffff907, 0x00000bfd,
+        0xfffff403, 0xfffffd0c, 0x000002f4, 0x00001108, 0xffffeef8, 0x00000811, 0xfffff7ef, 0x00001111,
+        0xffffeeef, 0x00001301, 0xffffecff, 0x00000113, 0xfffffeed, 0x00000ff5, 0xfffff00b, 0xfffff510,
+        0x00000af0, 0x000016fa, 0xffffe906, 0xfffffa17, 0x000005e9, 0x00001f12, 0xffffe0ee, 0x0000121f,
+        0xffffede1, 0x00002008, 0xffffdff8, 0x00000820, 0xfffff7e0, 0x00002121, 0xffffdedf, 0x000023ff,
+        0xffffdc01, 0xffffff24, 0x000000dc, 0x000016e9, 0xffffe917, 0x00001eef, 0xffffe111, 0xffffef1f,
+        0x000010e1, 0x00003615, 0xffffc9eb, 0x00001536, 0xffffeaca, 0x00003725, 0xffffc8db, 0x00002537,
+        0xffffdac9, 0x00002bf4, 0xffffd40c, 0xfffff42c, 0x00000bd4, 0x00003908, 0xffffc6f8, 0x00000839,
+        0xfffff7c7, 0x00003d3d, 0xffffc2c3, 0x000041fb, 0xffffbe05, 0xfffffb42, 0x000004be, 0x00002cdc,
+        0xffffd324, 0xffffdc2d, 0x000023d3, 0x00003be3, 0xffffc41d, 0xffffe33c, 0x00001cc4, 0x00005c2d,
+        0xffffa3d3, 0x00002d5c, 0xffffd2a4, 0x00005d19, 0xffffa2e7, 0x0000195d, 0xffffe6a3, 0x00006147,
+        0xffff9eb9, 0x00004761, 0xffffb89f, 0x000052ea, 0xffffad16, 0xffffea53, 0x000015ad, 0x00006607,
+        0xffff99f9, 0x00000766, 0xfffff89a, 0x00006d6d, 0xffff9293, 0x000043bc, 0xffffbc44, 0x000054c7,
+        0xffffab39, 0xffffc755, 0x000038ab, 0x000077f3, 0xffff880d, 0xfffff378, 0x00000c88, 0x00006dcf,
+        0xffff9231, 0xffffcf6e, 0x00003092, 0x00007a98, 0xffff8568, 0xffff987b, 0x00006785, 0x00001818,
+        0xffffe7e8, 0x00002e2e, 0xffffd1d2, 0x00005454, 0xffffabac, 0x00000000, 0x04040000, 0xfbfc0000,
+        0x04ff0000, 0xfb010000, 0xff050000, 0x00fb0000, 0x0a030000, 0xf5fd0000, 0x030a0000, 0x00000404,
+        0x04040404, 0xfbfc0404, 0x04ff0404, 0xfb010404, 0xff050404, 0x00fb0404, 0x0a030404, 0xf5fd0404,
+        0x030a0404, 0xfffffbfc, 0x0403fbfc, 0xfbfbfbfc, 0x04fefbfc, 0xfb00fbfc, 0xff04fbfc, 0x00fafbfc,
+        0x0a02fbfc, 0xf5fcfbfc, 0x0309fbfc, 0x000004ff, 0x040404ff, 0xfbfc04ff, 0x04ff04ff, 0xfb0104ff,
+        0xff0504ff, 0x00fb04ff, 0x0a0304ff, 0xf5fd04ff, 0x030a04ff, 0xfffffb01, 0x0403fb01, 0xfbfbfb01,
+        0x04fefb01, 0xfb00fb01, 0xff04fb01, 0x00fafb01, 0x0a02fb01, 0xf5fcfb01, 0x0309fb01, 0xffffff05,
+        0x0403ff05, 0xfbfbff05, 0x04feff05, 0xfb00ff05, 0xff04ff05, 0x00faff05, 0x0a02ff05, 0xf5fcff05,
+        0x0309ff05, 0x000000fb, 0x040400fb, 0xfbfc00fb, 0x04ff00fb, 0xfb0100fb, 0xff0500fb, 0x00fb00fb,
+        0x0a0300fb, 0xf5fd00fb, 0x030a00fb, 0x00000a03, 0x04040a03, 0xfbfc0a03, 0x04ff0a03, 0xfb010a03,
+        0xff050a03, 0x00fb0a03, 0x0a030a03, 0xf5fd0a03, 0x030a0a03, 0xfffff5fd, 0x0403f5fd, 0xfbfbf5fd,
+        0x04fef5fd, 0xfb00f5fd, 0xff04f5fd, 0x00faf5fd, 0x0a02f5fd, 0xf5fcf5fd, 0x0309f5fd, 0x0000030a,
+        0x0404030a, 0xfbfc030a, 0x04ff030a, 0xfb01030a, 0xff05030a, 0x00fb030a, 0x0a03030a, 0xf5fd030a,
+        0x030a030a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000505, 0xfffffafb, 0x000006fe, 0xfffff902, 0xfffffe07, 0x000001f9, 0x00000b0b,
+        0xfffff4f5, 0x00000d03, 0xfffff2fd, 0x0000030d, 0xfffffcf3, 0x000008f7, 0xfffff709, 0x00000efc,
+        0xfffff104, 0xfffffc0f, 0x000003f1, 0x0000160b, 0xffffe9f5, 0x00000b16, 0xfffff4ea, 0x00001515,
+        0xffffeaeb, 0x00001802, 0xffffe7fe, 0x00000218, 0xfffffde8, 0x000013f2, 0xffffec0e, 0xfffff214,
+        0x00000dec, 0x00002617, 0xffffd9e9, 0x00001726, 0xffffe8da, 0x00001cf8, 0xffffe308, 0xfffff81d,
+        0x000007e3, 0x0000270b, 0xffffd8f5, 0x00000b27, 0xfffff4d9, 0x00002929, 0xffffd6d7, 0x00002cff,
+        0xffffd301, 0xffffff2d, 0x000000d3, 0x00001ce3, 0xffffe31d, 0x000026ea, 0xffffd916, 0xffffea27,
+        0x000015d9, 0x0000431b, 0xffffbce5, 0x00001b43, 0xffffe4bd, 0x0000452f, 0xffffbad1, 0x00002f45,
+        0xffffd0bb, 0x000037f1, 0xffffc80f, 0xfffff138, 0x00000ec8, 0x0000470b, 0xffffb8f5, 0x00000b47,
+        0xfffff4b9, 0x00004c4c, 0xffffb3b4, 0x000052fa, 0xffffad06, 0xfffffa53, 0x000005ad, 0x000038d3,
+        0xffffc72d, 0xffffd339, 0x00002cc7, 0x00004adc, 0xffffb524, 0xffffdc4b, 0x000023b5, 0x00007338,
+        0xffff8cc8, 0x00003873, 0xffffc78d, 0x0000751f, 0xffff8ae1, 0x00001f75, 0xffffe08b, 0x00007a58,
+        0xffff85a8, 0x0000587a, 0xffffa786, 0x000067e4, 0xffff981c, 0xffffe468, 0x00001b98, 0x000054ab,
+        0xffffab55, 0x000069b8, 0xffff9648, 0xffffb86a, 0x00004796, 0x00001e1e, 0xffffe1e2, 0x00003a3a,
+        0xffffc5c6, 0x00006969, 0xffff9697, 0x00000000, 0x05050000, 0xfafb0000, 0x06fe0000, 0xf9020000,
+        0xfe070000, 0x01f90000, 0x0b0b0000, 0xf4f50000, 0x0d030000, 0xf2fd0000, 0x00000505, 0x05050505,
+        0xfafb0505, 0x06fe0505, 0xf9020505, 0xfe070505, 0x01f90505, 0x0b0b0505, 0xf4f50505, 0x0d030505,
+        0xf2fd0505, 0xfffffafb, 0x0504fafb, 0xfafafafb, 0x06fdfafb, 0xf901fafb, 0xfe06fafb, 0x01f8fafb,
+        0x0b0afafb, 0xf4f4fafb, 0x0d02fafb, 0xf2fcfafb, 0x000006fe, 0x050506fe, 0xfafb06fe, 0x06fe06fe,
+        0xf90206fe, 0xfe0706fe, 0x01f906fe, 0x0b0b06fe, 0xf4f506fe, 0x0d0306fe, 0xf2fd06fe, 0xfffff902,
+        0x0504f902, 0xfafaf902, 0x06fdf902, 0xf901f902, 0xfe06f902, 0x01f8f902, 0x0b0af902, 0xf4f4f902,
+        0x0d02f902, 0xf2fcf902, 0xfffffe07, 0x0504fe07, 0xfafafe07, 0x06fdfe07, 0xf901fe07, 0xfe06fe07,
+        0x01f8fe07, 0x0b0afe07, 0xf4f4fe07, 0x0d02fe07, 0xf2fcfe07, 0x000001f9, 0x050501f9, 0xfafb01f9,
+        0x06fe01f9, 0xf90201f9, 0xfe0701f9, 0x01f901f9, 0x0b0b01f9, 0xf4f501f9, 0x0d0301f9, 0xf2fd01f9,
+        0x00000b0b, 0x05050b0b, 0xfafb0b0b, 0x06fe0b0b, 0xf9020b0b, 0xfe070b0b, 0x01f90b0b, 0x0b0b0b0b,
+        0xf4f50b0b, 0x0d030b0b, 0xf2fd0b0b, 0xfffff4f5, 0x0504f4f5, 0xfafaf4f5, 0x06fdf4f5, 0xf901f4f5,
+        0xfe06f4f5, 0x01f8f4f5, 0x0b0af4f5, 0xf4f4f4f5, 0x0d02f4f5, 0xf2fcf4f5, 0x00000d03, 0x05050d03,
+        0xfafb0d03, 0x06fe0d03, 0xf9020d03, 0xfe070d03, 0x01f90d03, 0x0b0b0d03, 0xf4f50d03, 0x0d030d03,
+        0xf2fd0d03, 0xfffff2fd, 0x0504f2fd, 0xfafaf2fd, 0x06fdf2fd, 0xf901f2fd, 0xfe06f2fd, 0x01f8f2fd,
+        0x0b0af2fd, 0xf4f4f2fd, 0x0d02f2fd, 0xf2fcf2fd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000606, 0xfffff9fa, 0x000007fe, 0xfffff802, 0xfffffe08, 0x000001f8, 0x00000d0d,
+        0xfffff2f3, 0x00000f04, 0xfffff0fc, 0x0000040f, 0xfffffbf1, 0x00000af5, 0xfffff50b, 0x000011fb,
+        0xffffee05, 0xfffffb12, 0x000004ee, 0x00001a0d, 0xffffe5f3, 0x00000d1a, 0xfffff2e6, 0x00001a1a,
+        0xffffe5e6, 0x00001d02, 0xffffe2fe, 0x0000021d, 0xfffffde3, 0x000017f0, 0xffffe810, 0xfffff018,
+        0x00000fe8, 0x00002e1c, 0xffffd1e4, 0x00001c2e, 0xffffe3d2, 0x000022f7, 0xffffdd09, 0xfffff723,
+        0x000008dd, 0x00002f0d, 0xffffd0f3, 0x00000d2f, 0xfffff2d1, 0x00003131, 0xffffcecf, 0x000035ff,
+        0xffffca01, 0xffffff36, 0x000000ca, 0x000022dd, 0xffffdd23, 0x00002ee6, 0xffffd11a, 0xffffe62f,
+        0x000019d1, 0x00005120, 0xffffaee0, 0x00002051, 0xffffdfaf, 0x00005338, 0xffffacc8, 0x00003853,
+        0xffffc7ad, 0x000042ee, 0xffffbd12, 0xffffee43, 0x000011bd, 0x0000560d, 0xffffa9f3, 0x00000d56,
+        0xfffff2aa, 0x00005b5b, 0xffffa4a5, 0x000062f9, 0xffff9d07, 0xfffff963, 0x0000069d, 0x000043ca,
+        0xffffbc36, 0xffffca44, 0x000035bc, 0x000059d4, 0xffffa62c, 0xffffd45a, 0x00002ba6, 0x00007bdf,
+        0xffff8421, 0xffffdf7c, 0x00002084, 0x00006699, 0xffff9967, 0x00007eaa, 0xffff8156, 0xffffaa7f,
+        0x00005581, 0x00002525, 0xffffdadb, 0x00004545, 0xffffbabb, 0x00000000, 0x06060000, 0xf9fa0000,
+        0x07fe0000, 0xf8020000, 0xfe080000, 0x01f80000, 0x0d0d0000, 0xf2f30000, 0x0f040000, 0xf0fc0000,
+        0x040f0000, 0x00000606, 0x06060606, 0xf9fa0606, 0x07fe0606, 0xf8020606, 0xfe080606, 0x01f80606,
+        0x0d0d0606, 0xf2f30606, 0x0f040606, 0xf0fc0606, 0x040f0606, 0xfffff9fa, 0x0605f9fa, 0xf9f9f9fa,
+        0x07fdf9fa, 0xf801f9fa, 0xfe07f9fa, 0x01f7f9fa, 0x0d0cf9fa, 0xf2f2f9fa, 0x0f03f9fa, 0xf0fbf9fa,
+        0x040ef9fa, 0x000007fe, 0x060607fe, 0xf9fa07fe, 0x07fe07fe, 0xf80207fe, 0xfe0807fe, 0x01f807fe,
+        0x0d0d07fe, 0xf2f307fe, 0x0f0407fe, 0xf0fc07fe, 0x040f07fe, 0xfffff802, 0x0605f802, 0xf9f9f802,
+        0x07fdf802, 0xf801f802, 0xfe07f802, 0x01f7f802, 0x0d0cf802, 0xf2f2f802, 0x0f03f802, 0xf0fbf802,
+        0x040ef802, 0xfffffe08, 0x0605fe08, 0xf9f9fe08, 0x07fdfe08, 0xf801fe08, 0xfe07fe08, 0x01f7fe08,
+        0x0d0cfe08, 0xf2f2fe08, 0x0f03fe08, 0xf0fbfe08, 0x040efe08, 0x000001f8, 0x060601f8, 0xf9fa01f8,
+        0x07fe01f8, 0xf80201f8, 0xfe0801f8, 0x01f801f8, 0x0d0d01f8, 0xf2f301f8, 0x0f0401f8, 0xf0fc01f8,
+        0x040f01f8, 0x00000d0d, 0x06060d0d, 0xf9fa0d0d, 0x07fe0d0d, 0xf8020d0d, 0xfe080d0d, 0x01f80d0d,
+        0x0d0d0d0d, 0xf2f30d0d, 0x0f040d0d, 0xf0fc0d0d, 0x040f0d0d, 0xfffff2f3, 0x0605f2f3, 0xf9f9f2f3,
+        0x07fdf2f3, 0xf801f2f3, 0xfe07f2f3, 0x01f7f2f3, 0x0d0cf2f3, 0xf2f2f2f3, 0x0f03f2f3, 0xf0fbf2f3,
+        0x040ef2f3, 0x00000f04, 0x06060f04, 0xf9fa0f04, 0x07fe0f04, 0xf8020f04, 0xfe080f04, 0x01f80f04,
+        0x0d0d0f04, 0xf2f30f04, 0x0f040f04, 0xf0fc0f04, 0x040f0f04, 0xfffff0fc, 0x0605f0fc, 0xf9f9f0fc,
+        0x07fdf0fc, 0xf801f0fc, 0xfe07f0fc, 0x01f7f0fc, 0x0d0cf0fc, 0xf2f2f0fc, 0x0f03f0fc, 0xf0fbf0fc,
+        0x040ef0fc, 0x0000040f, 0x0606040f, 0xf9fa040f, 0x07fe040f, 0xf802040f, 0xfe08040f, 0x01f8040f,
+        0x0d0d040f, 0xf2f3040f, 0x0f04040f, 0xf0fc040f, 0x040f040f, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000707, 0xfffff8f9, 0x000009fd, 0xfffff603, 0xfffffd0a, 0x000002f6, 0x00001010,
+        0xffffeff0, 0x00001205, 0xffffedfb, 0x00000512, 0xfffffaee, 0x00000cf3, 0xfffff30d, 0x000014fa,
+        0xffffeb06, 0xfffffa15, 0x000005eb, 0x00001e0f, 0xffffe1f1, 0x00000f1e, 0xfffff0e2, 0x00001e1e,
+        0xffffe1e2, 0x00002202, 0xffffddfe, 0x00000222, 0xfffffdde, 0x00001bed, 0xffffe413, 0xffffed1c,
+        0x000012e4, 0x00003620, 0xffffc9e0, 0x00002036, 0xffffdfca, 0x000028f5, 0xffffd70b, 0xfffff529,
+        0x00000ad7, 0x0000370f, 0xffffc8f1, 0x00000f37, 0xfffff0c9, 0x00003939, 0xffffc6c7, 0x00003eff,
+        0xffffc101, 0xffffff3f, 0x000000c1, 0x000027d8, 0xffffd828, 0x000036e2, 0xffffc91e, 0xffffe237,
+        0x00001dc9, 0x00005e25, 0xffffa1db, 0x0000255e, 0xffffdaa2, 0x00006041, 0xffff9fbf, 0x00004160,
+        0xffffbea0, 0x00004deb, 0xffffb215, 0xffffeb4e, 0x000014b2, 0x0000640f, 0xffff9bf1, 0x00000f64,
+        0xfffff09c, 0x00006a6a, 0xffff9596, 0x000073f8, 0xffff8c08, 0xfffff874, 0x0000078c, 0x00004ec1,
+        0xffffb13f, 0xffffc14f, 0x00003eb1, 0x000068cd, 0xffff9733, 0xffffcd69, 0x00003297, 0x00007788,
+        0xffff8878, 0x00002b2b, 0xffffd4d5, 0x00005050, 0xffffafb0, 0x00000000, 0x07070000, 0xf8f90000,
+        0x09fd0000, 0xf6030000, 0xfd0a0000, 0x02f60000, 0x10100000, 0xeff00000, 0x12050000, 0xedfb0000,
+        0x05120000, 0x00000707, 0x07070707, 0xf8f90707, 0x09fd0707, 0xf6030707, 0xfd0a0707, 0x02f60707,
+        0x10100707, 0xeff00707, 0x12050707, 0xedfb0707, 0x05120707, 0xfffff8f9, 0x0706f8f9, 0xf8f8f8f9,
+        0x09fcf8f9, 0xf602f8f9, 0xfd09f8f9, 0x02f5f8f9, 0x100ff8f9, 0xefeff8f9, 0x1204f8f9, 0xedfaf8f9,
+        0x0511f8f9, 0x000009fd, 0x070709fd, 0xf8f909fd, 0x09fd09fd, 0xf60309fd, 0xfd0a09fd, 0x02f609fd,
+        0x101009fd, 0xeff009fd, 0x120509fd, 0xedfb09fd, 0x051209fd, 0xfffff603, 0x0706f603, 0xf8f8f603,
+        0x09fcf603, 0xf602f603, 0xfd09f603, 0x02f5f603, 0x100ff603, 0xefeff603, 0x1204f603, 0xedfaf603,
+        0x0511f603, 0xfffffd0a, 0x0706fd0a, 0xf8f8fd0a, 0x09fcfd0a, 0xf602fd0a, 0xfd09fd0a, 0x02f5fd0a,
+        0x100ffd0a, 0xefeffd0a, 0x1204fd0a, 0xedfafd0a, 0x0511fd0a, 0x000002f6, 0x070702f6, 0xf8f902f6,
+        0x09fd02f6, 0xf60302f6, 0xfd0a02f6, 0x02f602f6, 0x101002f6, 0xeff002f6, 0x120502f6, 0xedfb02f6,
+        0x051202f6, 0x00001010, 0x07071010, 0xf8f91010, 0x09fd1010, 0xf6031010, 0xfd0a1010, 0x02f61010,
+        0x10101010, 0xeff01010, 0x12051010, 0xedfb1010, 0x05121010, 0xffffeff0, 0x0706eff0, 0xf8f8eff0,
+        0x09fceff0, 0xf602eff0, 0xfd09eff0, 0x02f5eff0, 0x100feff0, 0xefefeff0, 0x1204eff0, 0xedfaeff0,
+        0x0511eff0, 0x00001205, 0x07071205, 0xf8f91205, 0x09fd1205, 0xf6031205, 0xfd0a1205, 0x02f61205,
+        0x10101205, 0xeff01205, 0x12051205, 0xedfb1205, 0x05121205, 0xffffedfb, 0x0706edfb, 0xf8f8edfb,
+        0x09fcedfb, 0xf602edfb, 0xfd09edfb, 0x02f5edfb, 0x100fedfb, 0xefefedfb, 0x1204edfb, 0xedfaedfb,
+        0x0511edfb, 0x00000512, 0x07070512, 0xf8f90512, 0x09fd0512, 0xf6030512, 0xfd0a0512, 0x02f60512,
+        0x10100512, 0xeff00512, 0x12050512, 0xedfb0512, 0x05120512, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000808, 0xfffff7f8, 0x00000afd, 0xfffff503, 0xfffffd0b, 0x000002f5, 0x00001212,
+        0xffffedee, 0x00001405, 0xffffebfb, 0x00000514, 0xfffffaec, 0x00000ef1, 0xfffff10f, 0x000017f9,
+        0xffffe807, 0xfffff918, 0x000006e8, 0x00002311, 0xffffdcef, 0x00001123, 0xffffeedd, 0x00002222,
+        0xffffddde, 0x00002603, 0xffffd9fd, 0x00000326, 0xfffffcda, 0x00001fea, 0xffffe016, 0xffffea20,
+        0x000015e0, 0x00003d25, 0xffffc2db, 0x0000253d, 0xffffdac3, 0x00002ef3, 0xffffd10d, 0xfffff32f,
+        0x00000cd1, 0x00003f11, 0xffffc0ef, 0x0000113f, 0xffffeec1, 0x00004141, 0xffffbebf, 0x000047ff,
+        0xffffb801, 0xffffff48, 0x000000b8, 0x00002dd2, 0xffffd22e, 0x00003edd, 0xffffc123, 0xffffdd3f,
+        0x000022c1, 0x00006b2b, 0xffff94d5, 0x00002b6b, 0xffffd495, 0x00006e4b, 0xffff91b5, 0x00004b6e,
+        0xffffb492, 0x000058e8, 0xffffa718, 0xffffe859, 0x000017a7, 0x00007211, 0xffff8def, 0x00001172,
+        0xffffee8e, 0x00007979, 0xffff8687, 0x00005ab8, 0xffffa548, 0xffffb85b, 0x000047a5, 0x000077c6,
+        0xffff883a, 0xffffc678, 0x00003988, 0x00003131, 0xffffcecf, 0x00005c5c, 0xffffa3a4, 0x00000000,
+        0x08080000, 0xf7f80000, 0x0afd0000, 0xf5030000, 0xfd0b0000, 0x02f50000, 0x12120000, 0xedee0000,
+        0x14050000, 0xebfb0000, 0x05140000, 0x00000808, 0x08080808, 0xf7f80808, 0x0afd0808, 0xf5030808,
+        0xfd0b0808, 0x02f50808, 0x12120808, 0xedee0808, 0x14050808, 0xebfb0808, 0x05140808, 0xfffff7f8,
+        0x0807f7f8, 0xf7f7f7f8, 0x0afcf7f8, 0xf502f7f8, 0xfd0af7f8, 0x02f4f7f8, 0x1211f7f8, 0xededf7f8,
+        0x1404f7f8, 0xebfaf7f8, 0x0513f7f8, 0x00000afd, 0x08080afd, 0xf7f80afd, 0x0afd0afd, 0xf5030afd,
+        0xfd0b0afd, 0x02f50afd, 0x12120afd, 0xedee0afd, 0x14050afd, 0xebfb0afd, 0x05140afd, 0xfffff503,
+        0x0807f503, 0xf7f7f503, 0x0afcf503, 0xf502f503, 0xfd0af503, 0x02f4f503, 0x1211f503, 0xededf503,
+        0x1404f503, 0xebfaf503, 0x0513f503, 0xfffffd0b, 0x0807fd0b, 0xf7f7fd0b, 0x0afcfd0b, 0xf502fd0b,
+        0xfd0afd0b, 0x02f4fd0b, 0x1211fd0b, 0xededfd0b, 0x1404fd0b, 0xebfafd0b, 0x0513fd0b, 0x000002f5,
+        0x080802f5, 0xf7f802f5, 0x0afd02f5, 0xf50302f5, 0xfd0b02f5, 0x02f502f5, 0x121202f5, 0xedee02f5,
+        0x140502f5, 0xebfb02f5, 0x051402f5, 0x00001212, 0x08081212, 0xf7f81212, 0x0afd1212, 0xf5031212,
+        0xfd0b1212, 0x02f51212, 0x12121212, 0xedee1212, 0x14051212, 0xebfb1212, 0x05141212, 0xffffedee,
+        0x0807edee, 0xf7f7edee, 0x0afcedee, 0xf502edee, 0xfd0aedee, 0x02f4edee, 0x1211edee, 0xedededee,
+        0x1404edee, 0xebfaedee, 0x0513edee, 0x00001405, 0x08081405, 0xf7f81405, 0x0afd1405, 0xf5031405,
+        0xfd0b1405, 0x02f51405, 0x12121405, 0xedee1405, 0x14051405, 0xebfb1405, 0x05141405, 0xffffebfb,
+        0x0807ebfb, 0xf7f7ebfb, 0x0afcebfb, 0xf502ebfb, 0xfd0aebfb, 0x02f4ebfb, 0x1211ebfb, 0xededebfb,
+        0x1404ebfb, 0xebfaebfb, 0x0513ebfb, 0x00000514, 0x08080514, 0xf7f80514, 0x0afd0514, 0xf5030514,
+        0xfd0b0514, 0x02f50514, 0x12120514, 0xedee0514, 0x14050514, 0xebfb0514, 0x05140514, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000909, 0xfffff6f7, 0x00000bfd, 0xfffff403, 0xfffffd0c, 0x000002f4, 0x00001414,
+        0xffffebec, 0x00001706, 0xffffe8fa, 0x00000617, 0xfffff9e9, 0x000010ef, 0xffffef11, 0x00001af9,
+        0xffffe507, 0xfffff91b, 0x000006e5, 0x00002713, 0xffffd8ed, 0x00001327, 0xffffecd9, 0x00002727,
+        0xffffd8d9, 0x00002b03, 0xffffd4fd, 0x0000032b, 0xfffffcd5, 0x000023e8, 0xffffdc18, 0xffffe824,
+        0x000017dc, 0x0000452a, 0xffffbad6, 0x00002a45, 0xffffd5bb, 0x000034f2, 0xffffcb0e, 0xfffff235,
+        0x00000dcb, 0x00004713, 0xffffb8ed, 0x00001347, 0xffffecb9, 0x00004949, 0xffffb6b7, 0x00004ffe,
+        0xffffb002, 0xfffffe50, 0x000001b0, 0x000033cc, 0xffffcc34, 0x000045d9, 0xffffba27, 0xffffd946,
+        0x000026ba, 0x00007930, 0xffff86d0, 0x00003079, 0xffffcf87, 0x00007c54, 0xffff83ac, 0x0000547c,
+        0xffffab84, 0x000063e5, 0xffff9c1b, 0xffffe564, 0x00001a9c, 0x000065af, 0xffff9a51, 0xffffaf66,
+        0x0000509a, 0x00003737, 0xffffc8c9, 0x00006868, 0xffff9798, 0x00000000, 0x09090000, 0xf6f70000,
+        0x0bfd0000, 0xf4030000, 0xfd0c0000, 0x02f40000, 0x14140000, 0xebec0000, 0x17060000, 0xe8fa0000,
+        0x06170000, 0xf9e90000, 0x00000909, 0x09090909, 0xf6f70909, 0x0bfd0909, 0xf4030909, 0xfd0c0909,
+        0x02f40909, 0x14140909, 0xebec0909, 0x17060909, 0xe8fa0909, 0x06170909, 0xf9e90909, 0xfffff6f7,
+        0x0908f6f7, 0xf6f6f6f7, 0x0bfcf6f7, 0xf402f6f7, 0xfd0bf6f7, 0x02f3f6f7, 0x1413f6f7, 0xebebf6f7,
+        0x1705f6f7, 0xe8f9f6f7, 0x0616f6f7, 0xf9e8f6f7, 0x00000bfd, 0x09090bfd, 0xf6f70bfd, 0x0bfd0bfd,
+        0xf4030bfd, 0xfd0c0bfd, 0x02f40bfd, 0x14140bfd, 0xebec0bfd, 0x17060bfd, 0xe8fa0bfd, 0x06170bfd,
+        0xf9e90bfd, 0xfffff403, 0x0908f403, 0xf6f6f403, 0x0bfcf403, 0xf402f403, 0xfd0bf403, 0x02f3f403,
+        0x1413f403, 0xebebf403, 0x1705f403, 0xe8f9f403, 0x0616f403, 0xf9e8f403, 0xfffffd0c, 0x0908fd0c,
+        0xf6f6fd0c, 0x0bfcfd0c, 0xf402fd0c, 0xfd0bfd0c, 0x02f3fd0c, 0x1413fd0c, 0xebebfd0c, 0x1705fd0c,
+        0xe8f9fd0c, 0x0616fd0c, 0xf9e8fd0c, 0x000002f4, 0x090902f4, 0xf6f702f4, 0x0bfd02f4, 0xf40302f4,
+        0xfd0c02f4, 0x02f402f4, 0x141402f4, 0xebec02f4, 0x170602f4, 0xe8fa02f4, 0x061702f4, 0xf9e902f4,
+        0x00001414, 0x09091414, 0xf6f71414, 0x0bfd1414, 0xf4031414, 0xfd0c1414, 0x02f41414, 0x14141414,
+        0xebec1414, 0x17061414, 0xe8fa1414, 0x06171414, 0xf9e91414, 0xffffebec, 0x0908ebec, 0xf6f6ebec,
+        0x0bfcebec, 0xf402ebec, 0xfd0bebec, 0x02f3ebec, 0x1413ebec, 0xebebebec, 0x1705ebec, 0xe8f9ebec,
+        0x0616ebec, 0xf9e8ebec, 0x00001706, 0x09091706, 0xf6f71706, 0x0bfd1706, 0xf4031706, 0xfd0c1706,
+        0x02f41706, 0x14141706, 0xebec1706, 0x17061706, 0xe8fa1706, 0x06171706, 0xf9e91706, 0xffffe8fa,
+        0x0908e8fa, 0xf6f6e8fa, 0x0bfce8fa, 0xf402e8fa, 0xfd0be8fa, 0x02f3e8fa, 0x1413e8fa, 0xebebe8fa,
+        0x1705e8fa, 0xe8f9e8fa, 0x0616e8fa, 0xf9e8e8fa, 0x00000617, 0x09090617, 0xf6f70617, 0x0bfd0617,
+        0xf4030617, 0xfd0c0617, 0x02f40617, 0x14140617, 0xebec0617, 0x17060617, 0xe8fa0617, 0x06170617,
+        0xf9e90617, 0xfffff9e9, 0x0908f9e9, 0xf6f6f9e9, 0x0bfcf9e9, 0xf402f9e9, 0xfd0bf9e9, 0x02f3f9e9,
+        0x1413f9e9, 0xebebf9e9, 0x1705f9e9, 0xe8f9f9e9, 0x0616f9e9, 0xf9e8f9e9, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000404,
+        0xfffffbfc, 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x000003fc, 0xfffffc04, 0x000005fe,
+        0xfffffa02, 0xfffffe06, 0x000001fa, 0x00000804, 0xfffff7fc, 0x00000408, 0xfffffbf8, 0x00000808,
+        0xfffff7f8, 0x00000a00, 0xfffff600, 0x0000000a, 0xfffffff6, 0x000007fc, 0xfffff804, 0xfffffc08,
+        0x000003f8, 0x00000e08, 0xfffff1f8, 0x0000080e, 0xfffff7f2, 0x00000bfe, 0xfffff402, 0xfffffe0c,
+        0x000001f4, 0x00001004, 0xffffeffc, 0x00000410, 0xfffffbf0, 0x00001010, 0xffffeff0, 0x00001200,
+        0xffffee00, 0x00000012, 0xffffffee, 0x00000bf4, 0xfffff40c, 0x00000ff8, 0xfffff008, 0xfffff810,
+        0x000007f0, 0x00001a0a, 0xffffe5f6, 0x00000a1a, 0xfffff5e6, 0x00001c12, 0xffffe3ee, 0x0000121c,
+        0xffffede4, 0x000015fa, 0xffffea06, 0xfffffa16, 0x000005ea, 0x00001c04, 0xffffe3fc, 0x0000041c,
+        0xfffffbe4, 0x00001e1e, 0xffffe1e2, 0x00001ffe, 0xffffe002, 0xfffffe20, 0x000001e0, 0x000015ee,
+        0xffffea12, 0xffffee16, 0x000011ea, 0x00001df2, 0xffffe20e, 0xfffff21e, 0x00000de2, 0x00002e16,
+        0xffffd1ea, 0x0000162e, 0xffffe9d2, 0x00002e0c, 0xffffd1f4, 0x00000c2e, 0xfffff3d2, 0x00003022,
+        0xffffcfde, 0x00002230, 0xffffddd0, 0x000027f6, 0xffffd80a, 0xfffff628, 0x000009d8, 0x00003204,
+        0xffffcdfc, 0x00000432, 0xfffffbce, 0x00003636, 0xffffc9ca, 0x000021de, 0xffffde22, 0x000029e4,
+        0xffffd61c, 0xffffe42a, 0x00001bd6, 0x00003bfa, 0xffffc406, 0xfffffa3c, 0x000005c4, 0x00004c1a,
+        0xffffb3e6, 0x00001a4c, 0xffffe5b4, 0x00004c2a, 0xffffb3d6, 0x00002a4c, 0xffffd5b4, 0x000035e8,
+        0xffffca18, 0xffffe836, 0x000017ca, 0x00004e0e, 0xffffb1f2, 0x00000e4e, 0xfffff1b2, 0x0000523e,
+        0xffffadc2, 0x00003e52, 0xffffc1ae, 0x000049ec, 0xffffb614, 0xffffec4a, 0x000013b6, 0x00005802,
+        0xffffa7fe, 0x00000258, 0xfffffda8, 0x00005c5c, 0xffffa3a4, 0x00003bcc, 0xffffc434, 0xffffcc3c,
+        0x000033c4, 0x00007634, 0xffff89cc, 0x00003476, 0xffffcb8a, 0x000049d4, 0xffffb62c, 0xffffd44a,
+        0x00002bb6, 0x0000764a, 0xffff89b6, 0x00004a76, 0xffffb58a, 0x00007620, 0xffff89e0, 0x00002076,
+        0xffffdf8a, 0x000065f4, 0xffff9a0c, 0xfffff466, 0x00000b9a, 0x00005fd8, 0xffffa028, 0xffffd860,
+        0x000027a0, 0x000075de, 0xffff8a22, 0xffffde76, 0x0000218a, 0x000057a8, 0xffffa858, 0x000067b2,
+        0xffff984e, 0xffffb268, 0x00004d98, 0x00000c0c, 0xfffff3f4, 0x00001616, 0xffffe9ea, 0x00002a2a,
+        0xffffd5d6, 0x00004848, 0xffffb7b8, 0x00000000, 0x02020000, 0xfdfe0000, 0x02000000, 0xfe000000,
+        0x00020000, 0xfffe0000, 0x00000202, 0x02020202, 0xfdfe0202, 0x02000202, 0xfe000202, 0x00020202,
+        0xfffe0202, 0xfffffdfe, 0x0201fdfe, 0xfdfdfdfe, 0x01fffdfe, 0xfdfffdfe, 0x0001fdfe, 0xfffdfdfe,
+        0x00000200, 0x02020200, 0xfdfe0200, 0x02000200, 0xfe000200, 0x00020200, 0xfffe0200, 0xfffffe00,
+        0x0201fe00, 0xfdfdfe00, 0x01fffe00, 0xfdfffe00, 0x0001fe00, 0xfffdfe00, 0x00000002, 0x02020002,
+        0xfdfe0002, 0x02000002, 0xfe000002, 0x00020002, 0xfffe0002, 0xfffffffe, 0x0201fffe, 0xfdfdfffe,
+        0x01fffffe, 0xfdfffffe, 0x0001fffe, 0xfffdfffe, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000303, 0xfffffcfd, 0x00000300, 0xfffffd00, 0x00000003, 0xfffffffd, 0x00000606,
+        0xfffff9fa, 0x00000903, 0xfffff6fd, 0x00000309, 0xfffffcf7, 0x000008fd, 0xfffff703, 0xfffffd09,
+        0x000002f7, 0x000005fa, 0xfffffa06, 0x00000c06, 0xfffff3fa, 0x0000060c, 0xfffff9f4, 0x00000c0c,
+        0xfffff3f4, 0x00000f00, 0xfffff100, 0x0000000f, 0xfffffff1, 0x00000bf7, 0xfffff409, 0xfffff70c,
+        0x000008f4, 0x0000180f, 0xffffe7f1, 0x00000f18, 0xfffff0e8, 0x000011fa, 0xffffee06, 0xfffffa12,
+        0x000005ee, 0x00001806, 0xffffe7fa, 0x00000618, 0xfffff9e8, 0x00001818, 0xffffe7e8, 0x00001b00,
+        0xffffe500, 0x0000001b, 0xffffffe5, 0x000011ee, 0xffffee12, 0x000017f4, 0xffffe80c, 0xfffff418,
+        0x00000be8, 0x0000270f, 0xffffd8f1, 0x00000f27, 0xfffff0d9, 0x00002a1b, 0xffffd5e5, 0x00001b2a,
+        0xffffe4d6, 0x000020f7, 0xffffdf09, 0xfffff721, 0x000008df, 0x00002a06, 0xffffd5fa, 0x0000062a,
+        0xfffff9d6, 0x00002d2d, 0xffffd2d3, 0x000032fd, 0xffffcd03, 0xfffffd33, 0x000002cd, 0x000020e5,
+        0xffffdf1b, 0xffffe521, 0x00001adf, 0x00002ceb, 0xffffd315, 0xffffeb2d, 0x000014d3, 0x00004521,
+        0xffffbadf, 0x00002145, 0xffffdebb, 0x00004512, 0xffffbaee, 0x00001245, 0xffffedbb, 0x00004836,
+        0xffffb7ca, 0x00003648, 0xffffc9b8, 0x00003eee, 0xffffc112, 0xffffee3f, 0x000011c1, 0x00004e06,
+        0xffffb1fa, 0x0000064e, 0xfffff9b2, 0x00005151, 0xffffaeaf, 0x000032cd, 0xffffcd33, 0x00003ed6,
+        0xffffc12a, 0xffffd63f, 0x000029c1, 0x000059f7, 0xffffa609, 0xfffff75a, 0x000008a6, 0x0000722a,
+        0xffff8dd6, 0x00002a72, 0xffffd58e, 0x0000753f, 0xffff8ac1, 0x00003f75, 0xffffc08b, 0x000050dc,
+        0xffffaf24, 0xffffdc51, 0x000023af, 0x00007815, 0xffff87eb, 0x00001578, 0xffffea88, 0x00007b60,
+        0xffff84a0, 0x0000607b, 0xffff9f85, 0x00006ee2, 0xffff911e, 0xffffe26f, 0x00001d91, 0x00005cb2,
+        0xffffa34e, 0xffffb25d, 0x00004da3, 0x000071bb, 0xffff8e45, 0xffffbb72, 0x0000448e, 0x00001212,
+        0xffffedee, 0x00002121, 0xffffdedf, 0x00003f3f, 0xffffc0c1, 0x00006c6c, 0xffff9394, 0x00000000,
+        0x03030000, 0xfcfd0000, 0x03000000, 0xfd000000, 0x00030000, 0xfffd0000, 0x06060000, 0xf9fa0000,
+        0x00000303, 0x03030303, 0xfcfd0303, 0x03000303, 0xfd000303, 0x00030303, 0xfffd0303, 0x06060303,
+        0xf9fa0303, 0xfffffcfd, 0x0302fcfd, 0xfcfcfcfd, 0x02fffcfd, 0xfcfffcfd, 0x0002fcfd, 0xfffcfcfd,
+        0x0605fcfd, 0xf9f9fcfd, 0x00000300, 0x03030300, 0xfcfd0300, 0x03000300, 0xfd000300, 0x00030300,
+        0xfffd0300, 0x06060300, 0xf9fa0300, 0xfffffd00, 0x0302fd00, 0xfcfcfd00, 0x02fffd00, 0xfcfffd00,
+        0x0002fd00, 0xfffcfd00, 0x0605fd00, 0xf9f9fd00, 0x00000003, 0x03030003, 0xfcfd0003, 0x03000003,
+        0xfd000003, 0x00030003, 0xfffd0003, 0x06060003, 0xf9fa0003, 0xfffffffd, 0x0302fffd, 0xfcfcfffd,
+        0x02fffffd, 0xfcfffffd, 0x0002fffd, 0xfffcfffd, 0x0605fffd, 0xf9f9fffd, 0x00000606, 0x03030606,
+        0xfcfd0606, 0x03000606, 0xfd000606, 0x00030606, 0xfffd0606, 0x06060606, 0xf9fa0606, 0xfffff9fa,
+        0x0302f9fa, 0xfcfcf9fa, 0x02fff9fa, 0xfcfff9fa, 0x0002f9fa, 0xfffcf9fa, 0x0605f9fa, 0xf9f9f9fa,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000404, 0xfffffbfc, 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x00000804,
+        0xfffff7fc, 0x00000408, 0xfffffbf8, 0x00000808, 0xfffff7f8, 0x000007f8, 0xfffff808, 0x00000bfc,
+        0xfffff404, 0xfffffc0c, 0x000003f4, 0x00001008, 0xffffeff8, 0x00000810, 0xfffff7f0, 0x00001010,
+        0xffffeff0, 0x00001400, 0xffffec00, 0x00000014, 0xffffffec, 0x00000ff4, 0xfffff00c, 0xfffff410,
+        0x00000bf0, 0x000017fc, 0xffffe804, 0xfffffc18, 0x000003e8, 0x00002010, 0xffffdff0, 0x00001020,
+        0xffffefe0, 0x00002008, 0xffffdff8, 0x00000820, 0xfffff7e0, 0x00002020, 0xffffdfe0, 0x00002400,
+        0xffffdc00, 0x00000024, 0xffffffdc, 0x000017e8, 0xffffe818, 0x00001ff0, 0xffffe010, 0xfffff020,
+        0x00000fe0, 0x00003414, 0xffffcbec, 0x00001434, 0xffffebcc, 0x00003824, 0xffffc7dc, 0x00002438,
+        0xffffdbc8, 0x00002bf4, 0xffffd40c, 0xfffff42c, 0x00000bd4, 0x00003808, 0xffffc7f8, 0x00000838,
+        0xfffff7c8, 0x00003c3c, 0xffffc3c4, 0x00003ffc, 0xffffc004, 0xfffffc40, 0x000003c0, 0x00002bdc,
+        0xffffd424, 0xffffdc2c, 0x000023d4, 0x00003be4, 0xffffc41c, 0xffffe43c, 0x00001bc4, 0x00005c2c,
+        0xffffa3d4, 0x00002c5c, 0xffffd3a4, 0x00005c18, 0xffffa3e8, 0x0000185c, 0xffffe7a4, 0x00006048,
+        0xffff9fb8, 0x00004860, 0xffffb7a0, 0x000053ec, 0xffffac14, 0xffffec54, 0x000013ac, 0x00006408,
+        0xffff9bf8, 0x00000864, 0xfffff79c, 0x00006c6c, 0xffff9394, 0x000043bc, 0xffffbc44, 0x000053c8,
+        0xffffac38, 0xffffc854, 0x000037ac, 0x000077f4, 0xffff880c, 0xfffff478, 0x00000b88, 0x00006bd0,
+        0xffff9430, 0xffffd06c, 0x00002f94, 0x00007b98, 0xffff8468, 0xffff987c, 0x00006784, 0x00001818,
+        0xffffe7e8, 0x00002c2c, 0xffffd3d4, 0x00005454, 0xffffabac, 0x00000000, 0x04040000, 0xfbfc0000,
+        0x04000000, 0xfc000000, 0x00040000, 0xfffc0000, 0x08040000, 0xf7fc0000, 0x04080000, 0x00000404,
+        0x04040404, 0xfbfc0404, 0x04000404, 0xfc000404, 0x00040404, 0xfffc0404, 0x08040404, 0xf7fc0404,
+        0x04080404, 0xfffffbfc, 0x0403fbfc, 0xfbfbfbfc, 0x03fffbfc, 0xfbfffbfc, 0x0003fbfc, 0xfffbfbfc,
+        0x0803fbfc, 0xf7fbfbfc, 0x0407fbfc, 0x00000400, 0x04040400, 0xfbfc0400, 0x04000400, 0xfc000400,
+        0x00040400, 0xfffc0400, 0x08040400, 0xf7fc0400, 0x04080400, 0xfffffc00, 0x0403fc00, 0xfbfbfc00,
+        0x03fffc00, 0xfbfffc00, 0x0003fc00, 0xfffbfc00, 0x0803fc00, 0xf7fbfc00, 0x0407fc00, 0x00000004,
+        0x04040004, 0xfbfc0004, 0x04000004, 0xfc000004, 0x00040004, 0xfffc0004, 0x08040004, 0xf7fc0004,
+        0x04080004, 0xfffffffc, 0x0403fffc, 0xfbfbfffc, 0x03fffffc, 0xfbfffffc, 0x0003fffc, 0xfffbfffc,
+        0x0803fffc, 0xf7fbfffc, 0x0407fffc, 0x00000804, 0x04040804, 0xfbfc0804, 0x04000804, 0xfc000804,
+        0x00040804, 0xfffc0804, 0x08040804, 0xf7fc0804, 0x04080804, 0xfffff7fc, 0x0403f7fc, 0xfbfbf7fc,
+        0x03fff7fc, 0xfbfff7fc, 0x0003f7fc, 0xfffbf7fc, 0x0803f7fc, 0xf7fbf7fc, 0x0407f7fc, 0x00000408,
+        0x04040408, 0xfbfc0408, 0x04000408, 0xfc000408, 0x00040408, 0xfffc0408, 0x08040408, 0xf7fc0408,
+        0x04080408, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000505, 0xfffffafb, 0x00000500, 0xfffffb00, 0x00000005, 0xfffffffb, 0x00000a0a,
+        0xfffff5f6, 0x00000f05, 0xfffff0fb, 0x0000050f, 0xfffffaf1, 0x000009f6, 0xfffff60a, 0x00000efb,
+        0xfffff105, 0xfffffb0f, 0x000004f1, 0x0000140a, 0xffffebf6, 0x00000a14, 0xfffff5ec, 0x00001414,
+        0xffffebec, 0x00001900, 0xffffe700, 0x00000019, 0xffffffe7, 0x000013f1, 0xffffec0f, 0xfffff114,
+        0x00000eec, 0x00002819, 0xffffd7e7, 0x00001928, 0xffffe6d8, 0x00001df6, 0xffffe20a, 0xfffff61e,
+        0x000009e2, 0x0000280a, 0xffffd7f6, 0x00000a28, 0xfffff5d8, 0x00002828, 0xffffd7d8, 0x00002d00,
+        0xffffd300, 0x0000002d, 0xffffffd3, 0x00001de2, 0xffffe21e, 0x000027ec, 0xffffd814, 0xffffec28,
+        0x000013d8, 0x00004119, 0xffffbee7, 0x00001941, 0xffffe6bf, 0x0000462d, 0xffffb9d3, 0x00002d46,
+        0xffffd2ba, 0x000036f1, 0xffffc90f, 0xfffff137, 0x00000ec9, 0x0000460a, 0xffffb9f6, 0x00000a46,
+        0xfffff5ba, 0x00004b4b, 0xffffb4b5, 0x000054fb, 0xffffab05, 0xfffffb55, 0x000004ab, 0x000036d3,
+        0xffffc92d, 0xffffd337, 0x00002cc9, 0x00004add, 0xffffb523, 0xffffdd4b, 0x000022b5, 0x00007337,
+        0xffff8cc9, 0x00003773, 0xffffc88d, 0x0000731e, 0xffff8ce2, 0x00001e73, 0xffffe18d, 0x0000785a,
+        0xffff87a6, 0x00005a78, 0xffffa588, 0x000068e2, 0xffff971e, 0xffffe269, 0x00001d97, 0x000054ab,
+        0xffffab55, 0x000068ba, 0xffff9746, 0xffffba69, 0x00004597, 0x00001e1e, 0xffffe1e2, 0x00003c3c,
+        0xffffc3c4, 0x00006969, 0xffff9697, 0x00000000, 0x05050000, 0xfafb0000, 0x05000000, 0xfb000000,
+        0x00050000, 0xfffb0000, 0x0a0a0000, 0xf5f60000, 0x0f050000, 0xf0fb0000, 0x00000505, 0x05050505,
+        0xfafb0505, 0x05000505, 0xfb000505, 0x00050505, 0xfffb0505, 0x0a0a0505, 0xf5f60505, 0x0f050505,
+        0xf0fb0505, 0xfffffafb, 0x0504fafb, 0xfafafafb, 0x04fffafb, 0xfafffafb, 0x0004fafb, 0xfffafafb,
+        0x0a09fafb, 0xf5f5fafb, 0x0f04fafb, 0xf0fafafb, 0x00000500, 0x05050500, 0xfafb0500, 0x05000500,
+        0xfb000500, 0x00050500, 0xfffb0500, 0x0a0a0500, 0xf5f60500, 0x0f050500, 0xf0fb0500, 0xfffffb00,
+        0x0504fb00, 0xfafafb00, 0x04fffb00, 0xfafffb00, 0x0004fb00, 0xfffafb00, 0x0a09fb00, 0xf5f5fb00,
+        0x0f04fb00, 0xf0fafb00, 0x00000005, 0x05050005, 0xfafb0005, 0x05000005, 0xfb000005, 0x00050005,
+        0xfffb0005, 0x0a0a0005, 0xf5f60005, 0x0f050005, 0xf0fb0005, 0xfffffffb, 0x0504fffb, 0xfafafffb,
+        0x04fffffb, 0xfafffffb, 0x0004fffb, 0xfffafffb, 0x0a09fffb, 0xf5f5fffb, 0x0f04fffb, 0xf0fafffb,
+        0x00000a0a, 0x05050a0a, 0xfafb0a0a, 0x05000a0a, 0xfb000a0a, 0x00050a0a, 0xfffb0a0a, 0x0a0a0a0a,
+        0xf5f60a0a, 0x0f050a0a, 0xf0fb0a0a, 0xfffff5f6, 0x0504f5f6, 0xfafaf5f6, 0x04fff5f6, 0xfafff5f6,
+        0x0004f5f6, 0xfffaf5f6, 0x0a09f5f6, 0xf5f5f5f6, 0x0f04f5f6, 0xf0faf5f6, 0x00000f05, 0x05050f05,
+        0xfafb0f05, 0x05000f05, 0xfb000f05, 0x00050f05, 0xfffb0f05, 0x0a0a0f05, 0xf5f60f05, 0x0f050f05,
+        0xf0fb0f05, 0xfffff0fb, 0x0504f0fb, 0xfafaf0fb, 0x04fff0fb, 0xfafff0fb, 0x0004f0fb, 0xfffaf0fb,
+        0x0a09f0fb, 0xf5f5f0fb, 0x0f04f0fb, 0xf0faf0fb, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000606, 0xfffff9fa, 0x00000600, 0xfffffa00, 0x00000006, 0xfffffffa, 0x00000c0c,
+        0xfffff3f4, 0x00000c06, 0xfffff3fa, 0x0000060c, 0xfffff9f4, 0x00000bf4, 0xfffff40c, 0x000011fa,
+        0xffffee06, 0xfffffa12, 0x000005ee, 0x0000180c, 0xffffe7f4, 0x00000c18, 0xfffff3e8, 0x00001818,
+        0xffffe7e8, 0x00001e00, 0xffffe200, 0x0000001e, 0xffffffe2, 0x000017ee, 0xffffe812, 0xffffee18,
+        0x000011e8, 0x0000301e, 0xffffcfe2, 0x00001e30, 0xffffe1d0, 0x000023fa, 0xffffdc06, 0xfffffa24,
+        0x000005dc, 0x0000300c, 0xffffcff4, 0x00000c30, 0xfffff3d0, 0x00003030, 0xffffcfd0, 0x00003600,
+        0xffffca00, 0x00000036, 0xffffffca, 0x000023dc, 0xffffdc24, 0x00002fe8, 0xffffd018, 0xffffe830,
+        0x000017d0, 0x00004e1e, 0xffffb1e2, 0x00001e4e, 0xffffe1b2, 0x00005436, 0xffffabca, 0x00003654,
+        0xffffc9ac, 0x000041ee, 0xffffbe12, 0xffffee42, 0x000011be, 0x0000540c, 0xffffabf4, 0x00000c54,
+        0xfffff3ac, 0x00005a5a, 0xffffa5a6, 0x00005ffa, 0xffffa006, 0xfffffa60, 0x000005a0, 0x000041ca,
+        0xffffbe36, 0xffffca42, 0x000035be, 0x000059d6, 0xffffa62a, 0xffffd65a, 0x000029a6, 0x00007de2,
+        0xffff821e, 0xffffe27e, 0x00001d82, 0x0000659a, 0xffff9a66, 0x00007dac, 0xffff8254, 0xffffac7e,
+        0x00005382, 0x00002424, 0xffffdbdc, 0x00004242, 0xffffbdbe, 0x00000000, 0x06060000, 0xf9fa0000,
+        0x06000000, 0xfa000000, 0x00060000, 0xfffa0000, 0x0c0c0000, 0xf3f40000, 0x0c060000, 0xf3fa0000,
+        0x060c0000, 0x00000606, 0x06060606, 0xf9fa0606, 0x06000606, 0xfa000606, 0x00060606, 0xfffa0606,
+        0x0c0c0606, 0xf3f40606, 0x0c060606, 0xf3fa0606, 0x060c0606, 0xfffff9fa, 0x0605f9fa, 0xf9f9f9fa,
+        0x05fff9fa, 0xf9fff9fa, 0x0005f9fa, 0xfff9f9fa, 0x0c0bf9fa, 0xf3f3f9fa, 0x0c05f9fa, 0xf3f9f9fa,
+        0x060bf9fa, 0x00000600, 0x06060600, 0xf9fa0600, 0x06000600, 0xfa000600, 0x00060600, 0xfffa0600,
+        0x0c0c0600, 0xf3f40600, 0x0c060600, 0xf3fa0600, 0x060c0600, 0xfffffa00, 0x0605fa00, 0xf9f9fa00,
+        0x05fffa00, 0xf9fffa00, 0x0005fa00, 0xfff9fa00, 0x0c0bfa00, 0xf3f3fa00, 0x0c05fa00, 0xf3f9fa00,
+        0x060bfa00, 0x00000006, 0x06060006, 0xf9fa0006, 0x06000006, 0xfa000006, 0x00060006, 0xfffa0006,
+        0x0c0c0006, 0xf3f40006, 0x0c060006, 0xf3fa0006, 0x060c0006, 0xfffffffa, 0x0605fffa, 0xf9f9fffa,
+        0x05fffffa, 0xf9fffffa, 0x0005fffa, 0xfff9fffa, 0x0c0bfffa, 0xf3f3fffa, 0x0c05fffa, 0xf3f9fffa,
+        0x060bfffa, 0x00000c0c, 0x06060c0c, 0xf9fa0c0c, 0x06000c0c, 0xfa000c0c, 0x00060c0c, 0xfffa0c0c,
+        0x0c0c0c0c, 0xf3f40c0c, 0x0c060c0c, 0xf3fa0c0c, 0x060c0c0c, 0xfffff3f4, 0x0605f3f4, 0xf9f9f3f4,
+        0x05fff3f4, 0xf9fff3f4, 0x0005f3f4, 0xfff9f3f4, 0x0c0bf3f4, 0xf3f3f3f4, 0x0c05f3f4, 0xf3f9f3f4,
+        0x060bf3f4, 0x00000c06, 0x06060c06, 0xf9fa0c06, 0x06000c06, 0xfa000c06, 0x00060c06, 0xfffa0c06,
+        0x0c0c0c06, 0xf3f40c06, 0x0c060c06, 0xf3fa0c06, 0x060c0c06, 0xfffff3fa, 0x0605f3fa, 0xf9f9f3fa,
+        0x05fff3fa, 0xf9fff3fa, 0x0005f3fa, 0xfff9f3fa, 0x0c0bf3fa, 0xf3f3f3fa, 0x0c05f3fa, 0xf3f9f3fa,
+        0x060bf3fa, 0x0000060c, 0x0606060c, 0xf9fa060c, 0x0600060c, 0xfa00060c, 0x0006060c, 0xfffa060c,
+        0x0c0c060c, 0xf3f4060c, 0x0c06060c, 0xf3fa060c, 0x060c060c, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000707, 0xfffff8f9, 0x00000700, 0xfffff900, 0x00000007, 0xfffffff9, 0x00000e0e,
+        0xfffff1f2, 0x00001507, 0xffffeaf9, 0x00000715, 0xfffff8eb, 0x00000df2, 0xfffff20e, 0x000014f9,
+        0xffffeb07, 0xfffff915, 0x000006eb, 0x00001c0e, 0xffffe3f2, 0x00000e1c, 0xfffff1e4, 0x00001c1c,
+        0xffffe3e4, 0x00002300, 0xffffdd00, 0x00000023, 0xffffffdd, 0x00001beb, 0xffffe415, 0xffffeb1c,
+        0x000014e4, 0x00003823, 0xffffc7dd, 0x00002338, 0xffffdcc8, 0x000029f2, 0xffffd60e, 0xfffff22a,
+        0x00000dd6, 0x0000380e, 0xffffc7f2, 0x00000e38, 0xfffff1c8, 0x00003838, 0xffffc7c8, 0x00003f00,
+        0xffffc100, 0x0000003f, 0xffffffc1, 0x000029d6, 0xffffd62a, 0x000037e4, 0xffffc81c, 0xffffe438,
+        0x00001bc8, 0x00005b23, 0xffffa4dd, 0x0000235b, 0xffffdca5, 0x0000623f, 0xffff9dc1, 0x00003f62,
+        0xffffc09e, 0x00004ceb, 0xffffb315, 0xffffeb4d, 0x000014b3, 0x0000620e, 0xffff9df2, 0x00000e62,
+        0xfffff19e, 0x00006969, 0xffff9697, 0x000076f9, 0xffff8907, 0xfffff977, 0x00000689, 0x00004cc1,
+        0xffffb33f, 0xffffc14d, 0x00003eb3, 0x000068cf, 0xffff9731, 0xffffcf69, 0x00003097, 0x00007689,
+        0xffff8977, 0x00002a2a, 0xffffd5d6, 0x00004d4d, 0xffffb2b3, 0x00000000, 0x07070000, 0xf8f90000,
+        0x07000000, 0xf9000000, 0x00070000, 0xfff90000, 0x0e0e0000, 0xf1f20000, 0x15070000, 0xeaf90000,
+        0x07150000, 0x00000707, 0x07070707, 0xf8f90707, 0x07000707, 0xf9000707, 0x00070707, 0xfff90707,
+        0x0e0e0707, 0xf1f20707, 0x15070707, 0xeaf90707, 0x07150707, 0xfffff8f9, 0x0706f8f9, 0xf8f8f8f9,
+        0x06fff8f9, 0xf8fff8f9, 0x0006f8f9, 0xfff8f8f9, 0x0e0df8f9, 0xf1f1f8f9, 0x1506f8f9, 0xeaf8f8f9,
+        0x0714f8f9, 0x00000700, 0x07070700, 0xf8f90700, 0x07000700, 0xf9000700, 0x00070700, 0xfff90700,
+        0x0e0e0700, 0xf1f20700, 0x15070700, 0xeaf90700, 0x07150700, 0xfffff900, 0x0706f900, 0xf8f8f900,
+        0x06fff900, 0xf8fff900, 0x0006f900, 0xfff8f900, 0x0e0df900, 0xf1f1f900, 0x1506f900, 0xeaf8f900,
+        0x0714f900, 0x00000007, 0x07070007, 0xf8f90007, 0x07000007, 0xf9000007, 0x00070007, 0xfff90007,
+        0x0e0e0007, 0xf1f20007, 0x15070007, 0xeaf90007, 0x07150007, 0xfffffff9, 0x0706fff9, 0xf8f8fff9,
+        0x06fffff9, 0xf8fffff9, 0x0006fff9, 0xfff8fff9, 0x0e0dfff9, 0xf1f1fff9, 0x1506fff9, 0xeaf8fff9,
+        0x0714fff9, 0x00000e0e, 0x07070e0e, 0xf8f90e0e, 0x07000e0e, 0xf9000e0e, 0x00070e0e, 0xfff90e0e,
+        0x0e0e0e0e, 0xf1f20e0e, 0x15070e0e, 0xeaf90e0e, 0x07150e0e, 0xfffff1f2, 0x0706f1f2, 0xf8f8f1f2,
+        0x06fff1f2, 0xf8fff1f2, 0x0006f1f2, 0xfff8f1f2, 0x0e0df1f2, 0xf1f1f1f2, 0x1506f1f2, 0xeaf8f1f2,
+        0x0714f1f2, 0x00001507, 0x07071507, 0xf8f91507, 0x07001507, 0xf9001507, 0x00071507, 0xfff91507,
+        0x0e0e1507, 0xf1f21507, 0x15071507, 0xeaf91507, 0x07151507, 0xffffeaf9, 0x0706eaf9, 0xf8f8eaf9,
+        0x06ffeaf9, 0xf8ffeaf9, 0x0006eaf9, 0xfff8eaf9, 0x0e0deaf9, 0xf1f1eaf9, 0x1506eaf9, 0xeaf8eaf9,
+        0x0714eaf9, 0x00000715, 0x07070715, 0xf8f90715, 0x07000715, 0xf9000715, 0x00070715, 0xfff90715,
+        0x0e0e0715, 0xf1f20715, 0x15070715, 0xeaf90715, 0x07150715, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000808, 0xfffff7f8, 0x00000800, 0xfffff800, 0x00000008, 0xfffffff8, 0x00001010,
+        0xffffeff0, 0x00001008, 0xffffeff8, 0x00000810, 0xfffff7f0, 0x00000ff0, 0xfffff010, 0x000017f8,
+        0xffffe808, 0xfffff818, 0x000007e8, 0x00002010, 0xffffdff0, 0x00001020, 0xffffefe0, 0x00002020,
+        0xffffdfe0, 0x00002800, 0xffffd800, 0x00000028, 0xffffffd8, 0x00001fe8, 0xffffe018, 0xffffe820,
+        0x000017e0, 0x00004028, 0xffffbfd8, 0x00002840, 0xffffd7c0, 0x00002ff0, 0xffffd010, 0xfffff030,
+        0x00000fd0, 0x00004010, 0xffffbff0, 0x00001040, 0xffffefc0, 0x00004040, 0xffffbfc0, 0x00004800,
+        0xffffb800, 0x00000048, 0xffffffb8, 0x00002fd0, 0xffffd030, 0x00003fe0, 0xffffc020, 0xffffe040,
+        0x00001fc0, 0x00006828, 0xffff97d8, 0x00002868, 0xffffd798, 0x00007048, 0xffff8fb8, 0x00004870,
+        0xffffb790, 0x000057e8, 0xffffa818, 0xffffe858, 0x000017a8, 0x00007010, 0xffff8ff0, 0x00001070,
+        0xffffef90, 0x00007878, 0xffff8788, 0x000057b8, 0xffffa848, 0xffffb858, 0x000047a8, 0x000077c8,
+        0xffff8838, 0xffffc878, 0x00003788, 0x00003030, 0xffffcfd0, 0x00005858, 0xffffa7a8, 0x00000000,
+        0x08080000, 0xf7f80000, 0x08000000, 0xf8000000, 0x00080000, 0xfff80000, 0x10100000, 0xeff00000,
+        0x10080000, 0xeff80000, 0x08100000, 0x00000808, 0x08080808, 0xf7f80808, 0x08000808, 0xf8000808,
+        0x00080808, 0xfff80808, 0x10100808, 0xeff00808, 0x10080808, 0xeff80808, 0x08100808, 0xfffff7f8,
+        0x0807f7f8, 0xf7f7f7f8, 0x07fff7f8, 0xf7fff7f8, 0x0007f7f8, 0xfff7f7f8, 0x100ff7f8, 0xefeff7f8,
+        0x1007f7f8, 0xeff7f7f8, 0x080ff7f8, 0x00000800, 0x08080800, 0xf7f80800, 0x08000800, 0xf8000800,
+        0x00080800, 0xfff80800, 0x10100800, 0xeff00800, 0x10080800, 0xeff80800, 0x08100800, 0xfffff800,
+        0x0807f800, 0xf7f7f800, 0x07fff800, 0xf7fff800, 0x0007f800, 0xfff7f800, 0x100ff800, 0xefeff800,
+        0x1007f800, 0xeff7f800, 0x080ff800, 0x00000008, 0x08080008, 0xf7f80008, 0x08000008, 0xf8000008,
+        0x00080008, 0xfff80008, 0x10100008, 0xeff00008, 0x10080008, 0xeff80008, 0x08100008, 0xfffffff8,
+        0x0807fff8, 0xf7f7fff8, 0x07fffff8, 0xf7fffff8, 0x0007fff8, 0xfff7fff8, 0x100ffff8, 0xefeffff8,
+        0x1007fff8, 0xeff7fff8, 0x080ffff8, 0x00001010, 0x08081010, 0xf7f81010, 0x08001010, 0xf8001010,
+        0x00081010, 0xfff81010, 0x10101010, 0xeff01010, 0x10081010, 0xeff81010, 0x08101010, 0xffffeff0,
+        0x0807eff0, 0xf7f7eff0, 0x07ffeff0, 0xf7ffeff0, 0x0007eff0, 0xfff7eff0, 0x100feff0, 0xefefeff0,
+        0x1007eff0, 0xeff7eff0, 0x080feff0, 0x00001008, 0x08081008, 0xf7f81008, 0x08001008, 0xf8001008,
+        0x00081008, 0xfff81008, 0x10101008, 0xeff01008, 0x10081008, 0xeff81008, 0x08101008, 0xffffeff8,
+        0x0807eff8, 0xf7f7eff8, 0x07ffeff8, 0xf7ffeff8, 0x0007eff8, 0xfff7eff8, 0x100feff8, 0xefefeff8,
+        0x1007eff8, 0xeff7eff8, 0x080feff8, 0x00000810, 0x08080810, 0xf7f80810, 0x08000810, 0xf8000810,
+        0x00080810, 0xfff80810, 0x10100810, 0xeff00810, 0x10080810, 0xeff80810, 0x08100810, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000909, 0xfffff6f7, 0x00000900, 0xfffff700, 0x00000009, 0xfffffff7, 0x00001212,
+        0xffffedee, 0x00001b09, 0xffffe4f7, 0x0000091b, 0xfffff6e5, 0x000011ee, 0xffffee12, 0x00001af7,
+        0xffffe509, 0xfffff71b, 0x000008e5, 0x00002412, 0xffffdbee, 0x00001224, 0xffffeddc, 0x00002424,
+        0xffffdbdc, 0x00002d00, 0xffffd300, 0x0000002d, 0xffffffd3, 0x000023e5, 0xffffdc1b, 0xffffe524,
+        0x00001adc, 0x0000482d, 0xffffb7d3, 0x00002d48, 0xffffd2b8, 0x000035ee, 0xffffca12, 0xffffee36,
+        0x000011ca, 0x00004812, 0xffffb7ee, 0x00001248, 0xffffedb8, 0x00004848, 0xffffb7b8, 0x00005100,
+        0xffffaf00, 0x00000051, 0xffffffaf, 0x000035ca, 0xffffca36, 0x000047dc, 0xffffb824, 0xffffdc48,
+        0x000023b8, 0x0000752d, 0xffff8ad3, 0x00002d75, 0xffffd28b, 0x00007e51, 0xffff81af, 0x0000517e,
+        0xffffae82, 0x000062e5, 0xffff9d1b, 0xffffe563, 0x00001a9d, 0x000062af, 0xffff9d51, 0xffffaf63,
+        0x0000509d, 0x00003636, 0xffffc9ca, 0x00006c6c, 0xffff9394, 0x00000000, 0x09090000, 0xf6f70000,
+        0x09000000, 0xf7000000, 0x00090000, 0xfff70000, 0x12120000, 0xedee0000, 0x1b090000, 0xe4f70000,
+        0x091b0000, 0xf6e50000, 0x00000909, 0x09090909, 0xf6f70909, 0x09000909, 0xf7000909, 0x00090909,
+        0xfff70909, 0x12120909, 0xedee0909, 0x1b090909, 0xe4f70909, 0x091b0909, 0xf6e50909, 0xfffff6f7,
+        0x0908f6f7, 0xf6f6f6f7, 0x08fff6f7, 0xf6fff6f7, 0x0008f6f7, 0xfff6f6f7, 0x1211f6f7, 0xededf6f7,
+        0x1b08f6f7, 0xe4f6f6f7, 0x091af6f7, 0xf6e4f6f7, 0x00000900, 0x09090900, 0xf6f70900, 0x09000900,
+        0xf7000900, 0x00090900, 0xfff70900, 0x12120900, 0xedee0900, 0x1b090900, 0xe4f70900, 0x091b0900,
+        0xf6e50900, 0xfffff700, 0x0908f700, 0xf6f6f700, 0x08fff700, 0xf6fff700, 0x0008f700, 0xfff6f700,
+        0x1211f700, 0xededf700, 0x1b08f700, 0xe4f6f700, 0x091af700, 0xf6e4f700, 0x00000009, 0x09090009,
+        0xf6f70009, 0x09000009, 0xf7000009, 0x00090009, 0xfff70009, 0x12120009, 0xedee0009, 0x1b090009,
+        0xe4f70009, 0x091b0009, 0xf6e50009, 0xfffffff7, 0x0908fff7, 0xf6f6fff7, 0x08fffff7, 0xf6fffff7,
+        0x0008fff7, 0xfff6fff7, 0x1211fff7, 0xededfff7, 0x1b08fff7, 0xe4f6fff7, 0x091afff7, 0xf6e4fff7,
+        0x00001212, 0x09091212, 0xf6f71212, 0x09001212, 0xf7001212, 0x00091212, 0xfff71212, 0x12121212,
+        0xedee1212, 0x1b091212, 0xe4f71212, 0x091b1212, 0xf6e51212, 0xffffedee, 0x0908edee, 0xf6f6edee,
+        0x08ffedee, 0xf6ffedee, 0x0008edee, 0xfff6edee, 0x1211edee, 0xedededee, 0x1b08edee, 0xe4f6edee,
+        0x091aedee, 0xf6e4edee, 0x00001b09, 0x09091b09, 0xf6f71b09, 0x09001b09, 0xf7001b09, 0x00091b09,
+        0xfff71b09, 0x12121b09, 0xedee1b09, 0x1b091b09, 0xe4f71b09, 0x091b1b09, 0xf6e51b09, 0xffffe4f7,
+        0x0908e4f7, 0xf6f6e4f7, 0x08ffe4f7, 0xf6ffe4f7, 0x0008e4f7, 0xfff6e4f7, 0x1211e4f7, 0xedede4f7,
+        0x1b08e4f7, 0xe4f6e4f7, 0x091ae4f7, 0xf6e4e4f7, 0x0000091b, 0x0909091b, 0xf6f7091b, 0x0900091b,
+        0xf700091b, 0x0009091b, 0xfff7091b, 0x1212091b, 0xedee091b, 0x1b09091b, 0xe4f7091b, 0x091b091b,
+        0xf6e5091b, 0xfffff6e5, 0x0908f6e5, 0xf6f6f6e5, 0x08fff6e5, 0xf6fff6e5, 0x0008f6e5, 0xfff6f6e5,
+        0x1211f6e5, 0xededf6e5, 0x1b08f6e5, 0xe4f6f6e5, 0x091af6e5, 0xf6e4f6e5, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000300, 0xfffffd00, 0x00000003, 0xfffffffd, 0x00000606,
+        0xfffff9fa, 0x00000700, 0xfffff900, 0x00000007, 0xfffffff9, 0x000004fb, 0xfffffb05, 0xfffffb05,
+        0x000004fb, 0x00000b06, 0xfffff4fa, 0x0000060b, 0xfffff9f5, 0x00000800, 0xfffff800, 0x00000008,
+        0xfffffff8, 0x00000b0b, 0xfffff4f5, 0x00000c00, 0xfffff400, 0x0000000c, 0xfffffff4, 0x0000110c,
+        0xffffeef4, 0x00000c11, 0xfffff3ef, 0x00001111, 0xffffeeef, 0x00001206, 0xffffedfa, 0x00000612,
+        0xfffff9ee, 0x00000af8, 0xfffff508, 0xfffff80b, 0x000007f5, 0x00000f00, 0xfffff100, 0x0000000f,
+        0xfffffff1, 0x00001400, 0xffffec00, 0x00000014, 0xffffffec, 0x00001912, 0xffffe6ee, 0x00001219,
+        0xffffede7, 0x0000190b, 0xffffe6f5, 0x00000b19, 0xfffff4e7, 0x00001919, 0xffffe6e7, 0x00000df2,
+        0xfffff20e, 0xfffff20e, 0x00000df2, 0x00001a00, 0xffffe600, 0x0000001a, 0xffffffe6, 0x000011f5,
+        0xffffee0b, 0xfffff512, 0x00000aee, 0x000015f9, 0xffffea07, 0xfffff916, 0x000006ea, 0x0000221a,
+        0xffffdde6, 0x00001a22, 0xffffe5de, 0x00002212, 0xffffddee, 0x00001222, 0xffffedde, 0x00002222,
+        0xffffddde, 0x0000230b, 0xffffdcf5, 0x00000b23, 0xfffff4dd, 0x00001d00, 0xffffe300, 0x0000001d,
+        0xffffffe3, 0x000015ed, 0xffffea13, 0xffffed16, 0x000012ea, 0x000019f1, 0xffffe60f, 0xfffff11a,
+        0x00000ee6, 0x00002500, 0xffffdb00, 0x00000025, 0xffffffdb, 0x00002c1b, 0xffffd3e5, 0x00001b2c,
+        0xffffe4d4, 0x00002c24, 0xffffd3dc, 0x0000242c, 0xffffdbd4, 0x00002c12, 0xffffd3ee, 0x0000122c,
+        0xffffedd4, 0x000020f6, 0xffffdf0a, 0xfffff621, 0x000009df, 0x00002d2d, 0xffffd2d3, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000300, 0xfffffd00, 0x00000003, 0xfffffffd, 0x00000606,
+        0xfffff9fa, 0x00000700, 0xfffff900, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020300, 0x0201fd00,
+        0x02020003, 0x0201fffd, 0x02020606, 0x0201f9fa, 0x02020700, 0x0201f900, 0xfdfe0000, 0xfdfe0202,
+        0xfdfdfdfe, 0xfdfe0300, 0xfdfdfd00, 0xfdfe0003, 0xfdfdfffd, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0700,
+        0xfdfdf900, 0x03000000, 0x03000202, 0x02fffdfe, 0x03000300, 0x02fffd00, 0x03000003, 0x02fffffd,
+        0x03000606, 0x02fff9fa, 0x03000700, 0x02fff900, 0xfd000000, 0xfd000202, 0xfcfffdfe, 0xfd000300,
+        0xfcfffd00, 0xfd000003, 0xfcfffffd, 0xfd000606, 0xfcfff9fa, 0xfd000700, 0xfcfff900, 0x00030000,
+        0x00030202, 0x0002fdfe, 0x00030300, 0x0002fd00, 0x00030003, 0x0002fffd, 0x00030606, 0x0002f9fa,
+        0x00030700, 0x0002f900, 0xfffd0000, 0xfffd0202, 0xfffcfdfe, 0xfffd0300, 0xfffcfd00, 0xfffd0003,
+        0xfffcfffd, 0xfffd0606, 0xfffcf9fa, 0xfffd0700, 0xfffcf900, 0x06060000, 0x06060202, 0x0605fdfe,
+        0x06060300, 0x0605fd00, 0x06060003, 0x0605fffd, 0x06060606, 0x0605f9fa, 0x06060700, 0x0605f900,
+        0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0300, 0xf9f9fd00, 0xf9fa0003, 0xf9f9fffd, 0xf9fa0606,
+        0xf9f9f9fa, 0xf9fa0700, 0xf9f9f900, 0x07000000, 0x07000202, 0x06fffdfe, 0x07000300, 0x06fffd00,
+        0x07000003, 0x06fffffd, 0x07000606, 0x06fff9fa, 0x07000700, 0x06fff900, 0xf9000000, 0xf9000202,
+        0xf8fffdfe, 0xf9000300, 0xf8fffd00, 0xf9000003, 0xf8fffffd, 0xf9000606, 0xf8fff9fa, 0xf9000700,
+        0xf8fff900, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000202, 0xfffffdfe, 0x00000606,
+        0xfffff9fa, 0x00000600, 0xfffffa00, 0x00000006, 0xfffffffa, 0x000003fc, 0xfffffc04, 0xfffffa0a,
+        0x000005f6, 0xfffff400, 0x00000c00, 0xfffff3fa, 0xfffff406, 0x00000bfa, 0x00000c06, 0xfffffff2,
+        0x0000000e, 0x00000c0c, 0xfffff3f4, 0xffffee00, 0x00001200, 0xfffff40e, 0x00000bf2, 0xfffff9ee,
+        0xfffffa12, 0x000005ee, 0x00000612, 0xffffedf6, 0xffffee0a, 0x000011f6, 0x0000120a, 0xffffffea,
+        0x00000016, 0xffffe800, 0x00001800, 0xfffff3ea, 0xfffff416, 0x00000bea, 0x00000c16, 0xffffe7f8,
+        0xffffe808, 0x000017f8, 0x00001808, 0xfffff9e6, 0xfffffa1a, 0x000005e6, 0x0000061a, 0xffffffe4,
+        0x0000001c, 0x00001414, 0xffffebec, 0xffffe5f2, 0x00001a0e, 0xfffff3e2, 0x00000c1e, 0xffffdff6,
+        0x0000200a, 0xffffdfee, 0x00002012, 0xffffe5e6, 0x00001a1a, 0xffffebde, 0x00001422, 0xfffff3da,
+        0x00000c26, 0xffffdfe0, 0x00002020, 0x00002020, 0xffffd7ea, 0xffffddde, 0x00002222, 0x00000000,
+        0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa,
+        0x00000600, 0xfffffa00, 0x00000006, 0xfffffffa, 0x02000000, 0x02000200, 0x01fffe00, 0x02000002,
+        0x01fffffe, 0x02000202, 0x01fffdfe, 0x02000606, 0x01fff9fa, 0x02000600, 0x01fffa00, 0x02000006,
+        0x01fffffa, 0xfe000000, 0xfe000200, 0xfdfffe00, 0xfe000002, 0xfdfffffe, 0xfe000202, 0xfdfffdfe,
+        0xfe000606, 0xfdfff9fa, 0xfe000600, 0xfdfffa00, 0xfe000006, 0xfdfffffa, 0x00020000, 0x00020200,
+        0x0001fe00, 0x00020002, 0x0001fffe, 0x00020202, 0x0001fdfe, 0x00020606, 0x0001f9fa, 0x00020600,
+        0x0001fa00, 0x00020006, 0x0001fffa, 0xfffe0000, 0xfffe0200, 0xfffdfe00, 0xfffe0002, 0xfffdfffe,
+        0xfffe0202, 0xfffdfdfe, 0xfffe0606, 0xfffdf9fa, 0xfffe0600, 0xfffdfa00, 0xfffe0006, 0xfffdfffa,
+        0x02020000, 0x02020200, 0x0201fe00, 0x02020002, 0x0201fffe, 0x02020202, 0x0201fdfe, 0x02020606,
+        0x0201f9fa, 0x02020600, 0x0201fa00, 0x02020006, 0x0201fffa, 0xfdfe0000, 0xfdfe0200, 0xfdfdfe00,
+        0xfdfe0002, 0xfdfdfffe, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0600, 0xfdfdfa00,
+        0xfdfe0006, 0xfdfdfffa, 0x06060000, 0x06060200, 0x0605fe00, 0x06060002, 0x0605fffe, 0x06060202,
+        0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060600, 0x0605fa00, 0x06060006, 0x0605fffa, 0xf9fa0000,
+        0xf9fa0200, 0xf9f9fe00, 0xf9fa0002, 0xf9f9fffe, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa,
+        0xf9fa0600, 0xf9f9fa00, 0xf9fa0006, 0xf9f9fffa, 0x06000000, 0x06000200, 0x05fffe00, 0x06000002,
+        0x05fffffe, 0x06000202, 0x05fffdfe, 0x06000606, 0x05fff9fa, 0x06000600, 0x05fffa00, 0x06000006,
+        0x05fffffa, 0xfa000000, 0xfa000200, 0xf9fffe00, 0xfa000002, 0xf9fffffe, 0xfa000202, 0xf9fffdfe,
+        0xfa000606, 0xf9fff9fa, 0xfa000600, 0xf9fffa00, 0xfa000006, 0xf9fffffa, 0x00060000, 0x00060200,
+        0x0005fe00, 0x00060002, 0x0005fffe, 0x00060202, 0x0005fdfe, 0x00060606, 0x0005f9fa, 0x00060600,
+        0x0005fa00, 0x00060006, 0x0005fffa, 0xfffa0000, 0xfffa0200, 0xfff9fe00, 0xfffa0002, 0xfff9fffe,
+        0xfffa0202, 0xfff9fdfe, 0xfffa0606, 0xfff9f9fa, 0xfffa0600, 0xfff9fa00, 0xfffa0006, 0xfff9fffa,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000404, 0xfffffbfc, 0x00000a0a,
+        0xfffff5f6, 0x00000a00, 0xfffff600, 0x0000000a, 0xfffffff6, 0x000005fa, 0xfffffa06, 0xfffff80e,
+        0x000007f2, 0xffffffee, 0x00000012, 0xfffff00a, 0x00000ff6, 0xffffe800, 0x00001800, 0xfffff7e8,
+        0xfffff818, 0x000007e8, 0x00000818, 0x00001212, 0xffffedee, 0xfffff014, 0x00000fec, 0xffffe5f2,
+        0xffffe60e, 0x000019f2, 0x00001a0e, 0xffffffe2, 0x0000001e, 0xffffde00, 0x00002200, 0xfffff7de,
+        0xfffff822, 0x000007de, 0x00000822, 0xffffede2, 0xffffee1e, 0x000011e2, 0x0000121e, 0xffffddf6,
+        0xffffde0a, 0x000021f6, 0x0000220a, 0xffffddec, 0x00002214, 0xffffffd8, 0x00000028, 0x00001e1e,
+        0xffffe1e2, 0xffffedd8, 0x00001228, 0xffffd400, 0x00002c00, 0xffffd3f0, 0x00002c10, 0xffffdbdc,
+        0xffffdbdc, 0x00002424, 0xffffd3e6, 0x00002c1a, 0xffffe5d2, 0x00001a2e, 0xffffedcc, 0x00001234,
+        0xffffc9ec, 0xffffd3d4, 0x00002c2c, 0xffffc9e0, 0xffffd1d2, 0xffffd1d2, 0x00002e2e, 0x00000000,
+        0x00000200, 0xfffffe00, 0x00000002, 0xfffffffe, 0x00000404, 0xfffffbfc, 0x00000a0a, 0xfffff5f6,
+        0x00000a00, 0xfffff600, 0x0000000a, 0xfffffff6, 0x02000000, 0x02000200, 0x01fffe00, 0x02000002,
+        0x01fffffe, 0x02000404, 0x01fffbfc, 0x02000a0a, 0x01fff5f6, 0x02000a00, 0x01fff600, 0x0200000a,
+        0x01fffff6, 0xfe000000, 0xfe000200, 0xfdfffe00, 0xfe000002, 0xfdfffffe, 0xfe000404, 0xfdfffbfc,
+        0xfe000a0a, 0xfdfff5f6, 0xfe000a00, 0xfdfff600, 0xfe00000a, 0xfdfffff6, 0x00020000, 0x00020200,
+        0x0001fe00, 0x00020002, 0x0001fffe, 0x00020404, 0x0001fbfc, 0x00020a0a, 0x0001f5f6, 0x00020a00,
+        0x0001f600, 0x0002000a, 0x0001fff6, 0xfffe0000, 0xfffe0200, 0xfffdfe00, 0xfffe0002, 0xfffdfffe,
+        0xfffe0404, 0xfffdfbfc, 0xfffe0a0a, 0xfffdf5f6, 0xfffe0a00, 0xfffdf600, 0xfffe000a, 0xfffdfff6,
+        0x04040000, 0x04040200, 0x0403fe00, 0x04040002, 0x0403fffe, 0x04040404, 0x0403fbfc, 0x04040a0a,
+        0x0403f5f6, 0x04040a00, 0x0403f600, 0x0404000a, 0x0403fff6, 0xfbfc0000, 0xfbfc0200, 0xfbfbfe00,
+        0xfbfc0002, 0xfbfbfffe, 0xfbfc0404, 0xfbfbfbfc, 0xfbfc0a0a, 0xfbfbf5f6, 0xfbfc0a00, 0xfbfbf600,
+        0xfbfc000a, 0xfbfbfff6, 0x0a0a0000, 0x0a0a0200, 0x0a09fe00, 0x0a0a0002, 0x0a09fffe, 0x0a0a0404,
+        0x0a09fbfc, 0x0a0a0a0a, 0x0a09f5f6, 0x0a0a0a00, 0x0a09f600, 0x0a0a000a, 0x0a09fff6, 0xf5f60000,
+        0xf5f60200, 0xf5f5fe00, 0xf5f60002, 0xf5f5fffe, 0xf5f60404, 0xf5f5fbfc, 0xf5f60a0a, 0xf5f5f5f6,
+        0xf5f60a00, 0xf5f5f600, 0xf5f6000a, 0xf5f5fff6, 0x0a000000, 0x0a000200, 0x09fffe00, 0x0a000002,
+        0x09fffffe, 0x0a000404, 0x09fffbfc, 0x0a000a0a, 0x09fff5f6, 0x0a000a00, 0x09fff600, 0x0a00000a,
+        0x09fffff6, 0xf6000000, 0xf6000200, 0xf5fffe00, 0xf6000002, 0xf5fffffe, 0xf6000404, 0xf5fffbfc,
+        0xf6000a0a, 0xf5fff5f6, 0xf6000a00, 0xf5fff600, 0xf600000a, 0xf5fffff6, 0x000a0000, 0x000a0200,
+        0x0009fe00, 0x000a0002, 0x0009fffe, 0x000a0404, 0x0009fbfc, 0x000a0a0a, 0x0009f5f6, 0x000a0a00,
+        0x0009f600, 0x000a000a, 0x0009fff6, 0xfff60000, 0xfff60200, 0xfff5fe00, 0xfff60002, 0xfff5fffe,
+        0xfff60404, 0xfff5fbfc, 0xfff60a0a, 0xfff5f5f6, 0xfff60a00, 0xfff5f600, 0xfff6000a, 0xfff5fff6,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x00000404, 0xfffffbfc, 0x00000c0c,
+        0xfffff3f4, 0x00000c00, 0xfffff400, 0x0000000c, 0xfffffff4, 0x000007f8, 0xfffff808, 0xfffff008,
+        0x00000ff8, 0xffffe800, 0x00001800, 0xfffff7e8, 0xfffff818, 0x000007e8, 0x00000818, 0xfffff014,
+        0x00000fec, 0xffffffe4, 0x0000001c, 0xffffe7f0, 0xffffe810, 0x000017f0, 0x00001810, 0xffffe000,
+        0x00002000, 0xffffefe4, 0xfffff01c, 0x00000fe4, 0x0000101c, 0xffffdff8, 0xffffe008, 0xfffff7e0,
+        0xfffff820, 0x000007e0, 0x00000820, 0x00001ff8, 0x00002008, 0x00001818, 0xffffe7e8, 0xffffe818,
+        0x000017e8, 0xffffdfec, 0x00002014, 0xffffffd8, 0x00000028, 0xffffefd8, 0x00001028, 0xffffd400,
+        0xffffd400, 0xffffffd4, 0x0000002c, 0x00002c00, 0x00002c00, 0xffffdfe0, 0x00002020, 0xffffd3f0,
+        0x00002c10, 0xffffd3e8, 0xffffe7d4, 0x0000182c, 0x00002c18, 0xffffefd0, 0x00001030, 0xffffdbdc,
+        0xffffdbdc, 0x00002424, 0x00002424, 0xffffcbec, 0x00002828, 0xffffd7d8, 0xffffcbe0, 0x00000000,
+        0x00000400, 0xfffffc00, 0x00000004, 0xfffffffc, 0x00000404, 0xfffffbfc, 0x00000c0c, 0xfffff3f4,
+        0x00000c00, 0xfffff400, 0x0000000c, 0xfffffff4, 0x04000000, 0x04000400, 0x03fffc00, 0x04000004,
+        0x03fffffc, 0x04000404, 0x03fffbfc, 0x04000c0c, 0x03fff3f4, 0x04000c00, 0x03fff400, 0x0400000c,
+        0x03fffff4, 0xfc000000, 0xfc000400, 0xfbfffc00, 0xfc000004, 0xfbfffffc, 0xfc000404, 0xfbfffbfc,
+        0xfc000c0c, 0xfbfff3f4, 0xfc000c00, 0xfbfff400, 0xfc00000c, 0xfbfffff4, 0x00040000, 0x00040400,
+        0x0003fc00, 0x00040004, 0x0003fffc, 0x00040404, 0x0003fbfc, 0x00040c0c, 0x0003f3f4, 0x00040c00,
+        0x0003f400, 0x0004000c, 0x0003fff4, 0xfffc0000, 0xfffc0400, 0xfffbfc00, 0xfffc0004, 0xfffbfffc,
+        0xfffc0404, 0xfffbfbfc, 0xfffc0c0c, 0xfffbf3f4, 0xfffc0c00, 0xfffbf400, 0xfffc000c, 0xfffbfff4,
+        0x04040000, 0x04040400, 0x0403fc00, 0x04040004, 0x0403fffc, 0x04040404, 0x0403fbfc, 0x04040c0c,
+        0x0403f3f4, 0x04040c00, 0x0403f400, 0x0404000c, 0x0403fff4, 0xfbfc0000, 0xfbfc0400, 0xfbfbfc00,
+        0xfbfc0004, 0xfbfbfffc, 0xfbfc0404, 0xfbfbfbfc, 0xfbfc0c0c, 0xfbfbf3f4, 0xfbfc0c00, 0xfbfbf400,
+        0xfbfc000c, 0xfbfbfff4, 0x0c0c0000, 0x0c0c0400, 0x0c0bfc00, 0x0c0c0004, 0x0c0bfffc, 0x0c0c0404,
+        0x0c0bfbfc, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c0c00, 0x0c0bf400, 0x0c0c000c, 0x0c0bfff4, 0xf3f40000,
+        0xf3f40400, 0xf3f3fc00, 0xf3f40004, 0xf3f3fffc, 0xf3f40404, 0xf3f3fbfc, 0xf3f40c0c, 0xf3f3f3f4,
+        0xf3f40c00, 0xf3f3f400, 0xf3f4000c, 0xf3f3fff4, 0x0c000000, 0x0c000400, 0x0bfffc00, 0x0c000004,
+        0x0bfffffc, 0x0c000404, 0x0bfffbfc, 0x0c000c0c, 0x0bfff3f4, 0x0c000c00, 0x0bfff400, 0x0c00000c,
+        0x0bfffff4, 0xf4000000, 0xf4000400, 0xf3fffc00, 0xf4000004, 0xf3fffffc, 0xf4000404, 0xf3fffbfc,
+        0xf4000c0c, 0xf3fff3f4, 0xf4000c00, 0xf3fff400, 0xf400000c, 0xf3fffff4, 0x000c0000, 0x000c0400,
+        0x000bfc00, 0x000c0004, 0x000bfffc, 0x000c0404, 0x000bfbfc, 0x000c0c0c, 0x000bf3f4, 0x000c0c00,
+        0x000bf400, 0x000c000c, 0x000bfff4, 0xfff40000, 0xfff40400, 0xfff3fc00, 0xfff40004, 0xfff3fffc,
+        0xfff40404, 0xfff3fbfc, 0xfff40c0c, 0xfff3f3f4, 0xfff40c00, 0xfff3f400, 0xfff4000c, 0xfff3fff4,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414,
+        0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec,
+        0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606,
+        0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e,
+        0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4,
+        0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202,
+        0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020,
+        0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa,
+        0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2,
+        0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414,
+        0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe,
+        0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0,
+        0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c,
+        0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000,
+        0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec,
+        0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606,
+        0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e,
+        0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4,
+        0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202,
+        0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020,
+        0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa,
+        0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414,
+        0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec,
+        0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606,
+        0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e,
+        0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4,
+        0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202,
+        0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020,
+        0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa,
+        0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2,
+        0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414,
+        0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe,
+        0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0,
+        0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c,
+        0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000,
+        0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec,
+        0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606,
+        0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e,
+        0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4,
+        0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202,
+        0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020,
+        0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa,
+        0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414,
+        0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec,
+        0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606,
+        0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e,
+        0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4,
+        0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202,
+        0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020,
+        0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa,
+        0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2,
+        0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414,
+        0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe,
+        0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0,
+        0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c,
+        0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000,
+        0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec,
+        0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606,
+        0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e,
+        0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4,
+        0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202,
+        0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020,
+        0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa,
+        0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414,
+        0xffffebec, 0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000202, 0xfffffdfe, 0x00000606, 0xfffff9fa, 0x00000c0c, 0xfffff3f4, 0x00001414, 0xffffebec,
+        0x00002020, 0xffffdfe0, 0x00002e2e, 0xffffd1d2, 0x02020000, 0x02020202, 0x0201fdfe, 0x02020606,
+        0x0201f9fa, 0x02020c0c, 0x0201f3f4, 0x02021414, 0x0201ebec, 0x02022020, 0x0201dfe0, 0x02022e2e,
+        0x0201d1d2, 0xfdfe0000, 0xfdfe0202, 0xfdfdfdfe, 0xfdfe0606, 0xfdfdf9fa, 0xfdfe0c0c, 0xfdfdf3f4,
+        0xfdfe1414, 0xfdfdebec, 0xfdfe2020, 0xfdfddfe0, 0xfdfe2e2e, 0xfdfdd1d2, 0x06060000, 0x06060202,
+        0x0605fdfe, 0x06060606, 0x0605f9fa, 0x06060c0c, 0x0605f3f4, 0x06061414, 0x0605ebec, 0x06062020,
+        0x0605dfe0, 0x06062e2e, 0x0605d1d2, 0xf9fa0000, 0xf9fa0202, 0xf9f9fdfe, 0xf9fa0606, 0xf9f9f9fa,
+        0xf9fa0c0c, 0xf9f9f3f4, 0xf9fa1414, 0xf9f9ebec, 0xf9fa2020, 0xf9f9dfe0, 0xf9fa2e2e, 0xf9f9d1d2,
+        0x0c0c0000, 0x0c0c0202, 0x0c0bfdfe, 0x0c0c0606, 0x0c0bf9fa, 0x0c0c0c0c, 0x0c0bf3f4, 0x0c0c1414,
+        0x0c0bebec, 0x0c0c2020, 0x0c0bdfe0, 0x0c0c2e2e, 0x0c0bd1d2, 0xf3f40000, 0xf3f40202, 0xf3f3fdfe,
+        0xf3f40606, 0xf3f3f9fa, 0xf3f40c0c, 0xf3f3f3f4, 0xf3f41414, 0xf3f3ebec, 0xf3f42020, 0xf3f3dfe0,
+        0xf3f42e2e, 0xf3f3d1d2, 0x14140000, 0x14140202, 0x1413fdfe, 0x14140606, 0x1413f9fa, 0x14140c0c,
+        0x1413f3f4, 0x14141414, 0x1413ebec, 0x14142020, 0x1413dfe0, 0x14142e2e, 0x1413d1d2, 0xebec0000,
+        0xebec0202, 0xebebfdfe, 0xebec0606, 0xebebf9fa, 0xebec0c0c, 0xebebf3f4, 0xebec1414, 0xebebebec,
+        0xebec2020, 0xebebdfe0, 0xebec2e2e, 0xebebd1d2, 0x20200000, 0x20200202, 0x201ffdfe, 0x20200606,
+        0x201ff9fa, 0x20200c0c, 0x201ff3f4, 0x20201414, 0x201febec, 0x20202020, 0x201fdfe0, 0x20202e2e,
+        0x201fd1d2, 0xdfe00000, 0xdfe00202, 0xdfdffdfe, 0xdfe00606, 0xdfdff9fa, 0xdfe00c0c, 0xdfdff3f4,
+        0xdfe01414, 0xdfdfebec, 0xdfe02020, 0xdfdfdfe0, 0xdfe02e2e, 0xdfdfd1d2, 0x2e2e0000, 0x2e2e0202,
+        0x2e2dfdfe, 0x2e2e0606, 0x2e2df9fa, 0x2e2e0c0c, 0x2e2df3f4, 0x2e2e1414, 0x2e2debec, 0x2e2e2020,
+        0x2e2ddfe0, 0x2e2e2e2e, 0x2e2dd1d2, 0xd1d20000, 0xd1d20202, 0xd1d1fdfe, 0xd1d20606, 0xd1d1f9fa,
+        0xd1d20c0c, 0xd1d1f3f4, 0xd1d21414, 0xd1d1ebec, 0xd1d22020, 0xd1d1dfe0, 0xd1d22e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
 };
 
 
 static const uint32_t correctionloworder[] = {
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x04040404, 
-	0xfbfbfbfc, 0x05050101, 0xfafafeff, 0x01010505, 0xfefefafb, 0x0403fbfc, 0xfbfc0404, 0x0605fdfe, 
-	0xf9fa0202, 0xfdfe0606, 0x0201f9fa, 0x09090404, 0xf6f6fbfc, 0x04040909, 0xfbfbf6f7, 0x09090909, 
-	0xf6f6f6f7, 0x0a0a0101, 0xf5f5feff, 0x01010a0a, 0xfefef5f6, 0x0807fafb, 0xf7f80505, 0xfafb0808, 
-	0x0504f7f8, 0x0f0f0909, 0xf0f0f6f7, 0x09090f0f, 0xf6f6f0f1, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 
-	0x0302f3f4, 0x10100404, 0xefeffbfc, 0x04041010, 0xfbfbeff0, 0x10101010, 0xefefeff0, 0x12120000, 
-	0xedee0000, 0x00001212, 0xffffedee, 0x0c0bf3f4, 0xf3f40c0c, 0x100ff6f7, 0xeff00909, 0xf6f71010, 
-	0x0908eff0, 0x1b1b0b0b, 0xe4e4f4f5, 0x0b0b1b1b, 0xf4f4e4e5, 0x1c1c1313, 0xe3e3eced, 0x13131c1c, 
-	0xecece3e4, 0x1615f9fa, 0xe9ea0606, 0xf9fa1616, 0x0605e9ea, 0x1d1d0404, 0xe2e2fbfc, 0x04041d1d, 
-	0xfbfbe2e3, 0x1e1e1e1e, 0xe1e1e1e2, 0x2120fdfe, 0xdedf0202, 0xfdfe2121, 0x0201dedf, 0x1716edee, 
-	0xe8e91212, 0xedee1717, 0x1211e8e9, 0x1e1df0f1, 0xe1e20f0f, 0xf0f11e1e, 0x0f0ee1e2, 0x2e2e1616, 
-	0xd1d1e9ea, 0x16162e2e, 0xe9e9d1d2, 0x2f2f0d0d, 0xd0d0f2f3, 0x0d0d2f2f, 0xf2f2d0d1, 0x31312323, 
-	0xcecedcdd, 0x23233131, 0xdcdccecf, 0x2928f4f5, 0xd6d70b0b, 0xf4f52929, 0x0b0ad6d7, 0x33330404, 
-	0xccccfbfc, 0x04043333, 0xfbfbcccd, 0x36363636, 0xc9c9c9ca, 0x2221ddde, 0xddde2222, 0x2a29e2e3, 
-	0xd5d61d1d, 0xe2e32a2a, 0x1d1cd5d6, 0x3c3bf9fa, 0xc3c40606, 0xf9fa3c3c, 0x0605c3c4, 0x4c4c1b1b, 
-	0xb3b3e4e5, 0x1b1b4c4c, 0xe4e4b3b4, 0x4d4d2b2b, 0xb2b2d4d5, 0x2b2b4d4d, 0xd4d4b2b3, 0x3736e7e8, 
-	0xc8c91818, 0xe7e83737, 0x1817c8c9, 0x4f4f0e0e, 0xb0b0f1f2, 0x0e0e4f4f, 0xf1f1b0b1, 0x53533f3f, 
-	0xacacc0c1, 0x3f3f5353, 0xc0c0acad, 0x4a49ebec, 0xb5b61414, 0xebec4a4a, 0x1413b5b6, 0x58580202, 
-	0xa7a7fdfe, 0x02025858, 0xfdfda7a8, 0x5d5d5d5d, 0xa2a2a2a3, 0x3d3ccbcc, 0xc2c33434, 0xcbcc3d3d, 
-	0x3433c2c3, 0x78783434, 0x8787cbcc, 0x34347878, 0xcbcb8788, 0x4b4ad2d3, 0xb4b52d2d, 0xd2d34b4b, 
-	0x2d2cb4b5, 0x7d7d4b4b, 0x8282b4b5, 0x4b4b7d7d, 0xb4b48283, 0x7a7a2121, 0x8585dedf, 0x21217a7a, 
-	0xdede8586, 0x6766f2f3, 0x98990d0d, 0xf2f36767, 0x0d0c9899, 0x605fd7d8, 0x9fa02828, 0xd7d86060, 
-	0x28279fa0, 0x7f7eddde, 0x80812222, 0xddde7f7f, 0x22218081, 0x5958a6a7, 0xa6a75959, 0x6968b1b2, 
-	0x96974e4e, 0xb1b26969, 0x4e4d9697, 0x0c0c0c0c, 0xf3f3f3f4, 0x17171717, 0xe8e8e8e9, 0x2a2a2a2a, 
-	0xd5d5d5d6, 0x49494949, 0xb6b6b6b7, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 
-	0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 
-	0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0xfcfd0101, 
-	0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfeff0303, 0xfeff0303, 
-	0xfeff0303, 0xfeff0303, 0xfeff0303, 0xfeff0303, 0xfeff0303, 0x0100fcfd, 0x0100fcfd, 0x0100fcfd, 
-	0x0100fcfd, 0x0100fcfd, 0x0100fcfd, 0x0100fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 
-	0xf8f8f8f9, 0x08080202, 0xf7f7fdfe, 0x02020808, 0xfdfdf7f8, 0x0908fdfe, 0xf6f70202, 0xfdfe0909, 
-	0x0201f6f7, 0x0605f9fa, 0xf9fa0606, 0x0d0d0606, 0xf2f2f9fa, 0x06060d0d, 0xf9f9f2f3, 0x0d0d0d0d, 
-	0xf2f2f2f3, 0x0e0e0101, 0xf1f1feff, 0x01010e0e, 0xfefef1f2, 0x0c0bf7f8, 0xf3f40808, 0xf7f80c0c, 
-	0x0807f3f4, 0x17170e0e, 0xe8e8f1f2, 0x0e0e1717, 0xf1f1e8e9, 0x1211fafb, 0xedee0505, 0xfafb1212, 
-	0x0504edee, 0x18180606, 0xe7e7f9fa, 0x06061818, 0xf9f9e7e8, 0x18181818, 0xe7e7e7e8, 0x1b1afeff, 
-	0xe4e50101, 0xfeff1b1b, 0x0100e4e5, 0x1110eeef, 0xeeef1111, 0x1716f2f3, 0xe8e90d0d, 0xf2f31717, 
-	0x0d0ce8e9, 0x28281010, 0xd7d7eff0, 0x10102828, 0xefefd7d8, 0x29291c1c, 0xd6d6e3e4, 0x1c1c2929, 
-	0xe3e3d6d7, 0x2120f6f7, 0xdedf0909, 0xf6f72121, 0x0908dedf, 0x2b2b0606, 0xd4d4f9fa, 0x06062b2b, 
-	0xf9f9d4d5, 0x2e2e2e2e, 0xd1d1d1d2, 0x3231fbfc, 0xcdce0404, 0xfbfc3232, 0x0403cdce, 0x2221e4e5, 
-	0xddde1b1b, 0xe4e52222, 0x1b1addde, 0x2d2ce9ea, 0xd2d31616, 0xe9ea2d2d, 0x1615d2d3, 0x45452222, 
-	0xbabaddde, 0x22224545, 0xddddbabb, 0x46461313, 0xb9b9eced, 0x13134646, 0xececb9ba, 0x49493535, 
-	0xb6b6cacb, 0x35354949, 0xcacab6b7, 0x3e3deeef, 0xc1c21111, 0xeeef3e3e, 0x1110c1c2, 0x4d4d0505, 
-	0xb2b2fafb, 0x05054d4d, 0xfafab2b3, 0x52525252, 0xadadadae, 0x3332cccd, 0xcccd3333, 0x403fd4d5, 
-	0xbfc02b2b, 0xd4d54040, 0x2b2abfc0, 0x5a59f5f6, 0xa5a60a0a, 0xf5f65a5a, 0x0a09a5a6, 0x72722929, 
-	0x8d8dd6d7, 0x29297272, 0xd6d68d8e, 0x74744040, 0x8b8bbfc0, 0x40407474, 0xbfbf8b8c, 0x5251dadb, 
-	0xadae2525, 0xdadb5252, 0x2524adae, 0x77771616, 0x8888e9ea, 0x16167777, 0xe9e98889, 0x7c7c5f5f, 
-	0x8383a0a1, 0x5f5f7c7c, 0xa0a08384, 0x6f6ee1e2, 0x90911e1e, 0xe1e26f6f, 0x1e1d9091, 0x5c5bb1b2, 
-	0xa3a44e4e, 0xb1b25c5c, 0x4e4da3a4, 0x7170bbbc, 0x8e8f4444, 0xbbbc7171, 0x44438e8f, 0x12121212, 
-	0xedededee, 0x22222222, 0xddddddde, 0x3f3f3f3f, 0xc0c0c0c1, 0x6d6d6d6d, 0x92929293, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 
-	0x03030303, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 
-	0xfcfcfcfd, 0xfcfcfcfd, 0x0403feff, 0x0403feff, 0x0403feff, 0x0403feff, 0x0403feff, 0x0403feff, 
-	0x0403feff, 0x0403feff, 0x0403feff, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 
-	0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfeff0404, 0xfeff0404, 0xfeff0404, 0xfeff0404, 
-	0xfeff0404, 0xfeff0404, 0xfeff0404, 0xfeff0404, 0xfeff0404, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 
-	0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x07070707, 0x07070707, 
-	0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0xf8f8f8f9, 
-	0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 
-	0xf5f5fcfd, 0x03030a0a, 0xfcfcf5f6, 0x09090909, 0xf6f6f6f7, 0x0706f8f9, 0xf8f90707, 0x0c0bfcfd, 
-	0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x11110808, 0xeeeef7f8, 0x08081111, 0xf7f7eeef, 0x11111111, 
-	0xeeeeeeef, 0x13130101, 0xececfeff, 0x01011313, 0xfefeeced, 0x100ff4f5, 0xeff00b0b, 0xf4f51010, 
-	0x0b0aeff0, 0x1716f9fa, 0xe8e90606, 0xf9fa1717, 0x0605e8e9, 0x1f1f1212, 0xe0e0edee, 0x12121f1f, 
-	0xedede0e1, 0x20200808, 0xdfdff7f8, 0x08082020, 0xf7f7dfe0, 0x21212121, 0xdedededf, 0x2423feff, 
-	0xdbdc0101, 0xfeff2424, 0x0100dbdc, 0x1716e8e9, 0xe8e91717, 0x1f1eeeef, 0xe0e11111, 0xeeef1f1f, 
-	0x1110e0e1, 0x36361515, 0xc9c9eaeb, 0x15153636, 0xeaeac9ca, 0x37372525, 0xc8c8dadb, 0x25253737, 
-	0xdadac8c9, 0x2c2bf3f4, 0xd3d40c0c, 0xf3f42c2c, 0x0c0bd3d4, 0x39390808, 0xc6c6f7f8, 0x08083939, 
-	0xf7f7c6c7, 0x3d3d3d3d, 0xc2c2c2c3, 0x4241fafb, 0xbdbe0505, 0xfafb4242, 0x0504bdbe, 0x2d2cdbdc, 
-	0xd2d32424, 0xdbdc2d2d, 0x2423d2d3, 0x3c3be2e3, 0xc3c41d1d, 0xe2e33c3c, 0x1d1cc3c4, 0x5c5c2d2d, 
-	0xa3a3d2d3, 0x2d2d5c5c, 0xd2d2a3a4, 0x5d5d1919, 0xa2a2e6e7, 0x19195d5d, 0xe6e6a2a3, 0x61614747, 
-	0x9e9eb8b9, 0x47476161, 0xb8b89e9f, 0x5352e9ea, 0xacad1616, 0xe9ea5353, 0x1615acad, 0x66660707, 
-	0x9999f8f9, 0x07076666, 0xf8f8999a, 0x6d6d6d6d, 0x92929293, 0x4443bbbc, 0xbbbc4444, 0x5554c6c7, 
-	0xaaab3939, 0xc6c75555, 0x3938aaab, 0x7877f2f3, 0x87880d0d, 0xf2f37878, 0x0d0c8788, 0x6e6dcecf, 
-	0x91923131, 0xcecf6e6e, 0x31309192, 0x7b7a9798, 0x84856868, 0x97987b7b, 0x68678485, 0x18181818, 
-	0xe7e7e7e8, 0x2e2e2e2e, 0xd1d1d1d2, 0x54545454, 0xabababac, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04040404, 
-	0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 
-	0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 
-	0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 
-	0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 0xfafb0101, 0xfafb0101, 0xfafb0101, 
-	0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfeff0505, 
-	0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 
-	0xfeff0505, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 
-	0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 
-	0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 
-	0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0x03030a0a, 
-	0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 
-	0x03030a0a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 
-	0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x03030d0d, 0xfcfcf2f3, 0x0908f6f7, 0xf6f70909, 0x0f0efbfc, 
-	0xf0f10404, 0xfbfc0f0f, 0x0403f0f1, 0x16160b0b, 0xe9e9f4f5, 0x0b0b1616, 0xf4f4e9ea, 0x15151515, 
-	0xeaeaeaeb, 0x18180202, 0xe7e7fdfe, 0x02021818, 0xfdfde7e8, 0x1413f1f2, 0xebec0e0e, 0xf1f21414, 
-	0x0e0debec, 0x26261717, 0xd9d9e8e9, 0x17172626, 0xe8e8d9da, 0x1d1cf7f8, 0xe2e30808, 0xf7f81d1d, 
-	0x0807e2e3, 0x27270b0b, 0xd8d8f4f5, 0x0b0b2727, 0xf4f4d8d9, 0x29292929, 0xd6d6d6d7, 0x2d2cfeff, 
-	0xd2d30101, 0xfeff2d2d, 0x0100d2d3, 0x1d1ce2e3, 0xe2e31d1d, 0x2726e9ea, 0xd8d91616, 0xe9ea2727, 
-	0x1615d8d9, 0x43431b1b, 0xbcbce4e5, 0x1b1b4343, 0xe4e4bcbd, 0x45452f2f, 0xbabad0d1, 0x2f2f4545, 
-	0xd0d0babb, 0x3837f0f1, 0xc7c80f0f, 0xf0f13838, 0x0f0ec7c8, 0x47470b0b, 0xb8b8f4f5, 0x0b0b4747, 
-	0xf4f4b8b9, 0x4c4c4c4c, 0xb3b3b3b4, 0x5352f9fa, 0xacad0606, 0xf9fa5353, 0x0605acad, 0x3938d2d3, 
-	0xc6c72d2d, 0xd2d33939, 0x2d2cc6c7, 0x4b4adbdc, 0xb4b52424, 0xdbdc4b4b, 0x2423b4b5, 0x73733838, 
-	0x8c8cc7c8, 0x38387373, 0xc7c78c8d, 0x75751f1f, 0x8a8ae0e1, 0x1f1f7575, 0xe0e08a8b, 0x7a7a5858, 
-	0x8585a7a8, 0x58587a7a, 0xa7a78586, 0x6867e3e4, 0x97981c1c, 0xe3e46868, 0x1c1b9798, 0x5554aaab, 
-	0xaaab5555, 0x6a69b7b8, 0x95964848, 0xb7b86a6a, 0x48479596, 0x1e1e1e1e, 0xe1e1e1e2, 0x3a3a3a3a, 
-	0xc5c5c5c6, 0x69696969, 0x96969697, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x05050505, 0x05050505, 
-	0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 
-	0x05050505, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 
-	0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 
-	0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0xf8f90202, 
-	0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 
-	0xf8f90202, 0xf8f90202, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 
-	0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 
-	0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 
-	0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 
-	0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 
-	0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0x0d0d0303, 0x0d0d0303, 
-	0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 
-	0x0d0d0303, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 
-	0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 
-	0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0xfbfbf0f1, 0x0b0af4f5, 0xf4f50b0b, 0x1211fafb, 
-	0xedee0505, 0xfafb1212, 0x0504edee, 0x1a1a0d0d, 0xe5e5f2f3, 0x0d0d1a1a, 0xf2f2e5e6, 0x1a1a1a1a, 
-	0xe5e5e5e6, 0x1d1d0202, 0xe2e2fdfe, 0x02021d1d, 0xfdfde2e3, 0x1817eff0, 0xe7e81010, 0xeff01818, 
-	0x100fe7e8, 0x2e2e1c1c, 0xd1d1e3e4, 0x1c1c2e2e, 0xe3e3d1d2, 0x2322f6f7, 0xdcdd0909, 0xf6f72323, 
-	0x0908dcdd, 0x2f2f0d0d, 0xd0d0f2f3, 0x0d0d2f2f, 0xf2f2d0d1, 0x31313131, 0xcecececf, 0x3635feff, 
-	0xc9ca0101, 0xfeff3636, 0x0100c9ca, 0x2322dcdd, 0xdcdd2323, 0x2f2ee5e6, 0xd0d11a1a, 0xe5e62f2f, 
-	0x1a19d0d1, 0x51512020, 0xaeaedfe0, 0x20205151, 0xdfdfaeaf, 0x53533838, 0xacacc7c8, 0x38385353, 
-	0xc7c7acad, 0x4342edee, 0xbcbd1212, 0xedee4343, 0x1211bcbd, 0x56560d0d, 0xa9a9f2f3, 0x0d0d5656, 
-	0xf2f2a9aa, 0x5b5b5b5b, 0xa4a4a4a5, 0x6362f8f9, 0x9c9d0707, 0xf8f96363, 0x07069c9d, 0x4443c9ca, 
-	0xbbbc3636, 0xc9ca4444, 0x3635bbbc, 0x5a59d3d4, 0xa5a62c2c, 0xd3d45a5a, 0x2c2ba5a6, 0x7c7bdedf, 
-	0x83842121, 0xdedf7c7c, 0x21208384, 0x67669899, 0x98996767, 0x7f7ea9aa, 0x80815656, 0xa9aa7f7f, 
-	0x56558081, 0x25252525, 0xdadadadb, 0x45454545, 0xbabababb, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0xf9f9f9fa, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 
-	0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0xf7f80202, 0xf7f80202, 0xf7f80202, 
-	0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 
-	0xf7f80202, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 
-	0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 
-	0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 
-	0x0201f7f8, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 
-	0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 
-	0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 
-	0xf2f2f2f3, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 
-	0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 
-	0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 
-	0xf0f0fbfc, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 
-	0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 
-	0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0xfafaedee, 0x0d0cf2f3, 0xf2f30d0d, 0x1514f9fa, 
-	0xeaeb0606, 0xf9fa1515, 0x0605eaeb, 0x1e1e0f0f, 0xe1e1f0f1, 0x0f0f1e1e, 0xf0f0e1e2, 0x1e1e1e1e, 
-	0xe1e1e1e2, 0x22220202, 0xddddfdfe, 0x02022222, 0xfdfdddde, 0x1c1beced, 0xe3e41313, 0xeced1c1c, 
-	0x1312e3e4, 0x36362020, 0xc9c9dfe0, 0x20203636, 0xdfdfc9ca, 0x2928f4f5, 0xd6d70b0b, 0xf4f52929, 
-	0x0b0ad6d7, 0x37370f0f, 0xc8c8f0f1, 0x0f0f3737, 0xf0f0c8c9, 0x39393939, 0xc6c6c6c7, 0x3f3efeff, 
-	0xc0c10101, 0xfeff3f3f, 0x0100c0c1, 0x2827d7d8, 0xd7d82828, 0x3736e1e2, 0xc8c91e1e, 0xe1e23737, 
-	0x1e1dc8c9, 0x5e5e2525, 0xa1a1dadb, 0x25255e5e, 0xdadaa1a2, 0x60604141, 0x9f9fbebf, 0x41416060, 
-	0xbebe9fa0, 0x4e4deaeb, 0xb1b21515, 0xeaeb4e4e, 0x1514b1b2, 0x64640f0f, 0x9b9bf0f1, 0x0f0f6464, 
-	0xf0f09b9c, 0x6a6a6a6a, 0x95959596, 0x7473f7f8, 0x8b8c0808, 0xf7f87474, 0x08078b8c, 0x4f4ec0c1, 
-	0xb0b13f3f, 0xc0c14f4f, 0x3f3eb0b1, 0x6968cccd, 0x96973333, 0xcccd6969, 0x33329697, 0x78778788, 
-	0x87887878, 0x2b2b2b2b, 0xd4d4d4d5, 0x50505050, 0xafafafb0, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 
-	0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 
-	0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 
-	0xf8f8f8f9, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 
-	0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0xf5f60303, 0xf5f60303, 0xf5f60303, 
-	0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 
-	0xf5f60303, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 
-	0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 
-	0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 
-	0x0302f5f6, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 
-	0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0xefefeff0, 0xefefeff0, 0xefefeff0, 
-	0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 
-	0xefefeff0, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 
-	0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0xededfafb, 0xededfafb, 0xededfafb, 
-	0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 
-	0xededfafb, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 
-	0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 
-	0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0xfafaebec, 0x0f0ef0f1, 0xf0f10f0f, 0x1817f8f9, 
-	0xe7e80707, 0xf8f91818, 0x0706e7e8, 0x23231111, 0xdcdceeef, 0x11112323, 0xeeeedcdd, 0x22222222, 
-	0xddddddde, 0x26260303, 0xd9d9fcfd, 0x03032626, 0xfcfcd9da, 0x201fe9ea, 0xdfe01616, 0xe9ea2020, 
-	0x1615dfe0, 0x3d3d2525, 0xc2c2dadb, 0x25253d3d, 0xdadac2c3, 0x2f2ef2f3, 0xd0d10d0d, 0xf2f32f2f, 
-	0x0d0cd0d1, 0x3f3f1111, 0xc0c0eeef, 0x11113f3f, 0xeeeec0c1, 0x41414141, 0xbebebebf, 0x4847feff, 
-	0xb7b80101, 0xfeff4848, 0x0100b7b8, 0x2e2dd1d2, 0xd1d22e2e, 0x3f3edcdd, 0xc0c12323, 0xdcdd3f3f, 
-	0x2322c0c1, 0x6b6b2b2b, 0x9494d4d5, 0x2b2b6b6b, 0xd4d49495, 0x6e6e4b4b, 0x9191b4b5, 0x4b4b6e6e, 
-	0xb4b49192, 0x5958e7e8, 0xa6a71818, 0xe7e85959, 0x1817a6a7, 0x72721111, 0x8d8deeef, 0x11117272, 
-	0xeeee8d8e, 0x79797979, 0x86868687, 0x5b5ab7b8, 0xa4a54848, 0xb7b85b5b, 0x4847a4a5, 0x7877c5c6, 
-	0x87883a3a, 0xc5c67878, 0x3a398788, 0x31313131, 0xcecececf, 0x5c5c5c5c, 0xa3a3a3a4, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 
-	0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0xf7f7f7f8, 
-	0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 
-	0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 
-	0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0xf4f50303, 
-	0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 
-	0xf4f50303, 0xf4f50303, 0xf4f50303, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 
-	0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0x0302f4f5, 
-	0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 
-	0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 
-	0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0xedededee, 
-	0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 
-	0xedededee, 0xedededee, 0xedededee, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 
-	0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0xebebfafb, 
-	0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 
-	0xebebfafb, 0xebebfafb, 0xebebfafb, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 
-	0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 
-	0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x1110eeef, 0xeeef1111, 0x1b1af8f9, 
-	0xe4e50707, 0xf8f91b1b, 0x0706e4e5, 0x27271313, 0xd8d8eced, 0x13132727, 0xececd8d9, 0x27272727, 
-	0xd8d8d8d9, 0x2b2b0303, 0xd4d4fcfd, 0x03032b2b, 0xfcfcd4d5, 0x2423e7e8, 0xdbdc1818, 0xe7e82424, 
-	0x1817dbdc, 0x45452a2a, 0xbabad5d6, 0x2a2a4545, 0xd5d5babb, 0x3534f1f2, 0xcacb0e0e, 0xf1f23535, 
-	0x0e0dcacb, 0x47471313, 0xb8b8eced, 0x13134747, 0xececb8b9, 0x49494949, 0xb6b6b6b7, 0x504ffdfe, 
-	0xafb00202, 0xfdfe5050, 0x0201afb0, 0x3433cbcc, 0xcbcc3434, 0x4645d8d9, 0xb9ba2727, 0xd8d94646, 
-	0x2726b9ba, 0x79793030, 0x8686cfd0, 0x30307979, 0xcfcf8687, 0x7c7c5454, 0x8383abac, 0x54547c7c, 
-	0xabab8384, 0x6463e4e5, 0x9b9c1b1b, 0xe4e56464, 0x1b1a9b9c, 0x6665aeaf, 0x999a5151, 0xaeaf6666, 
-	0x5150999a, 0x37373737, 0xc8c8c8c9, 0x68686868, 0x97979798, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 
-	0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0xf6f6f6f7, 
-	0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 
-	0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 
-	0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 
-	0x0c0bfcfd, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 
-	0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xfcfd0c0c, 0xfcfd0c0c, 
-	0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 
-	0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 
-	0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 
-	0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 
-	0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec, 0xebebebec, 0xebebebec, 
-	0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 
-	0xebebebec, 0xebebebec, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 
-	0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0xe8e8f9fa, 
-	0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 
-	0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 
-	0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 
-	0x06061717, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 
-	0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 
-	0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x0403fbfc, 0xfbfc0404, 0x0605fdfe, 
-	0xf9fa0202, 0xfdfe0606, 0x0201f9fa, 0x08080404, 0xf7f7fbfc, 0x04040808, 0xfbfbf7f8, 0x08080808, 
-	0xf7f7f7f8, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x0807fbfc, 0xf7f80404, 0xfbfc0808, 
-	0x0403f7f8, 0x0e0e0808, 0xf1f1f7f8, 0x08080e0e, 0xf7f7f1f2, 0x0c0bfdfe, 0xf3f40202, 0xfdfe0c0c, 
-	0x0201f3f4, 0x10100404, 0xefeffbfc, 0x04041010, 0xfbfbeff0, 0x10101010, 0xefefeff0, 0x12120000, 
-	0xedee0000, 0x00001212, 0xffffedee, 0x0c0bf3f4, 0xf3f40c0c, 0x100ff7f8, 0xeff00808, 0xf7f81010, 
-	0x0807eff0, 0x1a1a0a0a, 0xe5e5f5f6, 0x0a0a1a1a, 0xf5f5e5e6, 0x1c1c1212, 0xe3e3edee, 0x12121c1c, 
-	0xedede3e4, 0x1615f9fa, 0xe9ea0606, 0xf9fa1616, 0x0605e9ea, 0x1c1c0404, 0xe3e3fbfc, 0x04041c1c, 
-	0xfbfbe3e4, 0x1e1e1e1e, 0xe1e1e1e2, 0x201ffdfe, 0xdfe00202, 0xfdfe2020, 0x0201dfe0, 0x1615edee, 
-	0xe9ea1212, 0xedee1616, 0x1211e9ea, 0x1e1df1f2, 0xe1e20e0e, 0xf1f21e1e, 0x0e0de1e2, 0x2e2e1616, 
-	0xd1d1e9ea, 0x16162e2e, 0xe9e9d1d2, 0x2e2e0c0c, 0xd1d1f3f4, 0x0c0c2e2e, 0xf3f3d1d2, 0x30302222, 
-	0xcfcfddde, 0x22223030, 0xddddcfd0, 0x2827f5f6, 0xd7d80a0a, 0xf5f62828, 0x0a09d7d8, 0x32320404, 
-	0xcdcdfbfc, 0x04043232, 0xfbfbcdce, 0x36363636, 0xc9c9c9ca, 0x2221ddde, 0xddde2222, 0x2a29e3e4, 
-	0xd5d61c1c, 0xe3e42a2a, 0x1c1bd5d6, 0x3c3bf9fa, 0xc3c40606, 0xf9fa3c3c, 0x0605c3c4, 0x4c4c1a1a, 
-	0xb3b3e5e6, 0x1a1a4c4c, 0xe5e5b3b4, 0x4c4c2a2a, 0xb3b3d5d6, 0x2a2a4c4c, 0xd5d5b3b4, 0x3635e7e8, 
-	0xc9ca1818, 0xe7e83636, 0x1817c9ca, 0x4e4e0e0e, 0xb1b1f1f2, 0x0e0e4e4e, 0xf1f1b1b2, 0x52523e3e, 
-	0xadadc1c2, 0x3e3e5252, 0xc1c1adae, 0x4a49ebec, 0xb5b61414, 0xebec4a4a, 0x1413b5b6, 0x58580202, 
-	0xa7a7fdfe, 0x02025858, 0xfdfda7a8, 0x5c5c5c5c, 0xa3a3a3a4, 0x3c3bcbcc, 0xc3c43434, 0xcbcc3c3c, 
-	0x3433c3c4, 0x76763434, 0x8989cbcc, 0x34347676, 0xcbcb898a, 0x4a49d3d4, 0xb5b62c2c, 0xd3d44a4a, 
-	0x2c2bb5b6, 0x76764a4a, 0x8989b5b6, 0x4a4a7676, 0xb5b5898a, 0x76762020, 0x8989dfe0, 0x20207676, 
-	0xdfdf898a, 0x6665f3f4, 0x999a0c0c, 0xf3f46666, 0x0c0b999a, 0x605fd7d8, 0x9fa02828, 0xd7d86060, 
-	0x28279fa0, 0x7675ddde, 0x898a2222, 0xddde7676, 0x2221898a, 0x5857a7a8, 0xa7a85858, 0x6867b1b2, 
-	0x97984e4e, 0xb1b26868, 0x4e4d9798, 0x0c0c0c0c, 0xf3f3f3f4, 0x16161616, 0xe9e9e9ea, 0x2a2a2a2a, 
-	0xd5d5d5d6, 0x48484848, 0xb7b7b7b8, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 
-	0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 
-	0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0xfdfe0000, 
-	0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0x00000202, 0x00000202, 
-	0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 
-	0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 
-	0xf9f9f9fa, 0x09090303, 0xf6f6fcfd, 0x03030909, 0xfcfcf6f7, 0x0908fcfd, 0xf6f70303, 0xfcfd0909, 
-	0x0302f6f7, 0x0605f9fa, 0xf9fa0606, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0xf9f9f3f4, 0x0c0c0c0c, 
-	0xf3f3f3f4, 0x0f0f0000, 0xf0f10000, 0x00000f0f, 0xfffff0f1, 0x0c0bf6f7, 0xf3f40909, 0xf6f70c0c, 
-	0x0908f3f4, 0x18180f0f, 0xe7e7f0f1, 0x0f0f1818, 0xf0f0e7e8, 0x1211f9fa, 0xedee0606, 0xf9fa1212, 
-	0x0605edee, 0x18180606, 0xe7e7f9fa, 0x06061818, 0xf9f9e7e8, 0x18181818, 0xe7e7e7e8, 0x1b1b0000, 
-	0xe4e50000, 0x00001b1b, 0xffffe4e5, 0x1211edee, 0xedee1212, 0x1817f3f4, 0xe7e80c0c, 0xf3f41818, 
-	0x0c0be7e8, 0x27270f0f, 0xd8d8f0f1, 0x0f0f2727, 0xf0f0d8d9, 0x2a2a1b1b, 0xd5d5e4e5, 0x1b1b2a2a, 
-	0xe4e4d5d6, 0x2120f6f7, 0xdedf0909, 0xf6f72121, 0x0908dedf, 0x2a2a0606, 0xd5d5f9fa, 0x06062a2a, 
-	0xf9f9d5d6, 0x2d2d2d2d, 0xd2d2d2d3, 0x3332fcfd, 0xcccd0303, 0xfcfd3333, 0x0302cccd, 0x2120e4e5, 
-	0xdedf1b1b, 0xe4e52121, 0x1b1adedf, 0x2d2ceaeb, 0xd2d31515, 0xeaeb2d2d, 0x1514d2d3, 0x45452121, 
-	0xbabadedf, 0x21214545, 0xdedebabb, 0x45451212, 0xbabaedee, 0x12124545, 0xededbabb, 0x48483636, 
-	0xb7b7c9ca, 0x36364848, 0xc9c9b7b8, 0x3f3eedee, 0xc0c11212, 0xedee3f3f, 0x1211c0c1, 0x4e4e0606, 
-	0xb1b1f9fa, 0x06064e4e, 0xf9f9b1b2, 0x51515151, 0xaeaeaeaf, 0x3332cccd, 0xcccd3333, 0x3f3ed5d6, 
-	0xc0c12a2a, 0xd5d63f3f, 0x2a29c0c1, 0x5a59f6f7, 0xa5a60909, 0xf6f75a5a, 0x0908a5a6, 0x72722a2a, 
-	0x8d8dd5d6, 0x2a2a7272, 0xd5d58d8e, 0x75753f3f, 0x8a8ac0c1, 0x3f3f7575, 0xc0c08a8b, 0x5150dbdc, 
-	0xaeaf2424, 0xdbdc5151, 0x2423aeaf, 0x78781515, 0x8787eaeb, 0x15157878, 0xeaea8788, 0x7b7b6060, 
-	0x84849fa0, 0x60607b7b, 0x9f9f8485, 0x6f6ee1e2, 0x90911e1e, 0xe1e26f6f, 0x1e1d9091, 0x5d5cb1b2, 
-	0xa2a34e4e, 0xb1b25d5d, 0x4e4da2a3, 0x7271babb, 0x8d8e4545, 0xbabb7272, 0x45448d8e, 0x12121212, 
-	0xedededee, 0x21212121, 0xdedededf, 0x3f3f3f3f, 0xc0c0c0c1, 0x6c6c6c6c, 0x93939394, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 
-	0x03030303, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 
-	0xfcfcfcfd, 0xfcfcfcfd, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 
-	0x03030000, 0x03030000, 0x03030000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 
-	0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 
-	0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 
-	0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 
-	0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 
-	0xf7f7fbfc, 0x04040808, 0xfbfbf7f8, 0x08080808, 0xf7f7f7f8, 0x0807f7f8, 0xf7f80808, 0x0c0bfbfc, 
-	0xf3f40404, 0xfbfc0c0c, 0x0403f3f4, 0x10100808, 0xefeff7f8, 0x08081010, 0xf7f7eff0, 0x10101010, 
-	0xefefeff0, 0x14140000, 0xebec0000, 0x00001414, 0xffffebec, 0x100ff3f4, 0xeff00c0c, 0xf3f41010, 
-	0x0c0beff0, 0x1817fbfc, 0xe7e80404, 0xfbfc1818, 0x0403e7e8, 0x20201010, 0xdfdfeff0, 0x10102020, 
-	0xefefdfe0, 0x20200808, 0xdfdff7f8, 0x08082020, 0xf7f7dfe0, 0x20202020, 0xdfdfdfe0, 0x24240000, 
-	0xdbdc0000, 0x00002424, 0xffffdbdc, 0x1817e7e8, 0xe7e81818, 0x201feff0, 0xdfe01010, 0xeff02020, 
-	0x100fdfe0, 0x34341414, 0xcbcbebec, 0x14143434, 0xebebcbcc, 0x38382424, 0xc7c7dbdc, 0x24243838, 
-	0xdbdbc7c8, 0x2c2bf3f4, 0xd3d40c0c, 0xf3f42c2c, 0x0c0bd3d4, 0x38380808, 0xc7c7f7f8, 0x08083838, 
-	0xf7f7c7c8, 0x3c3c3c3c, 0xc3c3c3c4, 0x403ffbfc, 0xbfc00404, 0xfbfc4040, 0x0403bfc0, 0x2c2bdbdc, 
-	0xd3d42424, 0xdbdc2c2c, 0x2423d3d4, 0x3c3be3e4, 0xc3c41c1c, 0xe3e43c3c, 0x1c1bc3c4, 0x5c5c2c2c, 
-	0xa3a3d3d4, 0x2c2c5c5c, 0xd3d3a3a4, 0x5c5c1818, 0xa3a3e7e8, 0x18185c5c, 0xe7e7a3a4, 0x60604848, 
-	0x9f9fb7b8, 0x48486060, 0xb7b79fa0, 0x5453ebec, 0xabac1414, 0xebec5454, 0x1413abac, 0x64640808, 
-	0x9b9bf7f8, 0x08086464, 0xf7f79b9c, 0x6c6c6c6c, 0x93939394, 0x4443bbbc, 0xbbbc4444, 0x5453c7c8, 
-	0xabac3838, 0xc7c85454, 0x3837abac, 0x7877f3f4, 0x87880c0c, 0xf3f47878, 0x0c0b8788, 0x6c6bcfd0, 
-	0x93943030, 0xcfd06c6c, 0x302f9394, 0x7c7b9798, 0x83846868, 0x97987c7c, 0x68678384, 0x18181818, 
-	0xe7e7e7e8, 0x2c2c2c2c, 0xd3d3d3d4, 0x54545454, 0xabababac, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04040404, 
-	0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 
-	0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 
-	0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 
-	0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 
-	0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0x00000404, 
-	0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 
-	0x00000404, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 
-	0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0x08080404, 0x08080404, 0x08080404, 0x08080404, 0x08080404, 
-	0x08080404, 0x08080404, 0x08080404, 0x08080404, 0x08080404, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 
-	0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0x04040808, 
-	0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 
-	0x04040808, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 
-	0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x05050f0f, 0xfafaf0f1, 0x0a09f5f6, 0xf5f60a0a, 0x0f0efafb, 
-	0xf0f10505, 0xfafb0f0f, 0x0504f0f1, 0x14140a0a, 0xebebf5f6, 0x0a0a1414, 0xf5f5ebec, 0x14141414, 
-	0xebebebec, 0x19190000, 0xe6e70000, 0x00001919, 0xffffe6e7, 0x1413f0f1, 0xebec0f0f, 0xf0f11414, 
-	0x0f0eebec, 0x28281919, 0xd7d7e6e7, 0x19192828, 0xe6e6d7d8, 0x1e1df5f6, 0xe1e20a0a, 0xf5f61e1e, 
-	0x0a09e1e2, 0x28280a0a, 0xd7d7f5f6, 0x0a0a2828, 0xf5f5d7d8, 0x28282828, 0xd7d7d7d8, 0x2d2d0000, 
-	0xd2d30000, 0x00002d2d, 0xffffd2d3, 0x1e1de1e2, 0xe1e21e1e, 0x2827ebec, 0xd7d81414, 0xebec2828, 
-	0x1413d7d8, 0x41411919, 0xbebee6e7, 0x19194141, 0xe6e6bebf, 0x46462d2d, 0xb9b9d2d3, 0x2d2d4646, 
-	0xd2d2b9ba, 0x3736f0f1, 0xc8c90f0f, 0xf0f13737, 0x0f0ec8c9, 0x46460a0a, 0xb9b9f5f6, 0x0a0a4646, 
-	0xf5f5b9ba, 0x4b4b4b4b, 0xb4b4b4b5, 0x5554fafb, 0xaaab0505, 0xfafb5555, 0x0504aaab, 0x3736d2d3, 
-	0xc8c92d2d, 0xd2d33737, 0x2d2cc8c9, 0x4b4adcdd, 0xb4b52323, 0xdcdd4b4b, 0x2322b4b5, 0x73733737, 
-	0x8c8cc8c9, 0x37377373, 0xc8c88c8d, 0x73731e1e, 0x8c8ce1e2, 0x1e1e7373, 0xe1e18c8d, 0x78785a5a, 
-	0x8787a5a6, 0x5a5a7878, 0xa5a58788, 0x6968e1e2, 0x96971e1e, 0xe1e26969, 0x1e1d9697, 0x5554aaab, 
-	0xaaab5555, 0x6968b9ba, 0x96974646, 0xb9ba6969, 0x46459697, 0x1e1e1e1e, 0xe1e1e1e2, 0x3c3c3c3c, 
-	0xc3c3c3c4, 0x69696969, 0x96969697, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x05050505, 0x05050505, 
-	0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 
-	0x05050505, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 
-	0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 
-	0x05050000, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0xfafb0000, 
-	0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 
-	0xfafb0000, 0xfafb0000, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 
-	0x00000505, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0xfffffafb, 0xfffffafb, 0xfffffafb, 
-	0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 
-	0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 
-	0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 
-	0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0x0f0f0505, 0x0f0f0505, 
-	0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 
-	0x0f0f0505, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 
-	0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 
-	0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0xf9f9f3f4, 0x0c0bf3f4, 0xf3f40c0c, 0x1211f9fa, 
-	0xedee0606, 0xf9fa1212, 0x0605edee, 0x18180c0c, 0xe7e7f3f4, 0x0c0c1818, 0xf3f3e7e8, 0x18181818, 
-	0xe7e7e7e8, 0x1e1e0000, 0xe1e20000, 0x00001e1e, 0xffffe1e2, 0x1817edee, 0xe7e81212, 0xedee1818, 
-	0x1211e7e8, 0x30301e1e, 0xcfcfe1e2, 0x1e1e3030, 0xe1e1cfd0, 0x2423f9fa, 0xdbdc0606, 0xf9fa2424, 
-	0x0605dbdc, 0x30300c0c, 0xcfcff3f4, 0x0c0c3030, 0xf3f3cfd0, 0x30303030, 0xcfcfcfd0, 0x36360000, 
-	0xc9ca0000, 0x00003636, 0xffffc9ca, 0x2423dbdc, 0xdbdc2424, 0x302fe7e8, 0xcfd01818, 0xe7e83030, 
-	0x1817cfd0, 0x4e4e1e1e, 0xb1b1e1e2, 0x1e1e4e4e, 0xe1e1b1b2, 0x54543636, 0xababc9ca, 0x36365454, 
-	0xc9c9abac, 0x4241edee, 0xbdbe1212, 0xedee4242, 0x1211bdbe, 0x54540c0c, 0xababf3f4, 0x0c0c5454, 
-	0xf3f3abac, 0x5a5a5a5a, 0xa5a5a5a6, 0x605ff9fa, 0x9fa00606, 0xf9fa6060, 0x06059fa0, 0x4241c9ca, 
-	0xbdbe3636, 0xc9ca4242, 0x3635bdbe, 0x5a59d5d6, 0xa5a62a2a, 0xd5d65a5a, 0x2a29a5a6, 0x7e7de1e2, 
-	0x81821e1e, 0xe1e27e7e, 0x1e1d8182, 0x6665999a, 0x999a6666, 0x7e7dabac, 0x81825454, 0xabac7e7e, 
-	0x54538182, 0x24242424, 0xdbdbdbdc, 0x42424242, 0xbdbdbdbe, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0xf9f9f9fa, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 
-	0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 
-	0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 
-	0xf9fa0000, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 
-	0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 
-	0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 
-	0xfffff9fa, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 
-	0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 
-	0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 
-	0xf3f3f3f4, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 
-	0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 
-	0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 
-	0xf3f3f9fa, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 
-	0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 
-	0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0xf8f8eaeb, 0x0e0df1f2, 0xf1f20e0e, 0x1514f8f9, 
-	0xeaeb0707, 0xf8f91515, 0x0706eaeb, 0x1c1c0e0e, 0xe3e3f1f2, 0x0e0e1c1c, 0xf1f1e3e4, 0x1c1c1c1c, 
-	0xe3e3e3e4, 0x23230000, 0xdcdd0000, 0x00002323, 0xffffdcdd, 0x1c1beaeb, 0xe3e41515, 0xeaeb1c1c, 
-	0x1514e3e4, 0x38382323, 0xc7c7dcdd, 0x23233838, 0xdcdcc7c8, 0x2a29f1f2, 0xd5d60e0e, 0xf1f22a2a, 
-	0x0e0dd5d6, 0x38380e0e, 0xc7c7f1f2, 0x0e0e3838, 0xf1f1c7c8, 0x38383838, 0xc7c7c7c8, 0x3f3f0000, 
-	0xc0c10000, 0x00003f3f, 0xffffc0c1, 0x2a29d5d6, 0xd5d62a2a, 0x3837e3e4, 0xc7c81c1c, 0xe3e43838, 
-	0x1c1bc7c8, 0x5b5b2323, 0xa4a4dcdd, 0x23235b5b, 0xdcdca4a5, 0x62623f3f, 0x9d9dc0c1, 0x3f3f6262, 
-	0xc0c09d9e, 0x4d4ceaeb, 0xb2b31515, 0xeaeb4d4d, 0x1514b2b3, 0x62620e0e, 0x9d9df1f2, 0x0e0e6262, 
-	0xf1f19d9e, 0x69696969, 0x96969697, 0x7776f8f9, 0x88890707, 0xf8f97777, 0x07068889, 0x4d4cc0c1, 
-	0xb2b33f3f, 0xc0c14d4d, 0x3f3eb2b3, 0x6968cecf, 0x96973131, 0xcecf6969, 0x31309697, 0x77768889, 
-	0x88897777, 0x2a2a2a2a, 0xd5d5d5d6, 0x4d4d4d4d, 0xb2b2b2b3, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 
-	0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 
-	0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 
-	0xf8f8f8f9, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 
-	0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 
-	0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 
-	0xf8f90000, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 
-	0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 
-	0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 
-	0xfffff8f9, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 
-	0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 
-	0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 
-	0xf1f1f1f2, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 
-	0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 
-	0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 
-	0xeaeaf8f9, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 
-	0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 
-	0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0xf7f7eff0, 0x100feff0, 0xeff01010, 0x1817f7f8, 
-	0xe7e80808, 0xf7f81818, 0x0807e7e8, 0x20201010, 0xdfdfeff0, 0x10102020, 0xefefdfe0, 0x20202020, 
-	0xdfdfdfe0, 0x28280000, 0xd7d80000, 0x00002828, 0xffffd7d8, 0x201fe7e8, 0xdfe01818, 0xe7e82020, 
-	0x1817dfe0, 0x40402828, 0xbfbfd7d8, 0x28284040, 0xd7d7bfc0, 0x302feff0, 0xcfd01010, 0xeff03030, 
-	0x100fcfd0, 0x40401010, 0xbfbfeff0, 0x10104040, 0xefefbfc0, 0x40404040, 0xbfbfbfc0, 0x48480000, 
-	0xb7b80000, 0x00004848, 0xffffb7b8, 0x302fcfd0, 0xcfd03030, 0x403fdfe0, 0xbfc02020, 0xdfe04040, 
-	0x201fbfc0, 0x68682828, 0x9797d7d8, 0x28286868, 0xd7d79798, 0x70704848, 0x8f8fb7b8, 0x48487070, 
-	0xb7b78f90, 0x5857e7e8, 0xa7a81818, 0xe7e85858, 0x1817a7a8, 0x70701010, 0x8f8feff0, 0x10107070, 
-	0xefef8f90, 0x78787878, 0x87878788, 0x5857b7b8, 0xa7a84848, 0xb7b85858, 0x4847a7a8, 0x7877c7c8, 
-	0x87883838, 0xc7c87878, 0x38378788, 0x30303030, 0xcfcfcfd0, 0x58585858, 0xa7a7a7a8, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 
-	0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0xf7f7f7f8, 
-	0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 
-	0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 
-	0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0xf7f80000, 
-	0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 
-	0xf7f80000, 0xf7f80000, 0xf7f80000, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 
-	0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0xfffff7f8, 
-	0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 
-	0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 
-	0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0xefefeff0, 
-	0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 
-	0xefefeff0, 0xefefeff0, 0xefefeff0, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 
-	0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0xefeff7f8, 
-	0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 
-	0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 
-	0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 
-	0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x1211edee, 0xedee1212, 0x1b1af6f7, 
-	0xe4e50909, 0xf6f71b1b, 0x0908e4e5, 0x24241212, 0xdbdbedee, 0x12122424, 0xededdbdc, 0x24242424, 
-	0xdbdbdbdc, 0x2d2d0000, 0xd2d30000, 0x00002d2d, 0xffffd2d3, 0x2423e4e5, 0xdbdc1b1b, 0xe4e52424, 
-	0x1b1adbdc, 0x48482d2d, 0xb7b7d2d3, 0x2d2d4848, 0xd2d2b7b8, 0x3635edee, 0xc9ca1212, 0xedee3636, 
-	0x1211c9ca, 0x48481212, 0xb7b7edee, 0x12124848, 0xededb7b8, 0x48484848, 0xb7b7b7b8, 0x51510000, 
-	0xaeaf0000, 0x00005151, 0xffffaeaf, 0x3635c9ca, 0xc9ca3636, 0x4847dbdc, 0xb7b82424, 0xdbdc4848, 
-	0x2423b7b8, 0x75752d2d, 0x8a8ad2d3, 0x2d2d7575, 0xd2d28a8b, 0x7e7e5151, 0x8181aeaf, 0x51517e7e, 
-	0xaeae8182, 0x6362e4e5, 0x9c9d1b1b, 0xe4e56363, 0x1b1a9c9d, 0x6362aeaf, 0x9c9d5151, 0xaeaf6363, 
-	0x51509c9d, 0x36363636, 0xc9c9c9ca, 0x6c6c6c6c, 0x93939394, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 
-	0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0xf6f6f6f7, 
-	0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 
-	0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 
-	0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 
-	0x09090000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 
-	0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0x00000909, 0x00000909, 
-	0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 
-	0x00000909, 0x00000909, 0x00000909, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 
-	0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 
-	0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 
-	0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0xedededee, 0xedededee, 0xedededee, 
-	0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 
-	0xedededee, 0xedededee, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 
-	0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0xe4e4f6f7, 
-	0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 
-	0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 
-	0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 
-	0x09091b1b, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 
-	0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 
-	0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0504fafb, 0xfafb0505, 0xfafb0505, 
-	0x0504fafb, 0x0b0b0606, 0xf4f4f9fa, 0x06060b0b, 0xf9f9f4f5, 0x08080000, 0xf7f80000, 0x00000808, 
-	0xfffff7f8, 0x0b0b0b0b, 0xf4f4f4f5, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x11110c0c, 
-	0xeeeef3f4, 0x0c0c1111, 0xf3f3eeef, 0x11111111, 0xeeeeeeef, 0x12120606, 0xededf9fa, 0x06061212, 
-	0xf9f9edee, 0x0b0af7f8, 0xf4f50808, 0xf7f80b0b, 0x0807f4f5, 0x0f0f0000, 0xf0f10000, 0x00000f0f, 
-	0xfffff0f1, 0x14140000, 0xebec0000, 0x00001414, 0xffffebec, 0x19191212, 0xe6e6edee, 0x12121919, 
-	0xedede6e7, 0x19190b0b, 0xe6e6f4f5, 0x0b0b1919, 0xf4f4e6e7, 0x19191919, 0xe6e6e6e7, 0x0e0df1f2, 
-	0xf1f20e0e, 0xf1f20e0e, 0x0e0df1f2, 0x1a1a0000, 0xe5e60000, 0x00001a1a, 0xffffe5e6, 0x1211f4f5, 
-	0xedee0b0b, 0xf4f51212, 0x0b0aedee, 0x1615f8f9, 0xe9ea0707, 0xf8f91616, 0x0706e9ea, 0x22221a1a, 
-	0xdddde5e6, 0x1a1a2222, 0xe5e5ddde, 0x22221212, 0xddddedee, 0x12122222, 0xededddde, 0x22222222, 
-	0xddddddde, 0x23230b0b, 0xdcdcf4f5, 0x0b0b2323, 0xf4f4dcdd, 0x1d1d0000, 0xe2e30000, 0x00001d1d, 
-	0xffffe2e3, 0x1615eced, 0xe9ea1313, 0xeced1616, 0x1312e9ea, 0x1a19f0f1, 0xe5e60f0f, 0xf0f11a1a, 
-	0x0f0ee5e6, 0x25250000, 0xdadb0000, 0x00002525, 0xffffdadb, 0x2c2c1b1b, 0xd3d3e4e5, 0x1b1b2c2c, 
-	0xe4e4d3d4, 0x2c2c2424, 0xd3d3dbdc, 0x24242c2c, 0xdbdbd3d4, 0x2c2c1212, 0xd3d3edee, 0x12122c2c, 
-	0xededd3d4, 0x2120f5f6, 0xdedf0a0a, 0xf5f62121, 0x0a09dedf, 0x2d2d2d2d, 0xd2d2d2d3, 0x00000000, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 
-	0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 
-	0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 
-	0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 
-	0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 
-	0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 
-	0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 
-	0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 
-	0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 
-	0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 
-	0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 
-	0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 
-	0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 
-	0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 
-	0xf8f90000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 
-	0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0403fbfc, 0xfbfc0404, 0xf9fa0a0a, 
-	0x0605f5f6, 0xf3f40000, 0x0c0c0000, 0xf3f3f9fa, 0xf3f40606, 0x0c0bf9fa, 0x0c0c0606, 0xfffff1f2, 
-	0x00000e0e, 0x0c0c0c0c, 0xf3f3f3f4, 0xedee0000, 0x12120000, 0xf3f40e0e, 0x0c0bf1f2, 0xf9f9edee, 
-	0xf9fa1212, 0x0605edee, 0x06061212, 0xededf5f6, 0xedee0a0a, 0x1211f5f6, 0x12120a0a, 0xffffe9ea, 
-	0x00001616, 0xe7e80000, 0x18180000, 0xf3f3e9ea, 0xf3f41616, 0x0c0be9ea, 0x0c0c1616, 0xe7e7f7f8, 
-	0xe7e80808, 0x1817f7f8, 0x18180808, 0xf9f9e5e6, 0xf9fa1a1a, 0x0605e5e6, 0x06061a1a, 0xffffe3e4, 
-	0x00001c1c, 0x14141414, 0xebebebec, 0xe5e5f1f2, 0x1a1a0e0e, 0xf3f3e1e2, 0x0c0c1e1e, 0xdfdff5f6, 
-	0x20200a0a, 0xdfdfedee, 0x20201212, 0xe5e5e5e6, 0x1a1a1a1a, 0xebebddde, 0x14142222, 0xf3f3d9da, 
-	0x0c0c2626, 0xdfdfdfe0, 0x20202020, 0x20202020, 0xd7d7e9ea, 0xddddddde, 0x22222222, 0x00000000, 
-	0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 
-	0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 
-	0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 
-	0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 
-	0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 
-	0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 
-	0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 
-	0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 
-	0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 
-	0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 
-	0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 
-	0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 
-	0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 
-	0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 
-	0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 
-	0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 
-	0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 
-	0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 
-	0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 
-	0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 
-	0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 
-	0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x0605f9fa, 0xf9fa0606, 0xf7f80e0e, 
-	0x0807f1f2, 0xffffedee, 0x00001212, 0xeff00a0a, 0x100ff5f6, 0xe7e80000, 0x18180000, 0xf7f7e7e8, 
-	0xf7f81818, 0x0807e7e8, 0x08081818, 0x12121212, 0xedededee, 0xeff01414, 0x100febec, 0xe5e5f1f2, 
-	0xe5e60e0e, 0x1a19f1f2, 0x1a1a0e0e, 0xffffe1e2, 0x00001e1e, 0xddde0000, 0x22220000, 0xf7f7ddde, 
-	0xf7f82222, 0x0807ddde, 0x08082222, 0xedede1e2, 0xedee1e1e, 0x1211e1e2, 0x12121e1e, 0xddddf5f6, 
-	0xddde0a0a, 0x2221f5f6, 0x22220a0a, 0xddddebec, 0x22221414, 0xffffd7d8, 0x00002828, 0x1e1e1e1e, 
-	0xe1e1e1e2, 0xededd7d8, 0x12122828, 0xd3d40000, 0x2c2c0000, 0xd3d3eff0, 0x2c2c1010, 0xdbdbdbdc, 
-	0xdbdbdbdc, 0x24242424, 0xd3d3e5e6, 0x2c2c1a1a, 0xe5e5d1d2, 0x1a1a2e2e, 0xededcbcc, 0x12123434, 
-	0xc9c9ebec, 0xd3d3d3d4, 0x2c2c2c2c, 0xc9c9dfe0, 0xd1d1d1d2, 0xd1d1d1d2, 0x2e2e2e2e, 0x00000000, 
-	0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 
-	0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 
-	0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 
-	0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 
-	0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 
-	0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 
-	0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 
-	0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 
-	0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 
-	0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 
-	0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 
-	0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 
-	0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 
-	0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 
-	0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 
-	0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 
-	0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 
-	0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 
-	0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 
-	0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 
-	0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 
-	0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x0807f7f8, 0xf7f80808, 0xeff00808, 
-	0x100ff7f8, 0xe7e80000, 0x18180000, 0xf7f7e7e8, 0xf7f81818, 0x0807e7e8, 0x08081818, 0xeff01414, 
-	0x100febec, 0xffffe3e4, 0x00001c1c, 0xe7e7eff0, 0xe7e81010, 0x1817eff0, 0x18181010, 0xdfe00000, 
-	0x20200000, 0xefefe3e4, 0xeff01c1c, 0x100fe3e4, 0x10101c1c, 0xdfdff7f8, 0xdfe00808, 0xf7f7dfe0, 
-	0xf7f82020, 0x0807dfe0, 0x08082020, 0x201ff7f8, 0x20200808, 0x18181818, 0xe7e7e7e8, 0xe7e81818, 
-	0x1817e7e8, 0xdfdfebec, 0x20201414, 0xffffd7d8, 0x00002828, 0xefefd7d8, 0x10102828, 0xd3d40000, 
-	0xd3d40000, 0xffffd3d4, 0x00002c2c, 0x2c2c0000, 0x2c2c0000, 0xdfdfdfe0, 0x20202020, 0xd3d3eff0, 
-	0x2c2c1010, 0xd3d3e7e8, 0xe7e7d3d4, 0x18182c2c, 0x2c2c1818, 0xefefcfd0, 0x10103030, 0xdbdbdbdc, 
-	0xdbdbdbdc, 0x24242424, 0x24242424, 0xcbcbebec, 0x28282828, 0xd7d7d7d8, 0xcbcbdfe0, 0x00000000, 
-	0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 
-	0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 
-	0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 
-	0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 
-	0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 
-	0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 
-	0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 
-	0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 
-	0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 
-	0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 
-	0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 
-	0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 
-	0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 
-	0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 
-	0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 
-	0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 
-	0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 
-	0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 
-	0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 
-	0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 
-	0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 
-	0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 
-	0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 
-	0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 
-	0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 
-	0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 
-	0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 
-	0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 
-	0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 
-	0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 
-	0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 
-	0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 
-	0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 
-	0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 
-	0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 
-	0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 
-	0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 
-	0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 
-	0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 
-	0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 
-	0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 
-	0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 
-	0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 
-	0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 
-	0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 
-	0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 
-	0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 
-	0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 
-	0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 
-	0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 
-	0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 
-	0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 
-	0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 
-	0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 
-	0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 
-	0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 
-	0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 
-	0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 
-	0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 
-	0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 
-	0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 
-	0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 
-	0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 
-	0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 
-	0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 
-	0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 
-	0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 
-	0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 
-	0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 
-	0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 
-	0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 
-	0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 
-	0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 
-	0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 
-	0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 
-	0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 
-	0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 
-	0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 
-	0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 
-	0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 
-	0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 
-	0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 
-	0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 
-	0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 
-	0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 
-	0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 
-	0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 
-	0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 
-	0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 
-	0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 
-	0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000 
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x04040404,
+        0xfbfbfbfc, 0x05050101, 0xfafafeff, 0x01010505, 0xfefefafb, 0x0403fbfc, 0xfbfc0404, 0x0605fdfe,
+        0xf9fa0202, 0xfdfe0606, 0x0201f9fa, 0x09090404, 0xf6f6fbfc, 0x04040909, 0xfbfbf6f7, 0x09090909,
+        0xf6f6f6f7, 0x0a0a0101, 0xf5f5feff, 0x01010a0a, 0xfefef5f6, 0x0807fafb, 0xf7f80505, 0xfafb0808,
+        0x0504f7f8, 0x0f0f0909, 0xf0f0f6f7, 0x09090f0f, 0xf6f6f0f1, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c,
+        0x0302f3f4, 0x10100404, 0xefeffbfc, 0x04041010, 0xfbfbeff0, 0x10101010, 0xefefeff0, 0x12120000,
+        0xedee0000, 0x00001212, 0xffffedee, 0x0c0bf3f4, 0xf3f40c0c, 0x100ff6f7, 0xeff00909, 0xf6f71010,
+        0x0908eff0, 0x1b1b0b0b, 0xe4e4f4f5, 0x0b0b1b1b, 0xf4f4e4e5, 0x1c1c1313, 0xe3e3eced, 0x13131c1c,
+        0xecece3e4, 0x1615f9fa, 0xe9ea0606, 0xf9fa1616, 0x0605e9ea, 0x1d1d0404, 0xe2e2fbfc, 0x04041d1d,
+        0xfbfbe2e3, 0x1e1e1e1e, 0xe1e1e1e2, 0x2120fdfe, 0xdedf0202, 0xfdfe2121, 0x0201dedf, 0x1716edee,
+        0xe8e91212, 0xedee1717, 0x1211e8e9, 0x1e1df0f1, 0xe1e20f0f, 0xf0f11e1e, 0x0f0ee1e2, 0x2e2e1616,
+        0xd1d1e9ea, 0x16162e2e, 0xe9e9d1d2, 0x2f2f0d0d, 0xd0d0f2f3, 0x0d0d2f2f, 0xf2f2d0d1, 0x31312323,
+        0xcecedcdd, 0x23233131, 0xdcdccecf, 0x2928f4f5, 0xd6d70b0b, 0xf4f52929, 0x0b0ad6d7, 0x33330404,
+        0xccccfbfc, 0x04043333, 0xfbfbcccd, 0x36363636, 0xc9c9c9ca, 0x2221ddde, 0xddde2222, 0x2a29e2e3,
+        0xd5d61d1d, 0xe2e32a2a, 0x1d1cd5d6, 0x3c3bf9fa, 0xc3c40606, 0xf9fa3c3c, 0x0605c3c4, 0x4c4c1b1b,
+        0xb3b3e4e5, 0x1b1b4c4c, 0xe4e4b3b4, 0x4d4d2b2b, 0xb2b2d4d5, 0x2b2b4d4d, 0xd4d4b2b3, 0x3736e7e8,
+        0xc8c91818, 0xe7e83737, 0x1817c8c9, 0x4f4f0e0e, 0xb0b0f1f2, 0x0e0e4f4f, 0xf1f1b0b1, 0x53533f3f,
+        0xacacc0c1, 0x3f3f5353, 0xc0c0acad, 0x4a49ebec, 0xb5b61414, 0xebec4a4a, 0x1413b5b6, 0x58580202,
+        0xa7a7fdfe, 0x02025858, 0xfdfda7a8, 0x5d5d5d5d, 0xa2a2a2a3, 0x3d3ccbcc, 0xc2c33434, 0xcbcc3d3d,
+        0x3433c2c3, 0x78783434, 0x8787cbcc, 0x34347878, 0xcbcb8788, 0x4b4ad2d3, 0xb4b52d2d, 0xd2d34b4b,
+        0x2d2cb4b5, 0x7d7d4b4b, 0x8282b4b5, 0x4b4b7d7d, 0xb4b48283, 0x7a7a2121, 0x8585dedf, 0x21217a7a,
+        0xdede8586, 0x6766f2f3, 0x98990d0d, 0xf2f36767, 0x0d0c9899, 0x605fd7d8, 0x9fa02828, 0xd7d86060,
+        0x28279fa0, 0x7f7eddde, 0x80812222, 0xddde7f7f, 0x22218081, 0x5958a6a7, 0xa6a75959, 0x6968b1b2,
+        0x96974e4e, 0xb1b26969, 0x4e4d9697, 0x0c0c0c0c, 0xf3f3f3f4, 0x17171717, 0xe8e8e8e9, 0x2a2a2a2a,
+        0xd5d5d5d6, 0x49494949, 0xb6b6b6b7, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0x0302feff, 0xfcfd0101,
+        0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfcfd0101, 0xfeff0303, 0xfeff0303,
+        0xfeff0303, 0xfeff0303, 0xfeff0303, 0xfeff0303, 0xfeff0303, 0x0100fcfd, 0x0100fcfd, 0x0100fcfd,
+        0x0100fcfd, 0x0100fcfd, 0x0100fcfd, 0x0100fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707,
+        0xf8f8f8f9, 0x08080202, 0xf7f7fdfe, 0x02020808, 0xfdfdf7f8, 0x0908fdfe, 0xf6f70202, 0xfdfe0909,
+        0x0201f6f7, 0x0605f9fa, 0xf9fa0606, 0x0d0d0606, 0xf2f2f9fa, 0x06060d0d, 0xf9f9f2f3, 0x0d0d0d0d,
+        0xf2f2f2f3, 0x0e0e0101, 0xf1f1feff, 0x01010e0e, 0xfefef1f2, 0x0c0bf7f8, 0xf3f40808, 0xf7f80c0c,
+        0x0807f3f4, 0x17170e0e, 0xe8e8f1f2, 0x0e0e1717, 0xf1f1e8e9, 0x1211fafb, 0xedee0505, 0xfafb1212,
+        0x0504edee, 0x18180606, 0xe7e7f9fa, 0x06061818, 0xf9f9e7e8, 0x18181818, 0xe7e7e7e8, 0x1b1afeff,
+        0xe4e50101, 0xfeff1b1b, 0x0100e4e5, 0x1110eeef, 0xeeef1111, 0x1716f2f3, 0xe8e90d0d, 0xf2f31717,
+        0x0d0ce8e9, 0x28281010, 0xd7d7eff0, 0x10102828, 0xefefd7d8, 0x29291c1c, 0xd6d6e3e4, 0x1c1c2929,
+        0xe3e3d6d7, 0x2120f6f7, 0xdedf0909, 0xf6f72121, 0x0908dedf, 0x2b2b0606, 0xd4d4f9fa, 0x06062b2b,
+        0xf9f9d4d5, 0x2e2e2e2e, 0xd1d1d1d2, 0x3231fbfc, 0xcdce0404, 0xfbfc3232, 0x0403cdce, 0x2221e4e5,
+        0xddde1b1b, 0xe4e52222, 0x1b1addde, 0x2d2ce9ea, 0xd2d31616, 0xe9ea2d2d, 0x1615d2d3, 0x45452222,
+        0xbabaddde, 0x22224545, 0xddddbabb, 0x46461313, 0xb9b9eced, 0x13134646, 0xececb9ba, 0x49493535,
+        0xb6b6cacb, 0x35354949, 0xcacab6b7, 0x3e3deeef, 0xc1c21111, 0xeeef3e3e, 0x1110c1c2, 0x4d4d0505,
+        0xb2b2fafb, 0x05054d4d, 0xfafab2b3, 0x52525252, 0xadadadae, 0x3332cccd, 0xcccd3333, 0x403fd4d5,
+        0xbfc02b2b, 0xd4d54040, 0x2b2abfc0, 0x5a59f5f6, 0xa5a60a0a, 0xf5f65a5a, 0x0a09a5a6, 0x72722929,
+        0x8d8dd6d7, 0x29297272, 0xd6d68d8e, 0x74744040, 0x8b8bbfc0, 0x40407474, 0xbfbf8b8c, 0x5251dadb,
+        0xadae2525, 0xdadb5252, 0x2524adae, 0x77771616, 0x8888e9ea, 0x16167777, 0xe9e98889, 0x7c7c5f5f,
+        0x8383a0a1, 0x5f5f7c7c, 0xa0a08384, 0x6f6ee1e2, 0x90911e1e, 0xe1e26f6f, 0x1e1d9091, 0x5c5bb1b2,
+        0xa3a44e4e, 0xb1b25c5c, 0x4e4da3a4, 0x7170bbbc, 0x8e8f4444, 0xbbbc7171, 0x44438e8f, 0x12121212,
+        0xedededee, 0x22222222, 0xddddddde, 0x3f3f3f3f, 0xc0c0c0c1, 0x6d6d6d6d, 0x92929293, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303,
+        0x03030303, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd,
+        0xfcfcfcfd, 0xfcfcfcfd, 0x0403feff, 0x0403feff, 0x0403feff, 0x0403feff, 0x0403feff, 0x0403feff,
+        0x0403feff, 0x0403feff, 0x0403feff, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101,
+        0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfbfc0101, 0xfeff0404, 0xfeff0404, 0xfeff0404, 0xfeff0404,
+        0xfeff0404, 0xfeff0404, 0xfeff0404, 0xfeff0404, 0xfeff0404, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc,
+        0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x0100fbfc, 0x07070707, 0x07070707,
+        0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0xf8f8f8f9,
+        0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303,
+        0xf5f5fcfd, 0x03030a0a, 0xfcfcf5f6, 0x09090909, 0xf6f6f6f7, 0x0706f8f9, 0xf8f90707, 0x0c0bfcfd,
+        0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x11110808, 0xeeeef7f8, 0x08081111, 0xf7f7eeef, 0x11111111,
+        0xeeeeeeef, 0x13130101, 0xececfeff, 0x01011313, 0xfefeeced, 0x100ff4f5, 0xeff00b0b, 0xf4f51010,
+        0x0b0aeff0, 0x1716f9fa, 0xe8e90606, 0xf9fa1717, 0x0605e8e9, 0x1f1f1212, 0xe0e0edee, 0x12121f1f,
+        0xedede0e1, 0x20200808, 0xdfdff7f8, 0x08082020, 0xf7f7dfe0, 0x21212121, 0xdedededf, 0x2423feff,
+        0xdbdc0101, 0xfeff2424, 0x0100dbdc, 0x1716e8e9, 0xe8e91717, 0x1f1eeeef, 0xe0e11111, 0xeeef1f1f,
+        0x1110e0e1, 0x36361515, 0xc9c9eaeb, 0x15153636, 0xeaeac9ca, 0x37372525, 0xc8c8dadb, 0x25253737,
+        0xdadac8c9, 0x2c2bf3f4, 0xd3d40c0c, 0xf3f42c2c, 0x0c0bd3d4, 0x39390808, 0xc6c6f7f8, 0x08083939,
+        0xf7f7c6c7, 0x3d3d3d3d, 0xc2c2c2c3, 0x4241fafb, 0xbdbe0505, 0xfafb4242, 0x0504bdbe, 0x2d2cdbdc,
+        0xd2d32424, 0xdbdc2d2d, 0x2423d2d3, 0x3c3be2e3, 0xc3c41d1d, 0xe2e33c3c, 0x1d1cc3c4, 0x5c5c2d2d,
+        0xa3a3d2d3, 0x2d2d5c5c, 0xd2d2a3a4, 0x5d5d1919, 0xa2a2e6e7, 0x19195d5d, 0xe6e6a2a3, 0x61614747,
+        0x9e9eb8b9, 0x47476161, 0xb8b89e9f, 0x5352e9ea, 0xacad1616, 0xe9ea5353, 0x1615acad, 0x66660707,
+        0x9999f8f9, 0x07076666, 0xf8f8999a, 0x6d6d6d6d, 0x92929293, 0x4443bbbc, 0xbbbc4444, 0x5554c6c7,
+        0xaaab3939, 0xc6c75555, 0x3938aaab, 0x7877f2f3, 0x87880d0d, 0xf2f37878, 0x0d0c8788, 0x6e6dcecf,
+        0x91923131, 0xcecf6e6e, 0x31309192, 0x7b7a9798, 0x84856868, 0x97987b7b, 0x68678485, 0x18181818,
+        0xe7e7e7e8, 0x2e2e2e2e, 0xd1d1d1d2, 0x54545454, 0xabababac, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04040404,
+        0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404,
+        0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc,
+        0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff,
+        0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 0x0504feff, 0xfafb0101, 0xfafb0101, 0xfafb0101,
+        0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfafb0101, 0xfeff0505,
+        0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505, 0xfeff0505,
+        0xfeff0505, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0100fafb,
+        0x0100fafb, 0x0100fafb, 0x0100fafb, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303,
+        0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0x0a0a0303, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd,
+        0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0xf5f5fcfd, 0x03030a0a,
+        0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a, 0x03030a0a,
+        0x03030a0a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b,
+        0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x03030d0d, 0xfcfcf2f3, 0x0908f6f7, 0xf6f70909, 0x0f0efbfc,
+        0xf0f10404, 0xfbfc0f0f, 0x0403f0f1, 0x16160b0b, 0xe9e9f4f5, 0x0b0b1616, 0xf4f4e9ea, 0x15151515,
+        0xeaeaeaeb, 0x18180202, 0xe7e7fdfe, 0x02021818, 0xfdfde7e8, 0x1413f1f2, 0xebec0e0e, 0xf1f21414,
+        0x0e0debec, 0x26261717, 0xd9d9e8e9, 0x17172626, 0xe8e8d9da, 0x1d1cf7f8, 0xe2e30808, 0xf7f81d1d,
+        0x0807e2e3, 0x27270b0b, 0xd8d8f4f5, 0x0b0b2727, 0xf4f4d8d9, 0x29292929, 0xd6d6d6d7, 0x2d2cfeff,
+        0xd2d30101, 0xfeff2d2d, 0x0100d2d3, 0x1d1ce2e3, 0xe2e31d1d, 0x2726e9ea, 0xd8d91616, 0xe9ea2727,
+        0x1615d8d9, 0x43431b1b, 0xbcbce4e5, 0x1b1b4343, 0xe4e4bcbd, 0x45452f2f, 0xbabad0d1, 0x2f2f4545,
+        0xd0d0babb, 0x3837f0f1, 0xc7c80f0f, 0xf0f13838, 0x0f0ec7c8, 0x47470b0b, 0xb8b8f4f5, 0x0b0b4747,
+        0xf4f4b8b9, 0x4c4c4c4c, 0xb3b3b3b4, 0x5352f9fa, 0xacad0606, 0xf9fa5353, 0x0605acad, 0x3938d2d3,
+        0xc6c72d2d, 0xd2d33939, 0x2d2cc6c7, 0x4b4adbdc, 0xb4b52424, 0xdbdc4b4b, 0x2423b4b5, 0x73733838,
+        0x8c8cc7c8, 0x38387373, 0xc7c78c8d, 0x75751f1f, 0x8a8ae0e1, 0x1f1f7575, 0xe0e08a8b, 0x7a7a5858,
+        0x8585a7a8, 0x58587a7a, 0xa7a78586, 0x6867e3e4, 0x97981c1c, 0xe3e46868, 0x1c1b9798, 0x5554aaab,
+        0xaaab5555, 0x6a69b7b8, 0x95964848, 0xb7b86a6a, 0x48479596, 0x1e1e1e1e, 0xe1e1e1e2, 0x3a3a3a3a,
+        0xc5c5c5c6, 0x69696969, 0x96969697, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x05050505, 0x05050505,
+        0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505,
+        0x05050505, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb,
+        0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe,
+        0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0x0706fdfe, 0xf8f90202,
+        0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202, 0xf8f90202,
+        0xf8f90202, 0xf8f90202, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707,
+        0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0xfdfe0707, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9,
+        0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9, 0x0201f8f9,
+        0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b,
+        0x0b0b0b0b, 0x0b0b0b0b, 0x0b0b0b0b, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5,
+        0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0xf4f4f4f5, 0x0d0d0303, 0x0d0d0303,
+        0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303, 0x0d0d0303,
+        0x0d0d0303, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd,
+        0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0xf2f2fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d,
+        0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0xfbfbf0f1, 0x0b0af4f5, 0xf4f50b0b, 0x1211fafb,
+        0xedee0505, 0xfafb1212, 0x0504edee, 0x1a1a0d0d, 0xe5e5f2f3, 0x0d0d1a1a, 0xf2f2e5e6, 0x1a1a1a1a,
+        0xe5e5e5e6, 0x1d1d0202, 0xe2e2fdfe, 0x02021d1d, 0xfdfde2e3, 0x1817eff0, 0xe7e81010, 0xeff01818,
+        0x100fe7e8, 0x2e2e1c1c, 0xd1d1e3e4, 0x1c1c2e2e, 0xe3e3d1d2, 0x2322f6f7, 0xdcdd0909, 0xf6f72323,
+        0x0908dcdd, 0x2f2f0d0d, 0xd0d0f2f3, 0x0d0d2f2f, 0xf2f2d0d1, 0x31313131, 0xcecececf, 0x3635feff,
+        0xc9ca0101, 0xfeff3636, 0x0100c9ca, 0x2322dcdd, 0xdcdd2323, 0x2f2ee5e6, 0xd0d11a1a, 0xe5e62f2f,
+        0x1a19d0d1, 0x51512020, 0xaeaedfe0, 0x20205151, 0xdfdfaeaf, 0x53533838, 0xacacc7c8, 0x38385353,
+        0xc7c7acad, 0x4342edee, 0xbcbd1212, 0xedee4343, 0x1211bcbd, 0x56560d0d, 0xa9a9f2f3, 0x0d0d5656,
+        0xf2f2a9aa, 0x5b5b5b5b, 0xa4a4a4a5, 0x6362f8f9, 0x9c9d0707, 0xf8f96363, 0x07069c9d, 0x4443c9ca,
+        0xbbbc3636, 0xc9ca4444, 0x3635bbbc, 0x5a59d3d4, 0xa5a62c2c, 0xd3d45a5a, 0x2c2ba5a6, 0x7c7bdedf,
+        0x83842121, 0xdedf7c7c, 0x21208384, 0x67669899, 0x98996767, 0x7f7ea9aa, 0x80815656, 0xa9aa7f7f,
+        0x56558081, 0x25252525, 0xdadadadb, 0x45454545, 0xbabababb, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe,
+        0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0x0807fdfe, 0xf7f80202, 0xf7f80202, 0xf7f80202,
+        0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202, 0xf7f80202,
+        0xf7f80202, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808,
+        0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0xfdfe0808, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8,
+        0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8, 0x0201f7f8,
+        0x0201f7f8, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d,
+        0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0x0d0d0d0d, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3,
+        0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3, 0xf2f2f2f3,
+        0xf2f2f2f3, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404,
+        0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0x0f0f0404, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc,
+        0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc, 0xf0f0fbfc,
+        0xf0f0fbfc, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f,
+        0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x04040f0f, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010,
+        0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0xfafaedee, 0x0d0cf2f3, 0xf2f30d0d, 0x1514f9fa,
+        0xeaeb0606, 0xf9fa1515, 0x0605eaeb, 0x1e1e0f0f, 0xe1e1f0f1, 0x0f0f1e1e, 0xf0f0e1e2, 0x1e1e1e1e,
+        0xe1e1e1e2, 0x22220202, 0xddddfdfe, 0x02022222, 0xfdfdddde, 0x1c1beced, 0xe3e41313, 0xeced1c1c,
+        0x1312e3e4, 0x36362020, 0xc9c9dfe0, 0x20203636, 0xdfdfc9ca, 0x2928f4f5, 0xd6d70b0b, 0xf4f52929,
+        0x0b0ad6d7, 0x37370f0f, 0xc8c8f0f1, 0x0f0f3737, 0xf0f0c8c9, 0x39393939, 0xc6c6c6c7, 0x3f3efeff,
+        0xc0c10101, 0xfeff3f3f, 0x0100c0c1, 0x2827d7d8, 0xd7d82828, 0x3736e1e2, 0xc8c91e1e, 0xe1e23737,
+        0x1e1dc8c9, 0x5e5e2525, 0xa1a1dadb, 0x25255e5e, 0xdadaa1a2, 0x60604141, 0x9f9fbebf, 0x41416060,
+        0xbebe9fa0, 0x4e4deaeb, 0xb1b21515, 0xeaeb4e4e, 0x1514b1b2, 0x64640f0f, 0x9b9bf0f1, 0x0f0f6464,
+        0xf0f09b9c, 0x6a6a6a6a, 0x95959596, 0x7473f7f8, 0x8b8c0808, 0xf7f87474, 0x08078b8c, 0x4f4ec0c1,
+        0xb0b13f3f, 0xc0c14f4f, 0x3f3eb0b1, 0x6968cccd, 0x96973333, 0xcccd6969, 0x33329697, 0x78778788,
+        0x87887878, 0x2b2b2b2b, 0xd4d4d4d5, 0x50505050, 0xafafafb0, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707,
+        0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9,
+        0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9,
+        0xf8f8f8f9, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd,
+        0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0x0a09fcfd, 0xf5f60303, 0xf5f60303, 0xf5f60303,
+        0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303, 0xf5f60303,
+        0xf5f60303, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a,
+        0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0xfcfd0a0a, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6,
+        0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6, 0x0302f5f6,
+        0x0302f5f6, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010,
+        0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0xefefeff0, 0xefefeff0, 0xefefeff0,
+        0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0,
+        0xefefeff0, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505,
+        0x12120505, 0x12120505, 0x12120505, 0x12120505, 0x12120505, 0xededfafb, 0xededfafb, 0xededfafb,
+        0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb, 0xededfafb,
+        0xededfafb, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212,
+        0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x05051212, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212,
+        0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0xfafaebec, 0x0f0ef0f1, 0xf0f10f0f, 0x1817f8f9,
+        0xe7e80707, 0xf8f91818, 0x0706e7e8, 0x23231111, 0xdcdceeef, 0x11112323, 0xeeeedcdd, 0x22222222,
+        0xddddddde, 0x26260303, 0xd9d9fcfd, 0x03032626, 0xfcfcd9da, 0x201fe9ea, 0xdfe01616, 0xe9ea2020,
+        0x1615dfe0, 0x3d3d2525, 0xc2c2dadb, 0x25253d3d, 0xdadac2c3, 0x2f2ef2f3, 0xd0d10d0d, 0xf2f32f2f,
+        0x0d0cd0d1, 0x3f3f1111, 0xc0c0eeef, 0x11113f3f, 0xeeeec0c1, 0x41414141, 0xbebebebf, 0x4847feff,
+        0xb7b80101, 0xfeff4848, 0x0100b7b8, 0x2e2dd1d2, 0xd1d22e2e, 0x3f3edcdd, 0xc0c12323, 0xdcdd3f3f,
+        0x2322c0c1, 0x6b6b2b2b, 0x9494d4d5, 0x2b2b6b6b, 0xd4d49495, 0x6e6e4b4b, 0x9191b4b5, 0x4b4b6e6e,
+        0xb4b49192, 0x5958e7e8, 0xa6a71818, 0xe7e85959, 0x1817a6a7, 0x72721111, 0x8d8deeef, 0x11117272,
+        0xeeee8d8e, 0x79797979, 0x86868687, 0x5b5ab7b8, 0xa4a54848, 0xb7b85b5b, 0x4847a4a5, 0x7877c5c6,
+        0x87883a3a, 0xc5c67878, 0x3a398788, 0x31313131, 0xcecececf, 0x5c5c5c5c, 0xa3a3a3a4, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808,
+        0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0xf7f7f7f8,
+        0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8,
+        0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd,
+        0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0x0b0afcfd, 0xf4f50303,
+        0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303, 0xf4f50303,
+        0xf4f50303, 0xf4f50303, 0xf4f50303, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b,
+        0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0xfcfd0b0b, 0x0302f4f5,
+        0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x0302f4f5,
+        0x0302f4f5, 0x0302f4f5, 0x0302f4f5, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212,
+        0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0xedededee,
+        0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee,
+        0xedededee, 0xedededee, 0xedededee, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505,
+        0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0x14140505, 0xebebfafb,
+        0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb, 0xebebfafb,
+        0xebebfafb, 0xebebfafb, 0xebebfafb, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414,
+        0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x05051414, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414,
+        0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x1110eeef, 0xeeef1111, 0x1b1af8f9,
+        0xe4e50707, 0xf8f91b1b, 0x0706e4e5, 0x27271313, 0xd8d8eced, 0x13132727, 0xececd8d9, 0x27272727,
+        0xd8d8d8d9, 0x2b2b0303, 0xd4d4fcfd, 0x03032b2b, 0xfcfcd4d5, 0x2423e7e8, 0xdbdc1818, 0xe7e82424,
+        0x1817dbdc, 0x45452a2a, 0xbabad5d6, 0x2a2a4545, 0xd5d5babb, 0x3534f1f2, 0xcacb0e0e, 0xf1f23535,
+        0x0e0dcacb, 0x47471313, 0xb8b8eced, 0x13134747, 0xececb8b9, 0x49494949, 0xb6b6b6b7, 0x504ffdfe,
+        0xafb00202, 0xfdfe5050, 0x0201afb0, 0x3433cbcc, 0xcbcc3434, 0x4645d8d9, 0xb9ba2727, 0xd8d94646,
+        0x2726b9ba, 0x79793030, 0x8686cfd0, 0x30307979, 0xcfcf8687, 0x7c7c5454, 0x8383abac, 0x54547c7c,
+        0xabab8384, 0x6463e4e5, 0x9b9c1b1b, 0xe4e56464, 0x1b1a9b9c, 0x6665aeaf, 0x999a5151, 0xaeaf6666,
+        0x5150999a, 0x37373737, 0xc8c8c8c9, 0x68686868, 0x97979798, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909,
+        0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0xf6f6f6f7,
+        0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7,
+        0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd,
+        0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd, 0x0c0bfcfd,
+        0x0c0bfcfd, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303,
+        0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xf3f40303, 0xfcfd0c0c, 0xfcfd0c0c,
+        0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c,
+        0xfcfd0c0c, 0xfcfd0c0c, 0xfcfd0c0c, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4,
+        0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4, 0x0302f3f4,
+        0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414,
+        0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec, 0xebebebec, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec,
+        0xebebebec, 0xebebebec, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606,
+        0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0x17170606, 0xe8e8f9fa,
+        0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa,
+        0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0xe8e8f9fa, 0x06061717, 0x06061717, 0x06061717, 0x06061717,
+        0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717, 0x06061717,
+        0x06061717, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9,
+        0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0xf9f9e8e9, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404,
+        0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x0403fbfc, 0xfbfc0404, 0x0605fdfe,
+        0xf9fa0202, 0xfdfe0606, 0x0201f9fa, 0x08080404, 0xf7f7fbfc, 0x04040808, 0xfbfbf7f8, 0x08080808,
+        0xf7f7f7f8, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x0807fbfc, 0xf7f80404, 0xfbfc0808,
+        0x0403f7f8, 0x0e0e0808, 0xf1f1f7f8, 0x08080e0e, 0xf7f7f1f2, 0x0c0bfdfe, 0xf3f40202, 0xfdfe0c0c,
+        0x0201f3f4, 0x10100404, 0xefeffbfc, 0x04041010, 0xfbfbeff0, 0x10101010, 0xefefeff0, 0x12120000,
+        0xedee0000, 0x00001212, 0xffffedee, 0x0c0bf3f4, 0xf3f40c0c, 0x100ff7f8, 0xeff00808, 0xf7f81010,
+        0x0807eff0, 0x1a1a0a0a, 0xe5e5f5f6, 0x0a0a1a1a, 0xf5f5e5e6, 0x1c1c1212, 0xe3e3edee, 0x12121c1c,
+        0xedede3e4, 0x1615f9fa, 0xe9ea0606, 0xf9fa1616, 0x0605e9ea, 0x1c1c0404, 0xe3e3fbfc, 0x04041c1c,
+        0xfbfbe3e4, 0x1e1e1e1e, 0xe1e1e1e2, 0x201ffdfe, 0xdfe00202, 0xfdfe2020, 0x0201dfe0, 0x1615edee,
+        0xe9ea1212, 0xedee1616, 0x1211e9ea, 0x1e1df1f2, 0xe1e20e0e, 0xf1f21e1e, 0x0e0de1e2, 0x2e2e1616,
+        0xd1d1e9ea, 0x16162e2e, 0xe9e9d1d2, 0x2e2e0c0c, 0xd1d1f3f4, 0x0c0c2e2e, 0xf3f3d1d2, 0x30302222,
+        0xcfcfddde, 0x22223030, 0xddddcfd0, 0x2827f5f6, 0xd7d80a0a, 0xf5f62828, 0x0a09d7d8, 0x32320404,
+        0xcdcdfbfc, 0x04043232, 0xfbfbcdce, 0x36363636, 0xc9c9c9ca, 0x2221ddde, 0xddde2222, 0x2a29e3e4,
+        0xd5d61c1c, 0xe3e42a2a, 0x1c1bd5d6, 0x3c3bf9fa, 0xc3c40606, 0xf9fa3c3c, 0x0605c3c4, 0x4c4c1a1a,
+        0xb3b3e5e6, 0x1a1a4c4c, 0xe5e5b3b4, 0x4c4c2a2a, 0xb3b3d5d6, 0x2a2a4c4c, 0xd5d5b3b4, 0x3635e7e8,
+        0xc9ca1818, 0xe7e83636, 0x1817c9ca, 0x4e4e0e0e, 0xb1b1f1f2, 0x0e0e4e4e, 0xf1f1b1b2, 0x52523e3e,
+        0xadadc1c2, 0x3e3e5252, 0xc1c1adae, 0x4a49ebec, 0xb5b61414, 0xebec4a4a, 0x1413b5b6, 0x58580202,
+        0xa7a7fdfe, 0x02025858, 0xfdfda7a8, 0x5c5c5c5c, 0xa3a3a3a4, 0x3c3bcbcc, 0xc3c43434, 0xcbcc3c3c,
+        0x3433c3c4, 0x76763434, 0x8989cbcc, 0x34347676, 0xcbcb898a, 0x4a49d3d4, 0xb5b62c2c, 0xd3d44a4a,
+        0x2c2bb5b6, 0x76764a4a, 0x8989b5b6, 0x4a4a7676, 0xb5b5898a, 0x76762020, 0x8989dfe0, 0x20207676,
+        0xdfdf898a, 0x6665f3f4, 0x999a0c0c, 0xf3f46666, 0x0c0b999a, 0x605fd7d8, 0x9fa02828, 0xd7d86060,
+        0x28279fa0, 0x7675ddde, 0x898a2222, 0xddde7676, 0x2221898a, 0x5857a7a8, 0xa7a85858, 0x6867b1b2,
+        0x97984e4e, 0xb1b26868, 0x4e4d9798, 0x0c0c0c0c, 0xf3f3f3f4, 0x16161616, 0xe9e9e9ea, 0x2a2a2a2a,
+        0xd5d5d5d6, 0x48484848, 0xb7b7b7b8, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0xfdfe0000,
+        0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0x00000202, 0x00000202,
+        0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe,
+        0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606,
+        0xf9f9f9fa, 0x09090303, 0xf6f6fcfd, 0x03030909, 0xfcfcf6f7, 0x0908fcfd, 0xf6f70303, 0xfcfd0909,
+        0x0302f6f7, 0x0605f9fa, 0xf9fa0606, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0xf9f9f3f4, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x0f0f0000, 0xf0f10000, 0x00000f0f, 0xfffff0f1, 0x0c0bf6f7, 0xf3f40909, 0xf6f70c0c,
+        0x0908f3f4, 0x18180f0f, 0xe7e7f0f1, 0x0f0f1818, 0xf0f0e7e8, 0x1211f9fa, 0xedee0606, 0xf9fa1212,
+        0x0605edee, 0x18180606, 0xe7e7f9fa, 0x06061818, 0xf9f9e7e8, 0x18181818, 0xe7e7e7e8, 0x1b1b0000,
+        0xe4e50000, 0x00001b1b, 0xffffe4e5, 0x1211edee, 0xedee1212, 0x1817f3f4, 0xe7e80c0c, 0xf3f41818,
+        0x0c0be7e8, 0x27270f0f, 0xd8d8f0f1, 0x0f0f2727, 0xf0f0d8d9, 0x2a2a1b1b, 0xd5d5e4e5, 0x1b1b2a2a,
+        0xe4e4d5d6, 0x2120f6f7, 0xdedf0909, 0xf6f72121, 0x0908dedf, 0x2a2a0606, 0xd5d5f9fa, 0x06062a2a,
+        0xf9f9d5d6, 0x2d2d2d2d, 0xd2d2d2d3, 0x3332fcfd, 0xcccd0303, 0xfcfd3333, 0x0302cccd, 0x2120e4e5,
+        0xdedf1b1b, 0xe4e52121, 0x1b1adedf, 0x2d2ceaeb, 0xd2d31515, 0xeaeb2d2d, 0x1514d2d3, 0x45452121,
+        0xbabadedf, 0x21214545, 0xdedebabb, 0x45451212, 0xbabaedee, 0x12124545, 0xededbabb, 0x48483636,
+        0xb7b7c9ca, 0x36364848, 0xc9c9b7b8, 0x3f3eedee, 0xc0c11212, 0xedee3f3f, 0x1211c0c1, 0x4e4e0606,
+        0xb1b1f9fa, 0x06064e4e, 0xf9f9b1b2, 0x51515151, 0xaeaeaeaf, 0x3332cccd, 0xcccd3333, 0x3f3ed5d6,
+        0xc0c12a2a, 0xd5d63f3f, 0x2a29c0c1, 0x5a59f6f7, 0xa5a60909, 0xf6f75a5a, 0x0908a5a6, 0x72722a2a,
+        0x8d8dd5d6, 0x2a2a7272, 0xd5d58d8e, 0x75753f3f, 0x8a8ac0c1, 0x3f3f7575, 0xc0c08a8b, 0x5150dbdc,
+        0xaeaf2424, 0xdbdc5151, 0x2423aeaf, 0x78781515, 0x8787eaeb, 0x15157878, 0xeaea8788, 0x7b7b6060,
+        0x84849fa0, 0x60607b7b, 0x9f9f8485, 0x6f6ee1e2, 0x90911e1e, 0xe1e26f6f, 0x1e1d9091, 0x5d5cb1b2,
+        0xa2a34e4e, 0xb1b25d5d, 0x4e4da2a3, 0x7271babb, 0x8d8e4545, 0xbabb7272, 0x45448d8e, 0x12121212,
+        0xedededee, 0x21212121, 0xdedededf, 0x3f3f3f3f, 0xc0c0c0c1, 0x6c6c6c6c, 0x93939394, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303, 0x03030303,
+        0x03030303, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd, 0xfcfcfcfd,
+        0xfcfcfcfd, 0xfcfcfcfd, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000,
+        0x03030000, 0x03030000, 0x03030000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000,
+        0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0x00000303, 0x00000303, 0x00000303, 0x00000303,
+        0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd,
+        0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404,
+        0xf7f7fbfc, 0x04040808, 0xfbfbf7f8, 0x08080808, 0xf7f7f7f8, 0x0807f7f8, 0xf7f80808, 0x0c0bfbfc,
+        0xf3f40404, 0xfbfc0c0c, 0x0403f3f4, 0x10100808, 0xefeff7f8, 0x08081010, 0xf7f7eff0, 0x10101010,
+        0xefefeff0, 0x14140000, 0xebec0000, 0x00001414, 0xffffebec, 0x100ff3f4, 0xeff00c0c, 0xf3f41010,
+        0x0c0beff0, 0x1817fbfc, 0xe7e80404, 0xfbfc1818, 0x0403e7e8, 0x20201010, 0xdfdfeff0, 0x10102020,
+        0xefefdfe0, 0x20200808, 0xdfdff7f8, 0x08082020, 0xf7f7dfe0, 0x20202020, 0xdfdfdfe0, 0x24240000,
+        0xdbdc0000, 0x00002424, 0xffffdbdc, 0x1817e7e8, 0xe7e81818, 0x201feff0, 0xdfe01010, 0xeff02020,
+        0x100fdfe0, 0x34341414, 0xcbcbebec, 0x14143434, 0xebebcbcc, 0x38382424, 0xc7c7dbdc, 0x24243838,
+        0xdbdbc7c8, 0x2c2bf3f4, 0xd3d40c0c, 0xf3f42c2c, 0x0c0bd3d4, 0x38380808, 0xc7c7f7f8, 0x08083838,
+        0xf7f7c7c8, 0x3c3c3c3c, 0xc3c3c3c4, 0x403ffbfc, 0xbfc00404, 0xfbfc4040, 0x0403bfc0, 0x2c2bdbdc,
+        0xd3d42424, 0xdbdc2c2c, 0x2423d3d4, 0x3c3be3e4, 0xc3c41c1c, 0xe3e43c3c, 0x1c1bc3c4, 0x5c5c2c2c,
+        0xa3a3d3d4, 0x2c2c5c5c, 0xd3d3a3a4, 0x5c5c1818, 0xa3a3e7e8, 0x18185c5c, 0xe7e7a3a4, 0x60604848,
+        0x9f9fb7b8, 0x48486060, 0xb7b79fa0, 0x5453ebec, 0xabac1414, 0xebec5454, 0x1413abac, 0x64640808,
+        0x9b9bf7f8, 0x08086464, 0xf7f79b9c, 0x6c6c6c6c, 0x93939394, 0x4443bbbc, 0xbbbc4444, 0x5453c7c8,
+        0xabac3838, 0xc7c85454, 0x3837abac, 0x7877f3f4, 0x87880c0c, 0xf3f47878, 0x0c0b8788, 0x6c6bcfd0,
+        0x93943030, 0xcfd06c6c, 0x302f9394, 0x7c7b9798, 0x83846868, 0x97987c7c, 0x68678384, 0x18181818,
+        0xe7e7e7e8, 0x2c2c2c2c, 0xd3d3d3d4, 0x54545454, 0xabababac, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04040404,
+        0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404,
+        0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc,
+        0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000,
+        0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000,
+        0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0x00000404,
+        0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404,
+        0x00000404, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc,
+        0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0x08080404, 0x08080404, 0x08080404, 0x08080404, 0x08080404,
+        0x08080404, 0x08080404, 0x08080404, 0x08080404, 0x08080404, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc,
+        0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0xf7f7fbfc, 0x04040808,
+        0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808, 0x04040808,
+        0x04040808, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a,
+        0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x05050f0f, 0xfafaf0f1, 0x0a09f5f6, 0xf5f60a0a, 0x0f0efafb,
+        0xf0f10505, 0xfafb0f0f, 0x0504f0f1, 0x14140a0a, 0xebebf5f6, 0x0a0a1414, 0xf5f5ebec, 0x14141414,
+        0xebebebec, 0x19190000, 0xe6e70000, 0x00001919, 0xffffe6e7, 0x1413f0f1, 0xebec0f0f, 0xf0f11414,
+        0x0f0eebec, 0x28281919, 0xd7d7e6e7, 0x19192828, 0xe6e6d7d8, 0x1e1df5f6, 0xe1e20a0a, 0xf5f61e1e,
+        0x0a09e1e2, 0x28280a0a, 0xd7d7f5f6, 0x0a0a2828, 0xf5f5d7d8, 0x28282828, 0xd7d7d7d8, 0x2d2d0000,
+        0xd2d30000, 0x00002d2d, 0xffffd2d3, 0x1e1de1e2, 0xe1e21e1e, 0x2827ebec, 0xd7d81414, 0xebec2828,
+        0x1413d7d8, 0x41411919, 0xbebee6e7, 0x19194141, 0xe6e6bebf, 0x46462d2d, 0xb9b9d2d3, 0x2d2d4646,
+        0xd2d2b9ba, 0x3736f0f1, 0xc8c90f0f, 0xf0f13737, 0x0f0ec8c9, 0x46460a0a, 0xb9b9f5f6, 0x0a0a4646,
+        0xf5f5b9ba, 0x4b4b4b4b, 0xb4b4b4b5, 0x5554fafb, 0xaaab0505, 0xfafb5555, 0x0504aaab, 0x3736d2d3,
+        0xc8c92d2d, 0xd2d33737, 0x2d2cc8c9, 0x4b4adcdd, 0xb4b52323, 0xdcdd4b4b, 0x2322b4b5, 0x73733737,
+        0x8c8cc8c9, 0x37377373, 0xc8c88c8d, 0x73731e1e, 0x8c8ce1e2, 0x1e1e7373, 0xe1e18c8d, 0x78785a5a,
+        0x8787a5a6, 0x5a5a7878, 0xa5a58788, 0x6968e1e2, 0x96971e1e, 0xe1e26969, 0x1e1d9697, 0x5554aaab,
+        0xaaab5555, 0x6968b9ba, 0x96974646, 0xb9ba6969, 0x46459697, 0x1e1e1e1e, 0xe1e1e1e2, 0x3c3c3c3c,
+        0xc3c3c3c4, 0x69696969, 0x96969697, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x05050505, 0x05050505,
+        0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505, 0x05050505,
+        0x05050505, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb,
+        0xfafafafb, 0xfafafafb, 0xfafafafb, 0xfafafafb, 0x05050000, 0x05050000, 0x05050000, 0x05050000,
+        0x05050000, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0x05050000, 0xfafb0000,
+        0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000, 0xfafb0000,
+        0xfafb0000, 0xfafb0000, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0x00000505,
+        0x00000505, 0x00000505, 0x00000505, 0x00000505, 0x00000505, 0xfffffafb, 0xfffffafb, 0xfffffafb,
+        0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb, 0xfffffafb,
+        0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a,
+        0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6,
+        0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0x0f0f0505, 0x0f0f0505,
+        0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505, 0x0f0f0505,
+        0x0f0f0505, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb,
+        0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0xf0f0fafb, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0xf9f9f3f4, 0x0c0bf3f4, 0xf3f40c0c, 0x1211f9fa,
+        0xedee0606, 0xf9fa1212, 0x0605edee, 0x18180c0c, 0xe7e7f3f4, 0x0c0c1818, 0xf3f3e7e8, 0x18181818,
+        0xe7e7e7e8, 0x1e1e0000, 0xe1e20000, 0x00001e1e, 0xffffe1e2, 0x1817edee, 0xe7e81212, 0xedee1818,
+        0x1211e7e8, 0x30301e1e, 0xcfcfe1e2, 0x1e1e3030, 0xe1e1cfd0, 0x2423f9fa, 0xdbdc0606, 0xf9fa2424,
+        0x0605dbdc, 0x30300c0c, 0xcfcff3f4, 0x0c0c3030, 0xf3f3cfd0, 0x30303030, 0xcfcfcfd0, 0x36360000,
+        0xc9ca0000, 0x00003636, 0xffffc9ca, 0x2423dbdc, 0xdbdc2424, 0x302fe7e8, 0xcfd01818, 0xe7e83030,
+        0x1817cfd0, 0x4e4e1e1e, 0xb1b1e1e2, 0x1e1e4e4e, 0xe1e1b1b2, 0x54543636, 0xababc9ca, 0x36365454,
+        0xc9c9abac, 0x4241edee, 0xbdbe1212, 0xedee4242, 0x1211bdbe, 0x54540c0c, 0xababf3f4, 0x0c0c5454,
+        0xf3f3abac, 0x5a5a5a5a, 0xa5a5a5a6, 0x605ff9fa, 0x9fa00606, 0xf9fa6060, 0x06059fa0, 0x4241c9ca,
+        0xbdbe3636, 0xc9ca4242, 0x3635bdbe, 0x5a59d5d6, 0xa5a62a2a, 0xd5d65a5a, 0x2a29a5a6, 0x7e7de1e2,
+        0x81821e1e, 0xe1e27e7e, 0x1e1d8182, 0x6665999a, 0x999a6666, 0x7e7dabac, 0x81825454, 0xabac7e7e,
+        0x54538182, 0x24242424, 0xdbdbdbdc, 0x42424242, 0xbdbdbdbe, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000,
+        0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000,
+        0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000,
+        0xf9fa0000, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606,
+        0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa,
+        0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa,
+        0xfffff9fa, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606,
+        0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0x0c0c0606, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa,
+        0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa, 0xf3f3f9fa,
+        0xf3f3f9fa, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c,
+        0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x06060c0c, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e,
+        0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0xf8f8eaeb, 0x0e0df1f2, 0xf1f20e0e, 0x1514f8f9,
+        0xeaeb0707, 0xf8f91515, 0x0706eaeb, 0x1c1c0e0e, 0xe3e3f1f2, 0x0e0e1c1c, 0xf1f1e3e4, 0x1c1c1c1c,
+        0xe3e3e3e4, 0x23230000, 0xdcdd0000, 0x00002323, 0xffffdcdd, 0x1c1beaeb, 0xe3e41515, 0xeaeb1c1c,
+        0x1514e3e4, 0x38382323, 0xc7c7dcdd, 0x23233838, 0xdcdcc7c8, 0x2a29f1f2, 0xd5d60e0e, 0xf1f22a2a,
+        0x0e0dd5d6, 0x38380e0e, 0xc7c7f1f2, 0x0e0e3838, 0xf1f1c7c8, 0x38383838, 0xc7c7c7c8, 0x3f3f0000,
+        0xc0c10000, 0x00003f3f, 0xffffc0c1, 0x2a29d5d6, 0xd5d62a2a, 0x3837e3e4, 0xc7c81c1c, 0xe3e43838,
+        0x1c1bc7c8, 0x5b5b2323, 0xa4a4dcdd, 0x23235b5b, 0xdcdca4a5, 0x62623f3f, 0x9d9dc0c1, 0x3f3f6262,
+        0xc0c09d9e, 0x4d4ceaeb, 0xb2b31515, 0xeaeb4d4d, 0x1514b2b3, 0x62620e0e, 0x9d9df1f2, 0x0e0e6262,
+        0xf1f19d9e, 0x69696969, 0x96969697, 0x7776f8f9, 0x88890707, 0xf8f97777, 0x07068889, 0x4d4cc0c1,
+        0xb2b33f3f, 0xc0c14d4d, 0x3f3eb2b3, 0x6968cecf, 0x96973131, 0xcecf6969, 0x31309697, 0x77768889,
+        0x88897777, 0x2a2a2a2a, 0xd5d5d5d6, 0x4d4d4d4d, 0xb2b2b2b3, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707,
+        0x07070707, 0x07070707, 0x07070707, 0x07070707, 0x07070707, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9,
+        0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9, 0xf8f8f8f9,
+        0xf8f8f8f9, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000,
+        0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0xf8f90000, 0xf8f90000, 0xf8f90000,
+        0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000,
+        0xf8f90000, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707,
+        0x00000707, 0x00000707, 0x00000707, 0x00000707, 0x00000707, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9,
+        0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9, 0xfffff8f9,
+        0xfffff8f9, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e,
+        0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0x0e0e0e0e, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2,
+        0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2, 0xf1f1f1f2,
+        0xf1f1f1f2, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707,
+        0x15150707, 0x15150707, 0x15150707, 0x15150707, 0x15150707, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9,
+        0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9, 0xeaeaf8f9,
+        0xeaeaf8f9, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515,
+        0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x07071515, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010,
+        0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0xf7f7eff0, 0x100feff0, 0xeff01010, 0x1817f7f8,
+        0xe7e80808, 0xf7f81818, 0x0807e7e8, 0x20201010, 0xdfdfeff0, 0x10102020, 0xefefdfe0, 0x20202020,
+        0xdfdfdfe0, 0x28280000, 0xd7d80000, 0x00002828, 0xffffd7d8, 0x201fe7e8, 0xdfe01818, 0xe7e82020,
+        0x1817dfe0, 0x40402828, 0xbfbfd7d8, 0x28284040, 0xd7d7bfc0, 0x302feff0, 0xcfd01010, 0xeff03030,
+        0x100fcfd0, 0x40401010, 0xbfbfeff0, 0x10104040, 0xefefbfc0, 0x40404040, 0xbfbfbfc0, 0x48480000,
+        0xb7b80000, 0x00004848, 0xffffb7b8, 0x302fcfd0, 0xcfd03030, 0x403fdfe0, 0xbfc02020, 0xdfe04040,
+        0x201fbfc0, 0x68682828, 0x9797d7d8, 0x28286868, 0xd7d79798, 0x70704848, 0x8f8fb7b8, 0x48487070,
+        0xb7b78f90, 0x5857e7e8, 0xa7a81818, 0xe7e85858, 0x1817a7a8, 0x70701010, 0x8f8feff0, 0x10107070,
+        0xefef8f90, 0x78787878, 0x87878788, 0x5857b7b8, 0xa7a84848, 0xb7b85858, 0x4847a7a8, 0x7877c7c8,
+        0x87883838, 0xc7c87878, 0x38378788, 0x30303030, 0xcfcfcfd0, 0x58585858, 0xa7a7a7a8, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808,
+        0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0x08080808, 0xf7f7f7f8,
+        0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8,
+        0xf7f7f7f8, 0xf7f7f7f8, 0xf7f7f7f8, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000,
+        0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0x08080000, 0xf7f80000,
+        0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000, 0xf7f80000,
+        0xf7f80000, 0xf7f80000, 0xf7f80000, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808,
+        0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0x00000808, 0xfffff7f8,
+        0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0xfffff7f8,
+        0xfffff7f8, 0xfffff7f8, 0xfffff7f8, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010,
+        0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0x10101010, 0xefefeff0,
+        0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0, 0xefefeff0,
+        0xefefeff0, 0xefefeff0, 0xefefeff0, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808,
+        0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0x10100808, 0xefeff7f8,
+        0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0xefeff7f8,
+        0xefeff7f8, 0xefeff7f8, 0xefeff7f8, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010,
+        0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x08081010, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212,
+        0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x1211edee, 0xedee1212, 0x1b1af6f7,
+        0xe4e50909, 0xf6f71b1b, 0x0908e4e5, 0x24241212, 0xdbdbedee, 0x12122424, 0xededdbdc, 0x24242424,
+        0xdbdbdbdc, 0x2d2d0000, 0xd2d30000, 0x00002d2d, 0xffffd2d3, 0x2423e4e5, 0xdbdc1b1b, 0xe4e52424,
+        0x1b1adbdc, 0x48482d2d, 0xb7b7d2d3, 0x2d2d4848, 0xd2d2b7b8, 0x3635edee, 0xc9ca1212, 0xedee3636,
+        0x1211c9ca, 0x48481212, 0xb7b7edee, 0x12124848, 0xededb7b8, 0x48484848, 0xb7b7b7b8, 0x51510000,
+        0xaeaf0000, 0x00005151, 0xffffaeaf, 0x3635c9ca, 0xc9ca3636, 0x4847dbdc, 0xb7b82424, 0xdbdc4848,
+        0x2423b7b8, 0x75752d2d, 0x8a8ad2d3, 0x2d2d7575, 0xd2d28a8b, 0x7e7e5151, 0x8181aeaf, 0x51517e7e,
+        0xaeae8182, 0x6362e4e5, 0x9c9d1b1b, 0xe4e56363, 0x1b1a9c9d, 0x6362aeaf, 0x9c9d5151, 0xaeaf6363,
+        0x51509c9d, 0x36363636, 0xc9c9c9ca, 0x6c6c6c6c, 0x93939394, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909,
+        0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0x09090909, 0xf6f6f6f7,
+        0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7,
+        0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0xf6f6f6f7, 0x09090000, 0x09090000, 0x09090000, 0x09090000,
+        0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000, 0x09090000,
+        0x09090000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000,
+        0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0xf6f70000, 0x00000909, 0x00000909,
+        0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909, 0x00000909,
+        0x00000909, 0x00000909, 0x00000909, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7,
+        0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7, 0xfffff6f7,
+        0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212,
+        0x12121212, 0x12121212, 0x12121212, 0x12121212, 0x12121212, 0xedededee, 0xedededee, 0xedededee,
+        0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee, 0xedededee,
+        0xedededee, 0xedededee, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909,
+        0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0x1b1b0909, 0xe4e4f6f7,
+        0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7,
+        0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0xe4e4f6f7, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b,
+        0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b, 0x09091b1b,
+        0x09091b1b, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5,
+        0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0xf6f6e4e5, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606,
+        0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0504fafb, 0xfafb0505, 0xfafb0505,
+        0x0504fafb, 0x0b0b0606, 0xf4f4f9fa, 0x06060b0b, 0xf9f9f4f5, 0x08080000, 0xf7f80000, 0x00000808,
+        0xfffff7f8, 0x0b0b0b0b, 0xf4f4f4f5, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x11110c0c,
+        0xeeeef3f4, 0x0c0c1111, 0xf3f3eeef, 0x11111111, 0xeeeeeeef, 0x12120606, 0xededf9fa, 0x06061212,
+        0xf9f9edee, 0x0b0af7f8, 0xf4f50808, 0xf7f80b0b, 0x0807f4f5, 0x0f0f0000, 0xf0f10000, 0x00000f0f,
+        0xfffff0f1, 0x14140000, 0xebec0000, 0x00001414, 0xffffebec, 0x19191212, 0xe6e6edee, 0x12121919,
+        0xedede6e7, 0x19190b0b, 0xe6e6f4f5, 0x0b0b1919, 0xf4f4e6e7, 0x19191919, 0xe6e6e6e7, 0x0e0df1f2,
+        0xf1f20e0e, 0xf1f20e0e, 0x0e0df1f2, 0x1a1a0000, 0xe5e60000, 0x00001a1a, 0xffffe5e6, 0x1211f4f5,
+        0xedee0b0b, 0xf4f51212, 0x0b0aedee, 0x1615f8f9, 0xe9ea0707, 0xf8f91616, 0x0706e9ea, 0x22221a1a,
+        0xdddde5e6, 0x1a1a2222, 0xe5e5ddde, 0x22221212, 0xddddedee, 0x12122222, 0xededddde, 0x22222222,
+        0xddddddde, 0x23230b0b, 0xdcdcf4f5, 0x0b0b2323, 0xf4f4dcdd, 0x1d1d0000, 0xe2e30000, 0x00001d1d,
+        0xffffe2e3, 0x1615eced, 0xe9ea1313, 0xeced1616, 0x1312e9ea, 0x1a19f0f1, 0xe5e60f0f, 0xf0f11a1a,
+        0x0f0ee5e6, 0x25250000, 0xdadb0000, 0x00002525, 0xffffdadb, 0x2c2c1b1b, 0xd3d3e4e5, 0x1b1b2c2c,
+        0xe4e4d3d4, 0x2c2c2424, 0xd3d3dbdc, 0x24242c2c, 0xdbdbd3d4, 0x2c2c1212, 0xd3d3edee, 0x12122c2c,
+        0xededd3d4, 0x2120f5f6, 0xdedf0a0a, 0xf5f62121, 0x0a09dedf, 0x2d2d2d2d, 0xd2d2d2d3, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606,
+        0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000,
+        0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000,
+        0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd,
+        0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000,
+        0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa,
+        0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303,
+        0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606,
+        0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x03030000, 0xfcfd0000,
+        0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000, 0xf8f90000, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x07070000,
+        0xf8f90000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0403fbfc, 0xfbfc0404, 0xf9fa0a0a,
+        0x0605f5f6, 0xf3f40000, 0x0c0c0000, 0xf3f3f9fa, 0xf3f40606, 0x0c0bf9fa, 0x0c0c0606, 0xfffff1f2,
+        0x00000e0e, 0x0c0c0c0c, 0xf3f3f3f4, 0xedee0000, 0x12120000, 0xf3f40e0e, 0x0c0bf1f2, 0xf9f9edee,
+        0xf9fa1212, 0x0605edee, 0x06061212, 0xededf5f6, 0xedee0a0a, 0x1211f5f6, 0x12120a0a, 0xffffe9ea,
+        0x00001616, 0xe7e80000, 0x18180000, 0xf3f3e9ea, 0xf3f41616, 0x0c0be9ea, 0x0c0c1616, 0xe7e7f7f8,
+        0xe7e80808, 0x1817f7f8, 0x18180808, 0xf9f9e5e6, 0xf9fa1a1a, 0x0605e5e6, 0x06061a1a, 0xffffe3e4,
+        0x00001c1c, 0x14141414, 0xebebebec, 0xe5e5f1f2, 0x1a1a0e0e, 0xf3f3e1e2, 0x0c0c1e1e, 0xdfdff5f6,
+        0x20200a0a, 0xdfdfedee, 0x20201212, 0xe5e5e5e6, 0x1a1a1a1a, 0xebebddde, 0x14142222, 0xf3f3d9da,
+        0x0c0c2626, 0xdfdfdfe0, 0x20202020, 0x20202020, 0xd7d7e9ea, 0xddddddde, 0x22222222, 0x00000000,
+        0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202,
+        0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606,
+        0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe,
+        0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000,
+        0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000,
+        0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000,
+        0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000,
+        0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000,
+        0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202,
+        0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606,
+        0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe,
+        0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000,
+        0xfdfe0000, 0x00000202, 0xfffffdfe, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000,
+        0xf9fa0000, 0x00000606, 0xfffff9fa, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a,
+        0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x0605f9fa, 0xf9fa0606, 0xf7f80e0e,
+        0x0807f1f2, 0xffffedee, 0x00001212, 0xeff00a0a, 0x100ff5f6, 0xe7e80000, 0x18180000, 0xf7f7e7e8,
+        0xf7f81818, 0x0807e7e8, 0x08081818, 0x12121212, 0xedededee, 0xeff01414, 0x100febec, 0xe5e5f1f2,
+        0xe5e60e0e, 0x1a19f1f2, 0x1a1a0e0e, 0xffffe1e2, 0x00001e1e, 0xddde0000, 0x22220000, 0xf7f7ddde,
+        0xf7f82222, 0x0807ddde, 0x08082222, 0xedede1e2, 0xedee1e1e, 0x1211e1e2, 0x12121e1e, 0xddddf5f6,
+        0xddde0a0a, 0x2221f5f6, 0x22220a0a, 0xddddebec, 0x22221414, 0xffffd7d8, 0x00002828, 0x1e1e1e1e,
+        0xe1e1e1e2, 0xededd7d8, 0x12122828, 0xd3d40000, 0x2c2c0000, 0xd3d3eff0, 0x2c2c1010, 0xdbdbdbdc,
+        0xdbdbdbdc, 0x24242424, 0xd3d3e5e6, 0x2c2c1a1a, 0xe5e5d1d2, 0x1a1a2e2e, 0xededcbcc, 0x12123434,
+        0xc9c9ebec, 0xd3d3d3d4, 0x2c2c2c2c, 0xc9c9dfe0, 0xd1d1d1d2, 0xd1d1d1d2, 0x2e2e2e2e, 0x00000000,
+        0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6,
+        0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202,
+        0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a,
+        0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc,
+        0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000,
+        0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000,
+        0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe,
+        0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6,
+        0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a,
+        0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000,
+        0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000,
+        0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404,
+        0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000,
+        0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6,
+        0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202,
+        0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a,
+        0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc,
+        0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000,
+        0xfdfe0000, 0x00000202, 0xfffffdfe, 0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000,
+        0xf5f60000, 0x00000a0a, 0xfffff5f6, 0x00000000, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe,
+        0x04040404, 0xfbfbfbfc, 0x0a0a0a0a, 0xf5f5f5f6, 0x0a0a0000, 0xf5f60000, 0x00000a0a, 0xfffff5f6,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x0807f7f8, 0xf7f80808, 0xeff00808,
+        0x100ff7f8, 0xe7e80000, 0x18180000, 0xf7f7e7e8, 0xf7f81818, 0x0807e7e8, 0x08081818, 0xeff01414,
+        0x100febec, 0xffffe3e4, 0x00001c1c, 0xe7e7eff0, 0xe7e81010, 0x1817eff0, 0x18181010, 0xdfe00000,
+        0x20200000, 0xefefe3e4, 0xeff01c1c, 0x100fe3e4, 0x10101c1c, 0xdfdff7f8, 0xdfe00808, 0xf7f7dfe0,
+        0xf7f82020, 0x0807dfe0, 0x08082020, 0x201ff7f8, 0x20200808, 0x18181818, 0xe7e7e7e8, 0xe7e81818,
+        0x1817e7e8, 0xdfdfebec, 0x20201414, 0xffffd7d8, 0x00002828, 0xefefd7d8, 0x10102828, 0xd3d40000,
+        0xd3d40000, 0xffffd3d4, 0x00002c2c, 0x2c2c0000, 0x2c2c0000, 0xdfdfdfe0, 0x20202020, 0xd3d3eff0,
+        0x2c2c1010, 0xd3d3e7e8, 0xe7e7d3d4, 0x18182c2c, 0x2c2c1818, 0xefefcfd0, 0x10103030, 0xdbdbdbdc,
+        0xdbdbdbdc, 0x24242424, 0x24242424, 0xcbcbebec, 0x28282828, 0xd7d7d7d8, 0xcbcbdfe0, 0x00000000,
+        0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404,
+        0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c,
+        0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000,
+        0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000,
+        0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc,
+        0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4,
+        0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000,
+        0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000,
+        0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404,
+        0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000,
+        0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404,
+        0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c,
+        0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000,
+        0xfbfc0000, 0x00000404, 0xfffffbfc, 0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000,
+        0xf3f40000, 0x00000c0c, 0xfffff3f4, 0x00000000, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc,
+        0x04040404, 0xfbfbfbfc, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0000, 0xf3f40000, 0x00000c0c, 0xfffff3f4,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0,
+        0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0,
+        0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0,
+        0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414,
+        0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0,
+        0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c,
+        0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec,
+        0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606,
+        0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e,
+        0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4,
+        0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x06060606, 0xf9f9f9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020,
+        0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x06060606, 0xf9f9f9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x14141414, 0xebebebec, 0x20202020, 0xdfdfdfe0, 0x2e2e2e2e, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
 };
 
 
 static const uint32_t correctionhighorder[] = {
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 
-	0xfeff0303, 0x0100fcfd, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 
-	0x0100fcfd, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 
-	0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 0x02020202, 
-	0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 0x02020202, 0xfdfdfdfe, 
-	0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 
-	0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 
-	0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 
-	0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 
-	0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 
-	0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 
-	0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 
-	0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 
-	0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 
-	0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 
-	0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x04040404, 0xfbfbfbfc, 
-	0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 
-	0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 
-	0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 
-	0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 
-	0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 
-	0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 
-	0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 
-	0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 
-	0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 
-	0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 
-	0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 
-	0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 
-	0x03030a0a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 
-	0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 
-	0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 
-	0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 
-	0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 
-	0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 
-	0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 
-	0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 
-	0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 
-	0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 
-	0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 
-	0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 
-	0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 
-	0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 
-	0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 
-	0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x06060606, 0xf9f9f9fa, 
-	0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 
-	0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 
-	0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 
-	0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 
-	0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 
-	0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 
-	0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 
-	0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 
-	0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 
-	0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 
-	0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 
-	0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 
-	0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 
-	0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 
-	0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 
-	0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 
-	0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 
-	0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x07070707, 0xf8f8f8f9, 
-	0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 
-	0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 
-	0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 
-	0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 
-	0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 
-	0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 
-	0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 
-	0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 
-	0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 
-	0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 
-	0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 
-	0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 
-	0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 
-	0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 
-	0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 
-	0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 
-	0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 
-	0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 
-	0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 
-	0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 
-	0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 
-	0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 
-	0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 
-	0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 
-	0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 
-	0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 
-	0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 
-	0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 
-	0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 
-	0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 
-	0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 
-	0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 
-	0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 
-	0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 
-	0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 
-	0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x09090909, 0xf6f6f6f7, 
-	0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 
-	0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 
-	0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 
-	0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 
-	0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 
-	0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 
-	0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 
-	0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 
-	0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 
-	0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 
-	0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 
-	0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 
-	0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 
-	0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 
-	0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 
-	0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 
-	0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 
-	0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 
-	0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 
-	0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 
-	0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 
-	0x00000202, 0xfffffdfe, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 
-	0xfffffdfe, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 
-	0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 
-	0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 0x02020202, 
-	0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 0x02020202, 0xfdfdfdfe, 
-	0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 
-	0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 
-	0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 
-	0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 
-	0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 
-	0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 
-	0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 
-	0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 
-	0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 
-	0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 
-	0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x04040404, 0xfbfbfbfc, 
-	0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 
-	0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 
-	0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 
-	0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 
-	0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 
-	0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 
-	0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 
-	0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 
-	0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 
-	0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 
-	0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 
-	0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 
-	0x04040808, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 
-	0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 
-	0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 
-	0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 
-	0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 
-	0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 
-	0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 
-	0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 
-	0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 
-	0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 
-	0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 
-	0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 
-	0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 
-	0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 
-	0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 
-	0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x06060606, 0xf9f9f9fa, 
-	0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 
-	0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 
-	0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 
-	0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 
-	0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 
-	0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 
-	0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 
-	0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 
-	0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 
-	0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 
-	0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 
-	0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 
-	0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x07070707, 0xf8f8f8f9, 
-	0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 
-	0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 
-	0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 
-	0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 
-	0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 
-	0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 
-	0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 
-	0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 
-	0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 
-	0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 
-	0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 
-	0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 
-	0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 
-	0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 
-	0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 
-	0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 
-	0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 
-	0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 
-	0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 
-	0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 
-	0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 
-	0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 
-	0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 
-	0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 
-	0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 
-	0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 
-	0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 
-	0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 
-	0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 
-	0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 
-	0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 
-	0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 
-	0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 
-	0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 
-	0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 
-	0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x09090909, 0xf6f6f6f7, 
-	0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 
-	0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 
-	0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 
-	0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 
-	0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 
-	0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 
-	0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 
-	0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 
-	0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 
-	0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 
-	0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 
-	0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 
-	0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 
-	0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 
-	0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 
-	0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 
-	0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 
-	0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 
-	0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 
-	0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 
-	0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 
-	0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 
-	0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 
-	0xfdfdfdfe, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 
-	0x03030000, 0x03030000, 0x03030000, 0x03030000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 
-	0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0x00000303, 
-	0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 
-	0x00000303, 0x00000303, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 
-	0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0x06060606, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 
-	0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 
-	0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0xf8f90000, 0xf8f90000, 
-	0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 
-	0xf8f90000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 
-	0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 
-	0x02020000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 
-	0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0x00000202, 0x00000202, 
-	0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 
-	0x00000202, 0x00000202, 0x00000202, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 
-	0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 
-	0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 
-	0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 
-	0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 
-	0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 
-	0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 
-	0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 
-	0x06060000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 
-	0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0x00000606, 0x00000606, 
-	0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 
-	0x00000606, 0x00000606, 0x00000606, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 
-	0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 
-	0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 
-	0x02020000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 
-	0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0x00000202, 0x00000202, 
-	0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 
-	0x00000202, 0x00000202, 0x00000202, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 
-	0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 
-	0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 
-	0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 
-	0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 
-	0xfbfbfbfc, 0xfbfbfbfc, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 
-	0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0xf5f5f5f6, 
-	0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 
-	0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 
-	0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 
-	0x0a0a0000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 
-	0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0x00000a0a, 0x00000a0a, 
-	0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 
-	0x00000a0a, 0x00000a0a, 0x00000a0a, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 
-	0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 
-	0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 
-	0x04040000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 
-	0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0x00000404, 0x00000404, 
-	0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 
-	0x00000404, 0x00000404, 0x00000404, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 
-	0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 
-	0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 
-	0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 
-	0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 
-	0xfbfbfbfc, 0xfbfbfbfc, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 
-	0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 
-	0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 
-	0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 
-	0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 
-	0x0c0c0000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 
-	0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0x00000c0c, 0x00000c0c, 
-	0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 
-	0x00000c0c, 0x00000c0c, 0x00000c0c, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 
-	0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 
-	0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 
-	0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 
-	0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 
-	0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 
-	0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 
-	0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 
-	0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec, 
-	0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 
-	0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 
-	0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 
-	0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 
-	0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e, 
-	0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 
-	0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 
-	0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 
-	0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 
-	0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 
-	0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 
-	0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 
-	0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 
-	0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 
-	0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec, 
-	0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 
-	0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 
-	0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 
-	0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 
-	0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e, 
-	0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 
-	0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 
-	0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 
-	0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 
-	0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 
-	0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 
-	0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 
-	0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 
-	0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 
-	0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec, 
-	0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 
-	0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 
-	0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 
-	0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 
-	0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e, 
-	0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 
-	0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 
-	0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 
-	0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 
-	0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 
-	0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 
-	0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 
-	0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 
-	0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 
-	0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 
-	0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 
-	0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 
-	0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec, 
-	0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 
-	0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 
-	0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 
-	0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 
-	0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e, 
-	0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 
-	0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 
-	0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 
-	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101,
+        0xfeff0303, 0x0100fcfd, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303,
+        0x0100fcfd, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x0302feff, 0xfcfd0101, 0xfeff0303, 0x0100fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9,
+        0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707,
+        0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc,
+        0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404,
+        0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101,
+        0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x0403feff,
+        0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303, 0xfcfcfcfd,
+        0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000, 0x03030303,
+        0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9, 0x00000000,
+        0x03030303, 0xfcfcfcfd, 0x0403feff, 0xfbfc0101, 0xfeff0404, 0x0100fbfc, 0x07070707, 0xf8f8f8f9,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x04040404, 0xfbfbfbfc,
+        0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000,
+        0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd,
+        0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb,
+        0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101,
+        0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc,
+        0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000,
+        0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd,
+        0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb,
+        0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101,
+        0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000, 0x04040404, 0xfbfbfbfc,
+        0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd, 0x03030a0a, 0x00000000,
+        0x04040404, 0xfbfbfbfc, 0x0504feff, 0xfafb0101, 0xfeff0505, 0x0100fafb, 0x0a0a0303, 0xf5f5fcfd,
+        0x03030a0a, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202,
+        0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505,
+        0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303,
+        0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9,
+        0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe,
+        0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000,
+        0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5,
+        0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707,
+        0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb,
+        0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd,
+        0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b,
+        0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202,
+        0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x05050505,
+        0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9, 0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303,
+        0xf2f2fcfd, 0x00000000, 0x05050505, 0xfafafafb, 0x0706fdfe, 0xf8f90202, 0xfdfe0707, 0x0201f8f9,
+        0x0b0b0b0b, 0xf4f4f4f5, 0x0d0d0303, 0xf2f2fcfd, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc,
+        0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8,
+        0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc,
+        0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8,
+        0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc,
+        0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8,
+        0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc,
+        0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8,
+        0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc,
+        0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8,
+        0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8, 0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc,
+        0x04040f0f, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x0807fdfe, 0xf7f80202, 0xfdfe0808, 0x0201f7f8,
+        0x0d0d0d0d, 0xf2f2f2f3, 0x0f0f0404, 0xf0f0fbfc, 0x04040f0f, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb,
+        0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6,
+        0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb,
+        0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6,
+        0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb,
+        0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6,
+        0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb,
+        0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6,
+        0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb,
+        0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6,
+        0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6, 0x10101010, 0xefefeff0, 0x12120505, 0xededfafb,
+        0x05051212, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x0a09fcfd, 0xf5f60303, 0xfcfd0a0a, 0x0302f5f6,
+        0x10101010, 0xefefeff0, 0x12120505, 0xededfafb, 0x05051212, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee,
+        0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303,
+        0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee,
+        0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303,
+        0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee,
+        0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303,
+        0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee,
+        0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303,
+        0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee,
+        0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303,
+        0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303, 0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee,
+        0x14140505, 0xebebfafb, 0x05051414, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x0b0afcfd, 0xf4f50303,
+        0xfcfd0b0b, 0x0302f4f5, 0x12121212, 0xedededee, 0x14140505, 0xebebfafb, 0x05051414, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x09090909, 0xf6f6f6f7,
+        0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa,
+        0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c,
+        0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000,
+        0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec,
+        0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd,
+        0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717,
+        0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4,
+        0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909,
+        0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606,
+        0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303,
+        0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9,
+        0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414,
+        0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7,
+        0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa,
+        0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c,
+        0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000,
+        0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec,
+        0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd,
+        0xf3f40303, 0xfcfd0c0c, 0x0302f3f4, 0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717,
+        0xf9f9e8e9, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x0c0bfcfd, 0xf3f40303, 0xfcfd0c0c, 0x0302f3f4,
+        0x14141414, 0xebebebec, 0x17170606, 0xe8e8f9fa, 0x06061717, 0xf9f9e8e9, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000,
+        0x00000202, 0xfffffdfe, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202,
+        0xfffffdfe, 0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe,
+        0x00000000, 0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000,
+        0x02020202, 0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 0x02020202,
+        0xfdfdfdfe, 0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 0x02020202, 0xfdfdfdfe,
+        0x02020000, 0xfdfe0000, 0x00000202, 0xfffffdfe, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa,
+        0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606,
+        0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd,
+        0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303,
+        0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000,
+        0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd, 0x03030000,
+        0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303, 0xfcfcfcfd,
+        0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000, 0x03030303,
+        0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa, 0x00000000,
+        0x03030303, 0xfcfcfcfd, 0x03030000, 0xfcfd0000, 0x00000303, 0xfffffcfd, 0x06060606, 0xf9f9f9fa,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x04040404, 0xfbfbfbfc,
+        0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000,
+        0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc,
+        0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc,
+        0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000,
+        0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc,
+        0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000,
+        0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc,
+        0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc,
+        0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000,
+        0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000, 0x04040404, 0xfbfbfbfc,
+        0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc, 0x04040808, 0x00000000,
+        0x04040404, 0xfbfbfbfc, 0x04040000, 0xfbfc0000, 0x00000404, 0xfffffbfc, 0x08080404, 0xf7f7fbfc,
+        0x04040808, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000,
+        0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505,
+        0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505,
+        0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb,
+        0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000,
+        0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000,
+        0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6,
+        0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505,
+        0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb,
+        0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb,
+        0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a,
+        0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000,
+        0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x05050505,
+        0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb, 0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505,
+        0xf0f0fafb, 0x00000000, 0x05050505, 0xfafafafb, 0x05050000, 0xfafb0000, 0x00000505, 0xfffffafb,
+        0x0a0a0a0a, 0xf5f5f5f6, 0x0f0f0505, 0xf0f0fafb, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa,
+        0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa,
+        0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa,
+        0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa,
+        0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa,
+        0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa,
+        0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa, 0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa,
+        0x06060c0c, 0x00000000, 0x06060606, 0xf9f9f9fa, 0x06060000, 0xf9fa0000, 0x00000606, 0xfffff9fa,
+        0x0c0c0c0c, 0xf3f3f3f4, 0x0c0c0606, 0xf3f3f9fa, 0x06060c0c, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9,
+        0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9,
+        0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9,
+        0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9,
+        0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9,
+        0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9,
+        0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9,
+        0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9,
+        0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9,
+        0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9,
+        0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9,
+        0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9, 0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9,
+        0x07071515, 0x00000000, 0x07070707, 0xf8f8f8f9, 0x07070000, 0xf8f90000, 0x00000707, 0xfffff8f9,
+        0x0e0e0e0e, 0xf1f1f1f2, 0x15150707, 0xeaeaf8f9, 0x07071515, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0,
+        0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000,
+        0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0,
+        0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000,
+        0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0,
+        0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000,
+        0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0,
+        0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000,
+        0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0,
+        0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000,
+        0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000,
+        0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000, 0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0,
+        0x10100808, 0xefeff7f8, 0x08081010, 0x00000000, 0x08080808, 0xf7f7f7f8, 0x08080000, 0xf7f80000,
+        0x00000808, 0xfffff7f8, 0x10101010, 0xefefeff0, 0x10100808, 0xefeff7f8, 0x08081010, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000, 0x09090909, 0xf6f6f6f7,
+        0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7,
+        0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909,
+        0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000,
+        0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee,
+        0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000,
+        0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b,
+        0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7,
+        0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909,
+        0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909,
+        0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000,
+        0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5,
+        0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212,
+        0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7,
+        0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7,
+        0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909,
+        0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000,
+        0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee,
+        0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000,
+        0xf6f70000, 0x00000909, 0xfffff6f7, 0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b,
+        0xf6f6e4e5, 0x00000000, 0x09090909, 0xf6f6f6f7, 0x09090000, 0xf6f70000, 0x00000909, 0xfffff6f7,
+        0x12121212, 0xedededee, 0x1b1b0909, 0xe4e4f6f7, 0x09091b1b, 0xf6f6e4e5, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000, 0x03030000,
+        0x03030000, 0x03030000, 0x03030000, 0x03030000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000,
+        0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0xfcfd0000, 0x00000303,
+        0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303, 0x00000303,
+        0x00000303, 0x00000303, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd,
+        0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0xfffffcfd, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000,
+        0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0x07070000, 0xf8f90000, 0xf8f90000,
+        0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000, 0xf8f90000,
+        0xf8f90000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020000, 0x02020000, 0x02020000, 0x02020000,
+        0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000,
+        0x02020000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000,
+        0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0x00000202, 0x00000202,
+        0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202,
+        0x00000202, 0x00000202, 0x00000202, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe,
+        0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0x06060000, 0x06060000, 0x06060000, 0x06060000,
+        0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000, 0x06060000,
+        0x06060000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000,
+        0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0xf9fa0000, 0x00000606, 0x00000606,
+        0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606, 0x00000606,
+        0x00000606, 0x00000606, 0x00000606, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa,
+        0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa, 0xfffff9fa,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020000, 0x02020000, 0x02020000, 0x02020000,
+        0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000, 0x02020000,
+        0x02020000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000,
+        0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0xfdfe0000, 0x00000202, 0x00000202,
+        0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202, 0x00000202,
+        0x00000202, 0x00000202, 0x00000202, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe,
+        0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe, 0xfffffdfe,
+        0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404,
+        0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc,
+        0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc,
+        0xfbfbfbfc, 0xfbfbfbfc, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a,
+        0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0x0a0a0a0a, 0xf5f5f5f6,
+        0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6,
+        0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0xf5f5f5f6, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000,
+        0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000, 0x0a0a0000,
+        0x0a0a0000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000,
+        0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0xf5f60000, 0x00000a0a, 0x00000a0a,
+        0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a, 0x00000a0a,
+        0x00000a0a, 0x00000a0a, 0x00000a0a, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6,
+        0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6, 0xfffff5f6,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x04040000, 0x04040000, 0x04040000, 0x04040000,
+        0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000, 0x04040000,
+        0x04040000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000,
+        0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0xfbfc0000, 0x00000404, 0x00000404,
+        0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404, 0x00000404,
+        0x00000404, 0x00000404, 0x00000404, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc,
+        0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc, 0xfffffbfc,
+        0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404,
+        0x04040404, 0x04040404, 0x04040404, 0x04040404, 0x04040404, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc,
+        0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc, 0xfbfbfbfc,
+        0xfbfbfbfc, 0xfbfbfbfc, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000,
+        0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000, 0x0c0c0000,
+        0x0c0c0000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000,
+        0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0xf3f40000, 0x00000c0c, 0x00000c0c,
+        0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c, 0x00000c0c,
+        0x00000c0c, 0x00000c0c, 0x00000c0c, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4,
+        0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4, 0xfffff3f4,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414,
+        0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0,
+        0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414,
+        0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0,
+        0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414,
+        0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0,
+        0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef,
+        0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0xdeadbeef, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202, 0x02020202,
+        0x02020202, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe,
+        0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0xfdfdfdfe, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606, 0x06060606,
+        0x06060606, 0x06060606, 0x06060606, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa, 0xf9f9f9fa,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c,
+        0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0x0c0c0c0c, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4, 0xf3f3f3f4,
+        0xf3f3f3f4, 0xf3f3f3f4, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414,
+        0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0x14141414, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec,
+        0xebebebec, 0xebebebec, 0xebebebec, 0xebebebec, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020, 0x20202020,
+        0x20202020, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0,
+        0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0xdfdfdfe0, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e,
+        0x2e2e2e2e, 0x2e2e2e2e, 0x2e2e2e2e, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2, 0xd1d1d1d2,
+        0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000
 };
diff --git a/src/libffmpeg/libavcodec/interplayvideo.c b/src/libffmpeg/libavcodec/interplayvideo.c
index f4add08c0..73165e795 100644
--- a/src/libffmpeg/libavcodec/interplayvideo.c
+++ b/src/libffmpeg/libavcodec/interplayvideo.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -846,7 +846,7 @@ static void ipvideo_decode_opcodes(IpvideoContext *s)
     }
     if ((s->stream_ptr != s->stream_end) &&
         (s->stream_ptr + 1 != s->stream_end)) {
-        av_log(s->avctx, AV_LOG_ERROR, " Interplay video: decode finished with %d bytes left over\n",
+        av_log(s->avctx, AV_LOG_ERROR, " Interplay video: decode finished with %td bytes left over\n",
             s->stream_end - s->stream_ptr);
     }
 }
diff --git a/src/libffmpeg/libavcodec/jfdctfst.c b/src/libffmpeg/libavcodec/jfdctfst.c
index a393c5ca8..804fd5766 100644
--- a/src/libffmpeg/libavcodec/jfdctfst.c
+++ b/src/libffmpeg/libavcodec/jfdctfst.c
@@ -34,7 +34,7 @@
  * @file jfdctfst.c
  * Independent JPEG Group's fast AAN dct.
  */
- 
+
 #include <stdlib.h>
 #include <stdio.h>
 #include "common.h"
@@ -83,10 +83,10 @@
  */
 
 #if CONST_BITS == 8
-#define FIX_0_382683433  ((int32_t)   98)		/* FIX(0.382683433) */
-#define FIX_0_541196100  ((int32_t)  139)		/* FIX(0.541196100) */
-#define FIX_0_707106781  ((int32_t)  181)		/* FIX(0.707106781) */
-#define FIX_1_306562965  ((int32_t)  334)		/* FIX(1.306562965) */
+#define FIX_0_382683433  ((int32_t)   98)       /* FIX(0.382683433) */
+#define FIX_0_541196100  ((int32_t)  139)       /* FIX(0.541196100) */
+#define FIX_0_707106781  ((int32_t)  181)       /* FIX(0.707106781) */
+#define FIX_1_306562965  ((int32_t)  334)       /* FIX(1.306562965) */
 #else
 #define FIX_0_382683433  FIX(0.382683433)
 #define FIX_0_541196100  FIX(0.541196100)
@@ -132,42 +132,42 @@ static always_inline void row_fdct(DCTELEM * data){
     tmp5 = dataptr[2] - dataptr[5];
     tmp3 = dataptr[3] + dataptr[4];
     tmp4 = dataptr[3] - dataptr[4];
-    
+
     /* Even part */
-    
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[0] = tmp10 + tmp11; /* phase 3 */
     dataptr[4] = tmp10 - tmp11;
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
-    dataptr[2] = tmp13 + z1;	/* phase 5 */
+    dataptr[2] = tmp13 + z1;    /* phase 5 */
     dataptr[6] = tmp13 - z1;
-    
+
     /* Odd part */
 
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
     tmp11 = tmp5 + tmp6;
     tmp12 = tmp6 + tmp7;
 
     /* The rotator is modified from fig 4-8 to avoid extra negations. */
     z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
-    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
-    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
-    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5;    /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5;    /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781);         /* c4 */
 
-    z11 = tmp7 + z3;		/* phase 5 */
+    z11 = tmp7 + z3;            /* phase 5 */
     z13 = tmp7 - z3;
 
-    dataptr[5] = z13 + z2;	/* phase 6 */
+    dataptr[5] = z13 + z2;      /* phase 6 */
     dataptr[3] = z13 - z2;
     dataptr[1] = z11 + z4;
     dataptr[7] = z11 - z4;
 
-    dataptr += DCTSIZE;		/* advance pointer to next row */
+    dataptr += DCTSIZE;         /* advance pointer to next row */
   }
 }
 
@@ -186,7 +186,7 @@ fdct_ifast (DCTELEM * data)
   SHIFT_TEMPS
 
   row_fdct(data);
-  
+
   /* Pass 2: process columns. */
 
   dataptr = data;
@@ -199,24 +199,24 @@ fdct_ifast (DCTELEM * data)
     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-    
+
     /* Even part */
-    
-    tmp10 = tmp0 + tmp3;	/* phase 2 */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[DCTSIZE*0] = tmp10 + tmp11; /* phase 3 */
     dataptr[DCTSIZE*4] = tmp10 - tmp11;
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
     dataptr[DCTSIZE*2] = tmp13 + z1; /* phase 5 */
     dataptr[DCTSIZE*6] = tmp13 - z1;
-    
+
     /* Odd part */
 
-    tmp10 = tmp4 + tmp5;	/* phase 2 */
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
     tmp11 = tmp5 + tmp6;
     tmp12 = tmp6 + tmp7;
 
@@ -226,7 +226,7 @@ fdct_ifast (DCTELEM * data)
     z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
     z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
 
-    z11 = tmp7 + z3;		/* phase 5 */
+    z11 = tmp7 + z3;            /* phase 5 */
     z13 = tmp7 - z3;
 
     dataptr[DCTSIZE*5] = z13 + z2; /* phase 6 */
@@ -234,7 +234,7 @@ fdct_ifast (DCTELEM * data)
     dataptr[DCTSIZE*1] = z11 + z4;
     dataptr[DCTSIZE*7] = z11 - z4;
 
-    dataptr++;			/* advance pointer to next column */
+    dataptr++;                  /* advance pointer to next column */
   }
 }
 
@@ -253,7 +253,7 @@ fdct_ifast248 (DCTELEM * data)
   SHIFT_TEMPS
 
   row_fdct(data);
-   
+
   /* Pass 2: process columns. */
 
   dataptr = data;
@@ -268,15 +268,15 @@ fdct_ifast248 (DCTELEM * data)
     tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
 
     /* Even part */
-    
+
     tmp10 = tmp0 + tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
     tmp13 = tmp0 - tmp3;
-    
+
     dataptr[DCTSIZE*0] = tmp10 + tmp11;
     dataptr[DCTSIZE*4] = tmp10 - tmp11;
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
     dataptr[DCTSIZE*2] = tmp13 + z1;
     dataptr[DCTSIZE*6] = tmp13 - z1;
@@ -285,15 +285,15 @@ fdct_ifast248 (DCTELEM * data)
     tmp11 = tmp5 + tmp6;
     tmp12 = tmp5 - tmp6;
     tmp13 = tmp4 - tmp7;
-    
+
     dataptr[DCTSIZE*1] = tmp10 + tmp11;
     dataptr[DCTSIZE*5] = tmp10 - tmp11;
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781);
     dataptr[DCTSIZE*3] = tmp13 + z1;
     dataptr[DCTSIZE*7] = tmp13 - z1;
-    
-    dataptr++;			/* advance pointer to next column */
+
+    dataptr++;                        /* advance pointer to next column */
   }
 }
 
diff --git a/src/libffmpeg/libavcodec/jfdctint.c b/src/libffmpeg/libavcodec/jfdctint.c
index 1fbd85b28..41d274991 100644
--- a/src/libffmpeg/libavcodec/jfdctint.c
+++ b/src/libffmpeg/libavcodec/jfdctint.c
@@ -27,7 +27,7 @@
  * @file jfdctint.c
  * Independent JPEG Group's slow & accurate dct.
  */
- 
+
 #include <stdlib.h>
 #include <stdio.h>
 #include "common.h"
@@ -92,10 +92,10 @@
 
 #if BITS_IN_JSAMPLE == 8
 #define CONST_BITS  13
-#define PASS1_BITS  4		/* set this to 2 if 16x16 multiplies are faster */
+#define PASS1_BITS  4   /* set this to 2 if 16x16 multiplies are faster */
 #else
 #define CONST_BITS  13
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
 #endif
 
 /* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
@@ -106,18 +106,18 @@
  */
 
 #if CONST_BITS == 13
-#define FIX_0_298631336  ((int32_t)  2446)	/* FIX(0.298631336) */
-#define FIX_0_390180644  ((int32_t)  3196)	/* FIX(0.390180644) */
-#define FIX_0_541196100  ((int32_t)  4433)	/* FIX(0.541196100) */
-#define FIX_0_765366865  ((int32_t)  6270)	/* FIX(0.765366865) */
-#define FIX_0_899976223  ((int32_t)  7373)	/* FIX(0.899976223) */
-#define FIX_1_175875602  ((int32_t)  9633)	/* FIX(1.175875602) */
-#define FIX_1_501321110  ((int32_t)  12299)	/* FIX(1.501321110) */
-#define FIX_1_847759065  ((int32_t)  15137)	/* FIX(1.847759065) */
-#define FIX_1_961570560  ((int32_t)  16069)	/* FIX(1.961570560) */
-#define FIX_2_053119869  ((int32_t)  16819)	/* FIX(2.053119869) */
-#define FIX_2_562915447  ((int32_t)  20995)	/* FIX(2.562915447) */
-#define FIX_3_072711026  ((int32_t)  25172)	/* FIX(3.072711026) */
+#define FIX_0_298631336  ((int32_t)  2446)      /* FIX(0.298631336) */
+#define FIX_0_390180644  ((int32_t)  3196)      /* FIX(0.390180644) */
+#define FIX_0_541196100  ((int32_t)  4433)      /* FIX(0.541196100) */
+#define FIX_0_765366865  ((int32_t)  6270)      /* FIX(0.765366865) */
+#define FIX_0_899976223  ((int32_t)  7373)      /* FIX(0.899976223) */
+#define FIX_1_175875602  ((int32_t)  9633)      /* FIX(1.175875602) */
+#define FIX_1_501321110  ((int32_t)  12299)     /* FIX(1.501321110) */
+#define FIX_1_847759065  ((int32_t)  15137)     /* FIX(1.847759065) */
+#define FIX_1_961570560  ((int32_t)  16069)     /* FIX(1.961570560) */
+#define FIX_2_053119869  ((int32_t)  16819)     /* FIX(2.053119869) */
+#define FIX_2_562915447  ((int32_t)  20995)     /* FIX(2.562915447) */
+#define FIX_3_072711026  ((int32_t)  25172)     /* FIX(3.072711026) */
 #else
 #define FIX_0_298631336  FIX(0.298631336)
 #define FIX_0_390180644  FIX(0.390180644)
@@ -170,36 +170,36 @@ static always_inline void row_fdct(DCTELEM * data){
     tmp5 = dataptr[2] - dataptr[5];
     tmp3 = dataptr[3] + dataptr[4];
     tmp4 = dataptr[3] - dataptr[4];
-    
+
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
      */
-    
+
     tmp10 = tmp0 + tmp3;
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[0] = (DCTELEM) ((tmp10 + tmp11) << PASS1_BITS);
     dataptr[4] = (DCTELEM) ((tmp10 - tmp11) << PASS1_BITS);
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
     dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-				   CONST_BITS-PASS1_BITS);
+                                   CONST_BITS-PASS1_BITS);
     dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-				   CONST_BITS-PASS1_BITS);
-    
+                                   CONST_BITS-PASS1_BITS);
+
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
      * cK represents cos(K*pi/16).
      * i0..i3 in the paper are tmp4..tmp7 here.
      */
-    
+
     z1 = tmp4 + tmp7;
     z2 = tmp5 + tmp6;
     z3 = tmp4 + tmp6;
     z4 = tmp5 + tmp7;
     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-    
+
     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
@@ -208,16 +208,16 @@ static always_inline void row_fdct(DCTELEM * data){
     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    
+
     z3 += z5;
     z4 += z5;
-    
+
     dataptr[7] = (DCTELEM) DESCALE(tmp4 + z1 + z3, CONST_BITS-PASS1_BITS);
     dataptr[5] = (DCTELEM) DESCALE(tmp5 + z2 + z4, CONST_BITS-PASS1_BITS);
     dataptr[3] = (DCTELEM) DESCALE(tmp6 + z2 + z3, CONST_BITS-PASS1_BITS);
     dataptr[1] = (DCTELEM) DESCALE(tmp7 + z1 + z4, CONST_BITS-PASS1_BITS);
-    
-    dataptr += DCTSIZE;		/* advance pointer to next row */
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
   }
 }
 
@@ -252,36 +252,36 @@ ff_jpeg_fdct_islow (DCTELEM * data)
     tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*5];
     tmp3 = dataptr[DCTSIZE*3] + dataptr[DCTSIZE*4];
     tmp4 = dataptr[DCTSIZE*3] - dataptr[DCTSIZE*4];
-    
+
     /* Even part per LL&M figure 1 --- note that published figure is faulty;
      * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
      */
-    
+
     tmp10 = tmp0 + tmp3;
     tmp13 = tmp0 - tmp3;
     tmp11 = tmp1 + tmp2;
     tmp12 = tmp1 - tmp2;
-    
+
     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
-    
+
     z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-					   CONST_BITS+PASS1_BITS);
+                                           CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-					   CONST_BITS+PASS1_BITS);
-    
+                                           CONST_BITS+PASS1_BITS);
+
     /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
      * cK represents cos(K*pi/16).
      * i0..i3 in the paper are tmp4..tmp7 here.
      */
-    
+
     z1 = tmp4 + tmp7;
     z2 = tmp5 + tmp6;
     z3 = tmp4 + tmp6;
     z4 = tmp5 + tmp7;
     z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
-    
+
     tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
     tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
     tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
@@ -290,20 +290,20 @@ ff_jpeg_fdct_islow (DCTELEM * data)
     z2 = MULTIPLY(z2, - FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
     z3 = MULTIPLY(z3, - FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
     z4 = MULTIPLY(z4, - FIX_0_390180644); /* sqrt(2) * (c5-c3) */
-    
+
     z3 += z5;
     z4 += z5;
-    
+
     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp4 + z1 + z3,
-					   CONST_BITS+PASS1_BITS);
+                                           CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp5 + z2 + z4,
-					   CONST_BITS+PASS1_BITS);
+                                           CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp6 + z2 + z3,
-					   CONST_BITS+PASS1_BITS);
+                                           CONST_BITS+PASS1_BITS);
     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp7 + z1 + z4,
-					   CONST_BITS+PASS1_BITS);
-    
-    dataptr++;			/* advance pointer to next column */
+                                           CONST_BITS+PASS1_BITS);
+
+    dataptr++;                  /* advance pointer to next column */
   }
 }
 
@@ -339,20 +339,20 @@ ff_fdct248_islow (DCTELEM * data)
      tmp5 = dataptr[DCTSIZE*2] - dataptr[DCTSIZE*3];
      tmp6 = dataptr[DCTSIZE*4] - dataptr[DCTSIZE*5];
      tmp7 = dataptr[DCTSIZE*6] - dataptr[DCTSIZE*7];
-      
+
      tmp10 = tmp0 + tmp3;
      tmp11 = tmp1 + tmp2;
      tmp12 = tmp1 - tmp2;
      tmp13 = tmp0 - tmp3;
-     
+
      dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
      dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
-     
+
      z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
      dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-				            CONST_BITS+PASS1_BITS);
+                                            CONST_BITS+PASS1_BITS);
      dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-				            CONST_BITS+PASS1_BITS);
+                                            CONST_BITS+PASS1_BITS);
 
      tmp10 = tmp4 + tmp7;
      tmp11 = tmp5 + tmp6;
@@ -361,13 +361,13 @@ ff_fdct248_islow (DCTELEM * data)
 
      dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp10 + tmp11, PASS1_BITS);
      dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp10 - tmp11, PASS1_BITS);
-     
+
      z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
      dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-				            CONST_BITS+PASS1_BITS);
+                                            CONST_BITS+PASS1_BITS);
      dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-				            CONST_BITS+PASS1_BITS);
-    
-     dataptr++;			/* advance pointer to next column */
+                                            CONST_BITS+PASS1_BITS);
+
+     dataptr++;                 /* advance pointer to next column */
   }
 }
diff --git a/src/libffmpeg/libavcodec/jpeg_ls.c b/src/libffmpeg/libavcodec/jpeg_ls.c
new file mode 100644
index 000000000..4b365bb4a
--- /dev/null
+++ b/src/libffmpeg/libavcodec/jpeg_ls.c
@@ -0,0 +1,843 @@
+/*
+ * JPEG-LS encoder and decoder
+ * Copyright (c) 2003 Michael Niedermayer
+ * Copyright (c) 2006 Konstantin Shishkov
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "golomb.h"
+
+/**
+ * @file jpeg_ls.c
+ * JPEG-LS encoder and decoder.
+ */
+
+typedef struct JpeglsContext{     /* private context of the JPEG-LS encoder */
+    AVCodecContext *avctx;        /* owning codec context, set in encode_init_ls() */
+    AVFrame picture;              /* copy of the input frame; also exported as coded_frame */
+}JpeglsContext;
+
+typedef struct JLSState{                 /* coder state shared by the encoder and the decoder */
+    int T1, T2, T3;                      /* gradient quantization thresholds, see quantize() */
+    int A[367], B[367], C[365], N[367];  /* per-context statistics; indices 365 and 366 are the run-termination contexts */
+    int limit, reset, bpp, qbpp, maxval, range; /* limit: Golomb code length limit; reset: N value that triggers halving; qbpp: ceil(log2(range)) */
+    int near, twonear;                   /* near-lossless tolerance and 2 * near + 1 */
+    int run_index[3];                    /* per-component index into log2_run[] */
+}JLSState;
+
+static const uint8_t log2_run[32]={ /* at run index i a full run segment is (1 << log2_run[i]) samples long */
+ 0, 0, 0, 0, 1, 1, 1, 1,
+ 2, 2, 2, 2, 3, 3, 3, 3,
+ 4, 4, 5, 5, 6, 6, 7, 7,
+ 8, 9,10,11,12,13,14,15
+};
+
+/*
+* Uncomment this to significantly speed up decoding of broken JPEG-LS streams
+* (or to test the decoder on them) at the cost of slightly slower normal decoding.
+*
+* No Golomb code of 32 or more bits is possible, so check for and reject
+* a run of 32 zero bits; the FFmpeg Golomb decoder is painfully slow
+* on such errors.
+*/
+//#define JLS_BROKEN
+
+/********** Functions for both encoder and decoder **********/
+
+/**
+ * Derive twonear, range, qbpp, limit and the initial A[]/N[] from near, maxval and bpp
+ */
+static void ls_init_state(JLSState *state){
+    int ctx, a_init, bits = 0;
+
+    state->twonear = 2 * state->near + 1;
+    state->range   = (state->maxval + state->twonear - 1) / state->twonear + 1;
+
+    /* qbpp: smallest bit count such that (1 << qbpp) >= range */
+    while((1 << bits) < state->range)
+        bits++;
+    state->qbpp  = bits;
+    /* depths below 8 bits use a different formula for the code length limit */
+    state->limit = (state->bpp < 8) ? 16 + 2 * state->bpp - state->qbpp
+                                    : 4 * state->bpp - state->qbpp;
+    /* every context starts with A = max(2, (range + 32) >> 6) and N = 1 */
+    a_init = (state->range + 32) >> 6;
+    if(a_init < 2)
+        a_init = 2;
+    for(ctx = 0; ctx < 367; ctx++) {
+        state->A[ctx] = a_init;
+        state->N[ctx] = 1;
+    }
+}
+
+/**
+ * Map a gradient value to one of nine regions (-4..4), used for context determination
+ */
+static inline int quantize(JLSState *s, int v){ //FIXME optimize
+    int region;
+    if(!v)
+        return 0;
+    if(v < 0)        /* negative side: the largest threshold is tested first */
+        region = (v <= -s->T3)   ? -4 :
+                 (v <= -s->T2)   ? -3 :
+                 (v <= -s->T1)   ? -2 :
+                 (v <  -s->near) ? -1 : 0;
+    else             /* positive side: the smallest threshold is tested first */
+        region = (v <= s->near)  ?  0 :
+                 (v <  s->T1)    ?  1 :
+                 (v <  s->T2)    ?  2 :
+                 (v <  s->T3)    ?  3 : 4;
+    return region;
+}
+
+/**
+ * Clipping used in the T1, T2, T3 calculation: any value outside
+ * [vmin, vmax], above as well as below, is replaced by vmin
+ */
+static inline int iso_clip(int v, int vmin, int vmax){
+    return (v < vmin || v > vmax) ? vmin : v;
+}
+
+/**
+ * Fill in the default JPEG-LS coding parameters (maxval, T1..T3, reset).
+ * Only fields that are still 0 are replaced, unless reset_all is nonzero.
+ */
+static void reset_ls_coding_parameters(JLSState *s, int reset_all){
+    const int basic_t1= 3;
+    const int basic_t2= 7;
+    const int basic_t3= 21;
+    int factor;
+
+    if(s->maxval==0 || reset_all) s->maxval= (1 << s->bpp) - 1;
+
+    if(s->maxval >=128){
+        factor= (FFMIN(s->maxval, 4095) + 128)>>8;
+
+        if(s->T1==0     || reset_all)
+            s->T1= iso_clip(factor*(basic_t1-2) + 2 + 3*s->near, s->near+1, s->maxval);
+        if(s->T2==0     || reset_all)
+            s->T2= iso_clip(factor*(basic_t2-3) + 3 + 5*s->near, s->T1, s->maxval);
+        if(s->T3==0     || reset_all)
+            s->T3= iso_clip(factor*(basic_t3-4) + 4 + 7*s->near, s->T2, s->maxval);
+    }else{
+        factor= 256 / (s->maxval + 1);
+        /* the NEAR weights are 3, 5 and 7, the same as in the branch above */
+        if(s->T1==0     || reset_all)
+            s->T1= iso_clip(FFMAX(2, basic_t1/factor + 3*s->near), s->near+1, s->maxval);
+        if(s->T2==0     || reset_all)
+            s->T2= iso_clip(FFMAX(3, basic_t2/factor + 5*s->near), s->T1, s->maxval);
+        if(s->T3==0     || reset_all)
+            s->T3= iso_clip(FFMAX(4, basic_t3/factor + 7*s->near), s->T2, s->maxval);
+    }
+
+    if(s->reset==0  || reset_all) s->reset= 64;
+}
+
+
+/********** Decoder-specific functions **********/
+
+/**
+ * Decode LSE block with initialization parameters.
+ * Only id 1 (maxval, t1, t2, t3, reset) is accepted; ids 2 and 3 (palette)
+ * and 4 (oversize image) are reported as unsupported.
+ * @param s decoder context; the segment is read from s->gb
+ * @return 0 on success, -1 for unsupported or invalid ids
+ */
+static int decode_lse(MJpegDecodeContext *s)
+{
+    int len, id;
+
+    /* XXX: verify len field validity */
+    len = get_bits(&s->gb, 16);
+    id  = get_bits(&s->gb, 8);
+
+    if(id == 1){
+        /* five 16-bit fields, stored in exactly this order */
+        s->maxval = get_bits(&s->gb, 16);
+        s->t1     = get_bits(&s->gb, 16);
+        s->t2     = get_bits(&s->gb, 16);
+        s->t3     = get_bits(&s->gb, 16);
+        s->reset  = get_bits(&s->gb, 16);
+
+        //FIXME quant table?
+        return 0;
+    }
+
+    /* every other id is rejected; only the logged reason differs */
+    if(id == 2 || id == 3)
+        av_log(s->avctx, AV_LOG_ERROR, "palette not supported\n");
+    else if(id == 4)
+        av_log(s->avctx, AV_LOG_ERROR, "oversize image not supported\n");
+    else
+        av_log(s->avctx, AV_LOG_ERROR, "invalid id %d\n", id);
+
+    return -1;
+}
+
+
+/** Get context-dependent Golomb code, decode it and update context Q.
+ *  The returned prediction error is already multiplied by twonear;
+ *  with JLS_BROKEN defined, -1 is returned when the next 32 bits are all zero. */
+static inline int ls_get_code_regular(GetBitContext *gb, JLSState *state, int Q){
+    int k, ret;
+
+    for(k = 0; (state->N[Q] << k) < state->A[Q]; k++); /* Golomb parameter: smallest k with (N[Q] << k) >= A[Q] */
+
+#ifdef JLS_BROKEN
+    if(!show_bits_long(gb, 32))return -1;
+#endif
+    ret = get_ur_golomb_jpegls(gb, k, state->limit, state->qbpp);
+
+    /* decode mapped error */
+    if(ret & 1)
+        ret = -((ret + 1) >> 1); /* odd codes stand for negative errors */
+    else
+        ret >>= 1;               /* even codes stand for non-negative errors */
+
+    /* for NEAR=0, k=0 and 2*B[Q] <= - N[Q] mapping is reversed */
+    if(!state->near && !k && (2 * state->B[Q] <= -state->N[Q]))
+        ret = -(ret + 1);
+
+    state->A[Q] += ABS(ret);     /* A accumulates error magnitudes before scaling by twonear */
+    ret *= state->twonear;
+    state->B[Q] += ret;          /* B accumulates the scaled signed errors */
+
+    if(state->N[Q] == state->reset) { /* halve the statistics once N reaches the reset value */
+        state->A[Q] >>= 1;
+        state->B[Q] >>= 1;
+        state->N[Q] >>= 1;
+    }
+    state->N[Q]++;
+    /* bring B[Q] back into (-N[Q], 0], moving the correction C[Q] one step within [-128, 127] */
+    if(state->B[Q] <= -state->N[Q]) {
+        state->B[Q] += state->N[Q];
+        if(state->C[Q] > -128)
+            state->C[Q]--;
+        if(state->B[Q] <= -state->N[Q])
+            state->B[Q] = -state->N[Q] + 1;
+    }else if(state->B[Q] > 0){
+        state->B[Q] -= state->N[Q];
+        if(state->C[Q] < 127)
+            state->C[Q]++;
+        if(state->B[Q] > 0)
+            state->B[Q] = 0;
+    }
+
+    return ret;
+}
+
+/** Get Golomb code, decode it and update state for run termination.
+ *  RItype selects context 365 or 366; limit_add further shortens the code length limit.
+ *  Returns the error multiplied by twonear (-1 on 32 zero bits if JLS_BROKEN is defined). */
+static inline int ls_get_code_runterm(GetBitContext *gb, JLSState *state, int RItype, int limit_add){
+    int k, ret, temp, map;
+    int Q = 365 + RItype;
+
+    if(!RItype)
+        temp = state->A[Q];
+    else
+        temp = state->A[Q] + (state->N[Q] >> 1);
+
+    for(k = 0; (state->N[Q] << k) < temp; k++); /* Golomb parameter: smallest k with (N[Q] << k) >= temp */
+
+#ifdef JLS_BROKEN
+    if(!show_bits_long(gb, 32))return -1;
+#endif
+    ret = get_ur_golomb_jpegls(gb, k, state->limit - limit_add - 1, state->qbpp);
+
+    /* decode mapped error */
+    map = 0;
+    if(!k && (RItype || ret) && (2 * state->B[Q] < state->N[Q]))
+        map = 1;
+    ret += RItype + map;
+
+    if(ret & 1){
+        ret = map - ((ret + 1) >> 1);
+        state->B[Q]++; /* B is incremented each time this branch is taken */
+    } else {
+        ret = ret >> 1;
+    }
+
+    /* update state */
+    state->A[Q] += ABS(ret) - RItype;
+    ret *= state->twonear;
+    if(state->N[Q] == state->reset){ /* halve the statistics once N reaches the reset value */
+        state->A[Q] >>=1;
+        state->B[Q] >>=1;
+        state->N[Q] >>=1;
+    }
+    state->N[Q]++;
+
+    return ret;
+}
+
+/** Decode one line of image.
+ *  last is the previous line, dst the line being written, last2 the value used for Rc at x == 0;
+ *  x advances in steps of stride until it reaches w, comp selects the entry of state->run_index[]. */
+static inline void ls_decode_line(JLSState *state, MJpegDecodeContext *s, uint8_t *last, uint8_t *dst, int last2, int w, int stride, int comp){
+    int i, x = 0;
+    int Ra, Rb, Rc, Rd;
+    int D0, D1, D2;
+
+    while(x < w) {
+        int err, pred;
+
+        /* compute gradients */
+        Ra = x ? dst[x - stride] : last[x];                   /* left neighbour (sample above at x == 0) */
+        Rb = last[x];                                         /* sample above */
+        Rc = x ? last[x - stride] : last2;                    /* sample above-left */
+        Rd = (x >= w - stride) ? last[x] : last[x + stride];  /* sample above-right (sample above at the line end) */
+        D0 = Rd - Rb;
+        D1 = Rb - Rc;
+        D2 = Rc - Ra;
+        /* run mode */
+        if((ABS(D0) <= state->near) && (ABS(D1) <= state->near) && (ABS(D2) <= state->near)) {
+            int r;
+            int RItype;
+
+            /* decode full runs while available */
+            while(get_bits1(&s->gb)) {
+                int r; /* NOTE(review): shadows the outer r declared above */
+                r = 1 << log2_run[state->run_index[comp]];
+                if(x + r * stride > w) {
+                    r = (w - x) / stride;
+                }
+                for(i = 0; i < r; i++) {
+                    dst[x] = Ra;
+                    x += stride;
+                }
+                /* if EOL reached, we stop decoding */
+                if(r != (1 << log2_run[state->run_index[comp]]))
+                    return;
+                if(state->run_index[comp] < 31)
+                    state->run_index[comp]++;
+                if(x + stride > w)
+                    return;
+            }
+            /* decode aborted run */
+            r = log2_run[state->run_index[comp]];
+            if(r)
+                r = get_bits_long(&s->gb, r);
+            for(i = 0; i < r; i++) { /* NOTE(review): r comes from the bitstream; x is not checked against w here or below */
+                dst[x] = Ra;
+                x += stride;
+            }
+
+            /* decode run termination value */
+            Rb = last[x];
+            RItype = (ABS(Ra - Rb) <= state->near) ? 1 : 0;
+            err = ls_get_code_runterm(&s->gb, state, RItype, log2_run[state->run_index[comp]]);
+            if(state->run_index[comp])
+                state->run_index[comp]--;
+
+            if(state->near && RItype){
+                pred = Ra + err;
+            } else {
+                if(Rb < Ra)
+                    pred = Rb - err;
+                else
+                    pred = Rb + err;
+            }
+
+            if(state->near){ /* wrap by range * twonear, then clip to [0, maxval] */
+                if(pred < -state->near)
+                    pred += state->range * state->twonear;
+                else if(pred > state->maxval + state->near)
+                    pred -= state->range * state->twonear;
+                pred = clip(pred, 0, state->maxval);
+            }
+
+            dst[x] = pred;
+            x += stride;
+        } else { /* regular mode */
+            int context, sign;
+
+            context = quantize(state, D0) * 81 + quantize(state, D1) * 9 + quantize(state, D2);
+            pred = mid_pred(Ra, Ra + Rb - Rc, Rb);
+
+            if(context < 0){ /* negative contexts are folded onto positive ones, remembering the sign */
+                context = -context;
+                sign = 1;
+            }else{
+                sign = 0;
+            }
+
+            if(sign){
+                pred = clip(pred - state->C[context], 0, state->maxval);
+                err = -ls_get_code_regular(&s->gb, state, context);
+            } else {
+                pred = clip(pred + state->C[context], 0, state->maxval);
+                err = ls_get_code_regular(&s->gb, state, context);
+            }
+
+            /* we have to do something more for near-lossless coding */
+            pred += err;
+            if(state->near) {
+                if(pred < -state->near)
+                    pred += state->range * state->twonear;
+                else if(pred > state->maxval + state->near)
+                    pred -= state->range * state->twonear;
+                pred = clip(pred, 0, state->maxval);
+            }
+
+            dst[x] = pred;
+            x += stride;
+        }
+    }
+}
+
+/**
+ * Decode one scan into s->picture; ilv selects plane (0) or line (1) interleaving.
+ * @return 0 on success, -1 on allocation failure or unsupported interleaving */
+static int ls_decode_picture(MJpegDecodeContext *s, int near, int point_transform, int ilv){
+    int i, t = 0, ret = 0, off, stride, width;
+    uint8_t *zero, *last, *cur;
+    JLSState *state;
+
+    zero  = av_mallocz(s->picture.linesize[0]);
+    state = av_mallocz(sizeof(JLSState));
+    if(!zero || !state){
+        av_free(zero);
+        av_free(state);
+        return -1;
+    }
+    last = zero;
+    cur = s->picture.data[0];
+    /* initialize JPEG-LS state from JPEG parameters */
+    state->near = near;
+    state->bpp = (s->bits < 2) ? 2 : s->bits;
+    state->maxval = s->maxval;
+    state->T1 = s->t1;
+    state->T2 = s->t2;
+    state->T3 = s->t3;
+    state->reset = s->reset;
+    reset_ls_coding_parameters(state, 0);
+    ls_init_state(state);
+    if(ilv == 0) { /* separate planes */
+        off = s->cur_scan - 1;
+        stride = (s->nb_components > 1) ? 3 : 1;
+        width = s->width * stride;
+        cur += off;
+        for(i = 0; i < s->height; i++) {
+            ls_decode_line(state, s, last, cur, t, width, stride, off);
+            t = last[0];
+            last = cur;
+            cur += s->picture.linesize[0];
+            if (s->restart_interval && !--s->restart_count) {
+                align_get_bits(&s->gb);
+                skip_bits(&s->gb, 16); /* skip RSTn */
+            }
+        }
+    } else if(ilv == 1) { /* line interleaving */
+        int j;
+        int Rc[3] = {0, 0, 0};
+        memset(cur, 0, s->picture.linesize[0]);
+        width = s->width * 3;
+        for(i = 0; i < s->height; i++) {
+            for(j = 0; j < 3; j++) {
+                ls_decode_line(state, s, last + j, cur + j, Rc[j], width, 3, j);
+                Rc[j] = last[j];
+                if (s->restart_interval && !--s->restart_count) {
+                    align_get_bits(&s->gb);
+                    skip_bits(&s->gb, 16); /* skip RSTn */
+                }
+            }
+            last = cur;
+            cur += s->picture.linesize[0];
+        }
+    } else if(ilv == 2) { /* sample interleaving */
+        av_log(s->avctx, AV_LOG_ERROR, "Sample interleaved images are not supported.\n");
+        ret = -1; /* fall through to the common cleanup instead of leaking state and zero */
+    }
+
+    av_free(state);
+    av_free(zero);
+    return ret;
+}
+
+#if defined(CONFIG_ENCODERS) && defined(CONFIG_JPEGLS_ENCODER)
+/********** Encoder-specific functions **********/
+
+/** Encode error from regular symbol.
+ *  Q is the context index and err the prediction error as passed by ls_encode_line();
+ *  err is reduced modulo range, Golomb-coded, and the A/B/C/N statistics of Q are updated. */
+static inline void ls_encode_regular(JLSState *state, PutBitContext *pb, int Q, int err){
+    int k;
+    int val;
+    int map;
+
+    for(k = 0; (state->N[Q] << k) < state->A[Q]; k++); /* Golomb parameter: smallest k with (N[Q] << k) >= A[Q] */
+
+    map = !state->near && !k && (2 * state->B[Q] <= -state->N[Q]); /* 1 if the error mapping is reversed */
+
+    if(err < 0)
+        err += state->range;
+    if(err >= ((state->range + 1) >> 1)) {
+        err -= state->range;
+        val = 2 * ABS(err) - 1 - map; /* negative errors map to odd codes when map == 0 */
+    } else
+        val = 2 * err + map;          /* non-negative errors map to even codes when map == 0 */
+
+    set_ur_golomb_jpegls(pb, val, k, state->limit, state->qbpp);
+
+    state->A[Q] += ABS(err);
+    state->B[Q] += err * state->twonear;
+
+    if(state->N[Q] == state->reset) { /* halve the statistics once N reaches the reset value */
+        state->A[Q] >>= 1;
+        state->B[Q] >>= 1;
+        state->N[Q] >>= 1;
+    }
+    state->N[Q]++;
+    /* bring B[Q] back into (-N[Q], 0], moving the correction C[Q] one step within [-128, 127] */
+    if(state->B[Q] <= -state->N[Q]) {
+        state->B[Q] += state->N[Q];
+        if(state->C[Q] > -128)
+            state->C[Q]--;
+        if(state->B[Q] <= -state->N[Q])
+            state->B[Q] = -state->N[Q] + 1;
+    }else if(state->B[Q] > 0){
+        state->B[Q] -= state->N[Q];
+        if(state->C[Q] < 127)
+            state->C[Q]++;
+        if(state->B[Q] > 0)
+            state->B[Q] = 0;
+    }
+}
+
+/** Encode error from run termination.
+ *  RItype selects context 365 or 366; limit_add further shortens the code length limit.
+ *  The A/B/N statistics of the selected context are updated after the code is written. */
+static inline void ls_encode_runterm(JLSState *state, PutBitContext *pb, int RItype, int err, int limit_add){
+    int k;
+    int val, map;
+    int Q = 365 + RItype;
+    int temp;
+
+    temp = state->A[Q];
+    if(RItype)
+        temp += state->N[Q] >> 1;
+    for(k = 0; (state->N[Q] << k) < temp; k++); /* Golomb parameter: smallest k with (N[Q] << k) >= temp */
+    map = 0;
+    if(!k && err && (2 * state->B[Q] < state->N[Q]))
+        map = 1;
+
+    if(err < 0)
+        val = - (2 * err) - 1 - RItype + map;
+    else
+        val = 2 * err - RItype - map;
+    set_ur_golomb_jpegls(pb, val, k, state->limit - limit_add - 1, state->qbpp);
+
+    if(err < 0)
+        state->B[Q]++; /* B is incremented for each negative error */
+    state->A[Q] += (val + 1 - RItype) >> 1;
+
+    if(state->N[Q] == state->reset) { /* halve the statistics once N reaches the reset value */
+        state->A[Q] >>= 1;
+        state->B[Q] >>= 1;
+        state->N[Q] >>= 1;
+    }
+    state->N[Q]++;
+}
+
+/**
+ * Write a run of 'run' samples; trail is nonzero if the run ended before EOL
+ */
+static inline void ls_encode_run(JLSState *state, PutBitContext *pb, int run, int comp, int trail){
+    int *idx = &state->run_index[comp];
+    while(run >= (1 << log2_run[*idx])){   /* a '1' bit per complete run segment */
+        put_bits(pb, 1, 1);
+        run -= 1 << log2_run[*idx];
+        if(*idx < 31)
+            ++*idx;
+    }
+    if(trail){                             /* aborted run: '0' bit, then the remainder */
+        put_bits(pb, 1, 0);
+        if(log2_run[*idx])
+            put_bits(pb, log2_run[*idx], run);
+    }else if(run){                         /* EOL hit inside a segment: one more '1' bit */
+        put_bits(pb, 1, 1);
+    }
+}
+
+/** Encode one line of image.
+ *  last is the previous line, cur the line being coded (samples may be overwritten with their
+ *  reconstructed values), last2 the value used for Rc at x == 0; x advances by stride up to w. */
+static inline void ls_encode_line(JLSState *state, PutBitContext *pb, uint8_t *last, uint8_t *cur, int last2, int w, int stride, int comp){
+    int x = 0;
+    int Ra, Rb, Rc, Rd;
+    int D0, D1, D2;
+
+    while(x < w) {
+        int err, pred, sign;
+
+        /* compute gradients */
+        Ra = x ? cur[x - stride] : last[x];                   /* left neighbour (sample above at x == 0) */
+        Rb = last[x];                                         /* sample above */
+        Rc = x ? last[x - stride] : last2;                    /* sample above-left */
+        Rd = (x >= w - stride) ? last[x] : last[x + stride];  /* sample above-right (sample above at the line end) */
+        D0 = Rd - Rb;
+        D1 = Rb - Rc;
+        D2 = Rc - Ra;
+
+        /* run mode */
+        if((ABS(D0) <= state->near) && (ABS(D1) <= state->near) && (ABS(D2) <= state->near)) {
+            int RUNval, RItype, run;
+
+            run = 0;
+            RUNval = Ra;
+            while(x < w && (ABS(cur[x] - RUNval) <= state->near)){ /* extend the run while samples stay within near of Ra */
+                run++;
+                cur[x] = Ra;
+                x += stride;
+            }
+            ls_encode_run(state, pb, run, comp, x < w);
+            if(x >= w)
+                return;
+            Rb = last[x];
+            RItype = (ABS(Ra - Rb) <= state->near);
+            pred = RItype ? Ra : Rb;
+            err = cur[x] - pred;
+
+            if(!RItype && Ra > Rb)
+                err = -err;
+
+            if(state->near){ /* quantize the error and store the reconstructed sample back into cur */
+                if(err > 0)
+                    err = (state->near + err) / state->twonear;
+                else
+                    err = -(state->near - err) / state->twonear;
+
+                if(RItype || (Rb >= Ra))
+                    Ra = clip(pred + err * state->twonear, 0, state->maxval);
+                else
+                    Ra = clip(pred - err * state->twonear, 0, state->maxval);
+                cur[x] = Ra;
+            }
+            if(err < 0)
+                err += state->range;
+            if(err >= ((state->range + 1) >> 1))
+                err -= state->range;
+
+            ls_encode_runterm(state, pb, RItype, err, log2_run[state->run_index[comp]]);
+
+            if(state->run_index[comp] > 0)
+                state->run_index[comp]--;
+            x += stride;
+        } else { /* regular mode */
+            int context;
+
+            context = quantize(state, D0) * 81 + quantize(state, D1) * 9 + quantize(state, D2);
+            pred = mid_pred(Ra, Ra + Rb - Rc, Rb);
+
+            if(context < 0){ /* negative contexts are folded onto positive ones with the error negated */
+                context = -context;
+                sign = 1;
+                pred = clip(pred - state->C[context], 0, state->maxval);
+                err = pred - cur[x];
+            }else{
+                sign = 0;
+                pred = clip(pred + state->C[context], 0, state->maxval);
+                err = cur[x] - pred;
+            }
+
+            if(state->near){ /* quantize the error and store the reconstructed sample back into cur */
+                if(err > 0)
+                    err = (state->near + err) / state->twonear;
+                else
+                    err = -(state->near - err) / state->twonear;
+                if(!sign)
+                    Ra = clip(pred + err * state->twonear, 0, state->maxval);
+                else
+                    Ra = clip(pred - err * state->twonear, 0, state->maxval);
+                cur[x] = Ra;
+            }
+
+            ls_encode_regular(state, pb, context, err);
+            x += stride;
+        }
+    }
+}
+
+/** Write an LSE segment of type 1 unless T1..T3 and reset equal the defaults for 8 bits */
+static void ls_store_lse(JLSState *state, PutBitContext *pb){
+    const int fields[5] = { state->maxval, state->T1, state->T2, state->T3, state->reset };
+    JLSState defaults;
+    int i;
+    /* compute the parameters a decoder would assume without an LSE segment */
+    memset(&defaults, 0, sizeof(JLSState));
+    defaults.bpp  = 8;
+    defaults.near = state->near;
+    reset_ls_coding_parameters(&defaults, 1);
+    if(defaults.T1 == state->T1 && defaults.T2 == state->T2 &&
+       defaults.T3 == state->T3 && defaults.reset == state->reset)
+        return;
+    put_marker(pb, LSE);
+    put_bits(pb, 16, 13);   /* 16-bit field read back as len by decode_lse() */
+    put_bits(pb,  8,  1);   /* 8-bit field read back as id by decode_lse() */
+    for(i = 0; i < 5; i++)
+        put_bits(pb, 16, fields[i]);
+}
+
+/**
+ * Encode one frame as a JPEG-LS image: SOI, SOF48 and SOS headers, an
+ * optional LSE segment, the escaped entropy-coded data and EOI.
+ * @return number of bytes written to buf, or -1 if memory allocation fails
+ */
+static int encode_picture_ls(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
+    JpeglsContext * const s = avctx->priv_data;
+    AVFrame *pict = data;
+    AVFrame * const p= (AVFrame*)&s->picture;
+    const int near = avctx->prediction_method;
+    PutBitContext pb, pb2;
+    GetBitContext gb;
+    uint8_t *buf2, *zero, *cur, *last;
+    JLSState *state;
+    int i, j, size, width, comps, t = 0;
+    int Rc[3] = {0, 0, 0};
+
+    *p = *pict;
+    p->pict_type= FF_I_TYPE;
+    p->key_frame= 1;
+
+    /* buf2 collects the unescaped coded data, zero stands in for the line above the first one */
+    buf2  = av_malloc(buf_size);
+    state = av_mallocz(sizeof(JLSState));
+    zero  = av_mallocz(p->linesize[0]);
+    if(!buf2 || !state || !zero){
+        av_free(zero);
+        av_free(state);
+        av_free(buf2);
+        return -1;
+    }
+
+    init_put_bits(&pb, buf, buf_size);
+    init_put_bits(&pb2, buf2, buf_size);
+    comps = (avctx->pix_fmt == PIX_FMT_GRAY8) ? 1 : 3;
+
+    /* write our own JPEG header, can't use mjpeg_picture_header */
+    put_marker(&pb, SOI);
+    put_marker(&pb, SOF48);
+    put_bits(&pb, 16, 8 + comps * 3); // header size depends on components
+    put_bits(&pb,  8, 8);             // bpp
+    put_bits(&pb, 16, avctx->height);
+    put_bits(&pb, 16, avctx->width);
+    put_bits(&pb,  8, comps);         // components
+    for(i = 1; i <= comps; i++) {
+        put_bits(&pb,  8, i);    // component ID
+        put_bits(&pb,  8, 0x11); // subsampling: none
+        put_bits(&pb,  8, 0);    // Tiq, used by JPEG-LS ext
+    }
+
+    put_marker(&pb, SOS);
+    put_bits(&pb, 16, 6 + comps * 2);
+    put_bits(&pb,  8, comps);
+    for(i = 1; i <= comps; i++) {
+        put_bits(&pb,  8, i);  // component ID
+        put_bits(&pb,  8, 0);  // mapping index: none
+    }
+    put_bits(&pb,  8, near);
+    put_bits(&pb,  8, (comps > 1) ? 1 : 0); // interleaving: 0 - plane, 1 - line
+    put_bits(&pb,  8, 0); // point transform: none
+
+    /* initialize JPEG-LS state from JPEG parameters */
+    state->near = near;
+    state->bpp = 8;
+    reset_ls_coding_parameters(state, 0);
+    ls_init_state(state);
+
+    ls_store_lse(state, &pb);
+    last = zero;
+    cur = p->data[0];
+    if(avctx->pix_fmt == PIX_FMT_GRAY8){
+        for(i = 0; i < avctx->height; i++) {
+            ls_encode_line(state, &pb2, last, cur, t, avctx->width, 1, 0);
+            t = last[0];
+            last = cur;
+            cur += p->linesize[0];
+        }
+    }else if(avctx->pix_fmt == PIX_FMT_RGB24){
+        width = avctx->width * 3;
+        for(i = 0; i < avctx->height; i++) {
+            for(j = 0; j < 3; j++) {
+                ls_encode_line(state, &pb2, last + j, cur + j, Rc[j], width, 3, j);
+                Rc[j] = last[j];
+            }
+            last = cur;
+            cur += p->linesize[0];
+        }
+    }else if(avctx->pix_fmt == PIX_FMT_BGR24){
+        width = avctx->width * 3;
+        for(i = 0; i < avctx->height; i++) {
+            for(j = 2; j >= 0; j--) { /* same lines as RGB24, components visited in reverse order */
+                ls_encode_line(state, &pb2, last + j, cur + j, Rc[j], width, 3, j);
+                Rc[j] = last[j];
+            }
+            last = cur;
+            cur += p->linesize[0];
+        }
+    }
+
+    av_free(zero);
+    av_free(state);
+
+    flush_put_bits(&pb2);
+    /* do escape coding */
+    size = put_bits_count(&pb2) >> 3;
+    init_get_bits(&gb, buf2, size * 8); /* the third argument is a size in bits */
+    while(get_bits_count(&gb) < size * 8){
+        int v = get_bits(&gb, 8);
+        put_bits(&pb, 8, v);
+        if(v == 0xFF){ /* after an 0xFF byte only 7 payload bits go into the next output byte */
+            v = get_bits(&gb, 7);
+            put_bits(&pb, 8, v);
+        }
+    }
+    align_put_bits(&pb);
+    av_free(buf2);
+
+    /* End of image */
+    put_marker(&pb, EOI);
+    flush_put_bits(&pb);
+    emms_c();
+
+    return put_bits_count(&pb) >> 3;
+}
+
+/** Set up the encoder context; returns -1 unless pix_fmt is GRAY8, RGB24 or BGR24 */
+static int encode_init_ls(AVCodecContext *ctx) {
+    JpeglsContext *priv = ctx->priv_data;
+    int supported = ctx->pix_fmt == PIX_FMT_GRAY8 || ctx->pix_fmt == PIX_FMT_RGB24 || ctx->pix_fmt == PIX_FMT_BGR24;
+
+    priv->avctx      = ctx;
+    ctx->coded_frame = &priv->picture;
+    if(supported)
+        return 0;
+    av_log(ctx, AV_LOG_ERROR, "Only grayscale and RGB24/BGR24 images are supported\n");
+    return -1;
+}
+
+AVCodec jpegls_encoder = { //FIXME avoid MPV_*, lossless JPEG shouldn't need them
+    "jpegls",
+    CODEC_TYPE_VIDEO,
+    CODEC_ID_JPEGLS,
+    sizeof(JpeglsContext), /* size of the encoder's private context */
+    encode_init_ls,
+    encode_picture_ls,
+    NULL, /* presumably the close callback slot: nothing to release -- TODO confirm against AVCodec */
+    .pix_fmts= (enum PixelFormat[]){PIX_FMT_BGR24, PIX_FMT_RGB24, PIX_FMT_GRAY8, -1}, /* the formats accepted by encode_init_ls(), terminated by -1 */
+};
+#endif
diff --git a/src/libffmpeg/libavcodec/jrevdct.c b/src/libffmpeg/libavcodec/jrevdct.c
index c08d1241f..dc2ffaff7 100644
--- a/src/libffmpeg/libavcodec/jrevdct.c
+++ b/src/libffmpeg/libavcodec/jrevdct.c
@@ -16,7 +16,7 @@
  * The advantage of this method is that no data path contains more than one
  * multiplication; this allows a very simple and accurate implementation in
  * scaled fixed-point arithmetic, with a minimal number of shifts.
- * 
+ *
  * I've made lots of modifications to attempt to take advantage of the
  * sparse nature of the DCT matrices we're getting.  Although the logic
  * is cumbersome, it's straightforward and the resulting code is much
@@ -25,12 +25,12 @@
  * A better way to do this would be to pass in the DCT block as a sparse
  * matrix, perhaps with the difference cases encoded.
  */
- 
+
 /**
  * @file jrevdct.c
  * Independent JPEG Group's LLM idct.
  */
- 
+
 #include "common.h"
 #include "dsputil.h"
 
@@ -95,10 +95,10 @@ typedef DCTELEM DCTBLOCK[DCTSIZE2];
 #ifdef EIGHT_BIT_SAMPLES
 #define PASS1_BITS  2
 #else
-#define PASS1_BITS  1		/* lose a little precision to avoid overflow */
+#define PASS1_BITS  1   /* lose a little precision to avoid overflow */
 #endif
 
-#define ONE	((int32_t) 1)
+#define ONE         ((int32_t) 1)
 
 #define CONST_SCALE (ONE << CONST_BITS)
 
@@ -109,7 +109,7 @@ typedef DCTELEM DCTBLOCK[DCTSIZE2];
  */
 
 /* Actually FIX is no longer used, we precomputed them all */
-#define FIX(x)	((int32_t) ((x) * CONST_SCALE + 0.5)) 
+#define FIX(x)  ((int32_t) ((x) * CONST_SCALE + 0.5))
 
 /* Descale and correctly round an int32_t value that's scaled by N bits.
  * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
@@ -130,22 +130,22 @@ typedef DCTELEM DCTBLOCK[DCTSIZE2];
  */
 
 #ifdef EIGHT_BIT_SAMPLES
-#ifdef SHORTxSHORT_32		/* may work if 'int' is 32 bits */
+#ifdef SHORTxSHORT_32           /* may work if 'int' is 32 bits */
 #define MULTIPLY(var,const)  (((int16_t) (var)) * ((int16_t) (const)))
 #endif
-#ifdef SHORTxLCONST_32		/* known to work with Microsoft C 6.0 */
+#ifdef SHORTxLCONST_32          /* known to work with Microsoft C 6.0 */
 #define MULTIPLY(var,const)  (((int16_t) (var)) * ((int32_t) (const)))
 #endif
 #endif
 
-#ifndef MULTIPLY		/* default definition */
+#ifndef MULTIPLY                /* default definition */
 #define MULTIPLY(var,const)  ((var) * (const))
 #endif
 
 
-/* 
+/*
   Unlike our decoder where we approximate the FIXes, we need to use exact
-ones here or successive P-frames will drift too much with Reference frame coding 
+ones here or successive P-frames will drift too much with Reference frame coding
 */
 #define FIX_0_211164243 1730
 #define FIX_0_275899380 2260
@@ -184,7 +184,7 @@ void j_rev_dct(DCTBLOCK data)
   int32_t d0, d1, d2, d3, d4, d5, d6, d7;
   register DCTELEM *dataptr;
   int rowctr;
-   
+
   /* Pass 1: process rows. */
   /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
   /* furthermore, we scale the results by 2**PASS1_BITS. */
@@ -217,17 +217,17 @@ void j_rev_dct(DCTBLOCK data)
     if ((d1 | d2 | d3 | d4 | d5 | d6 | d7) == 0) {
       /* AC terms all zero */
       if (d0) {
-	  /* Compute a 32 bit value to assign. */
-	  DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
-	  register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
-	  
-	  idataptr[0] = v;
-	  idataptr[1] = v;
-	  idataptr[2] = v;
-	  idataptr[3] = v;
+          /* Compute a 32 bit value to assign. */
+          DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
+          register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
+
+          idataptr[0] = v;
+          idataptr[1] = v;
+          idataptr[2] = v;
+          idataptr[3] = v;
       }
-      
-      dataptr += DCTSIZE;	/* advance pointer to next row */
+
+      dataptr += DCTSIZE;       /* advance pointer to next row */
       continue;
     }
 
@@ -235,50 +235,50 @@ void j_rev_dct(DCTBLOCK data)
     /* The rotator is sqrt(2)*c(-6). */
 {
     if (d6) {
-	    if (d2) {
-		    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
-		    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
-		    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
-		    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
-
-		    tmp0 = (d0 + d4) << CONST_BITS;
-		    tmp1 = (d0 - d4) << CONST_BITS;
-
-		    tmp10 = tmp0 + tmp3;
-		    tmp13 = tmp0 - tmp3;
-		    tmp11 = tmp1 + tmp2;
-		    tmp12 = tmp1 - tmp2;
-	    } else {
-		    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
-		    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
-		    tmp3 = MULTIPLY(d6, FIX_0_541196100);
-
-		    tmp0 = (d0 + d4) << CONST_BITS;
-		    tmp1 = (d0 - d4) << CONST_BITS;
-
-		    tmp10 = tmp0 + tmp3;
-		    tmp13 = tmp0 - tmp3;
-		    tmp11 = tmp1 + tmp2;
-		    tmp12 = tmp1 - tmp2;
-	    }
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
     } else {
-	    if (d2) {
-		    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
-		    tmp2 = MULTIPLY(d2, FIX_0_541196100);
-		    tmp3 = MULTIPLY(d2, FIX_1_306562965);
-
-		    tmp0 = (d0 + d4) << CONST_BITS;
-		    tmp1 = (d0 - d4) << CONST_BITS;
-
-		    tmp10 = tmp0 + tmp3;
-		    tmp13 = tmp0 - tmp3;
-		    tmp11 = tmp1 + tmp2;
-		    tmp12 = tmp1 - tmp2;
-	    } else {
-		    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
-		    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
-		    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
-	    }
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
+                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+            }
       }
 
     /* Odd part per figure 8; the matrix is unitary and hence its
@@ -286,259 +286,259 @@ void j_rev_dct(DCTBLOCK data)
      */
 
     if (d7) {
-	if (d5) {
-	    if (d3) {
-		if (d1) {
-		    /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
-		    z1 = d7 + d1;
-		    z2 = d5 + d3;
-		    z3 = d7 + d3;
-		    z4 = d5 + d1;
-		    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
-		    
-		    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
-		    tmp1 = MULTIPLY(d5, FIX_2_053119869);
-		    tmp2 = MULTIPLY(d3, FIX_3_072711026);
-		    tmp3 = MULTIPLY(d1, FIX_1_501321110);
-		    z1 = MULTIPLY(-z1, FIX_0_899976223);
-		    z2 = MULTIPLY(-z2, FIX_2_562915447);
-		    z3 = MULTIPLY(-z3, FIX_1_961570560);
-		    z4 = MULTIPLY(-z4, FIX_0_390180644);
-		    
-		    z3 += z5;
-		    z4 += z5;
-		    
-		    tmp0 += z1 + z3;
-		    tmp1 += z2 + z4;
-		    tmp2 += z2 + z3;
-		    tmp3 += z1 + z4;
-		} else {
-		    /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
-		    z2 = d5 + d3;
-		    z3 = d7 + d3;
-		    z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
-		    
-		    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
-		    tmp1 = MULTIPLY(d5, FIX_2_053119869);
-		    tmp2 = MULTIPLY(d3, FIX_3_072711026);
-		    z1 = MULTIPLY(-d7, FIX_0_899976223);
-		    z2 = MULTIPLY(-z2, FIX_2_562915447);
-		    z3 = MULTIPLY(-z3, FIX_1_961570560);
-		    z4 = MULTIPLY(-d5, FIX_0_390180644);
-		    
-		    z3 += z5;
-		    z4 += z5;
-		    
-		    tmp0 += z1 + z3;
-		    tmp1 += z2 + z4;
-		    tmp2 += z2 + z3;
-		    tmp3 = z1 + z4;
-		}
-	    } else {
-		if (d1) {
-		    /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
-		    z1 = d7 + d1;
-		    z4 = d5 + d1;
-		    z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
-		    
-		    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
-		    tmp1 = MULTIPLY(d5, FIX_2_053119869);
-		    tmp3 = MULTIPLY(d1, FIX_1_501321110);
-		    z1 = MULTIPLY(-z1, FIX_0_899976223);
-		    z2 = MULTIPLY(-d5, FIX_2_562915447);
-		    z3 = MULTIPLY(-d7, FIX_1_961570560);
-		    z4 = MULTIPLY(-z4, FIX_0_390180644);
-		    
-		    z3 += z5;
-		    z4 += z5;
-		    
-		    tmp0 += z1 + z3;
-		    tmp1 += z2 + z4;
-		    tmp2 = z2 + z3;
-		    tmp3 += z1 + z4;
-		} else {
-		    /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
-		    tmp0 = MULTIPLY(-d7, FIX_0_601344887); 
-		    z1 = MULTIPLY(-d7, FIX_0_899976223);
-		    z3 = MULTIPLY(-d7, FIX_1_961570560);
-		    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
-		    z2 = MULTIPLY(-d5, FIX_2_562915447);
-		    z4 = MULTIPLY(-d5, FIX_0_390180644);
-		    z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
-		    
-		    z3 += z5;
-		    z4 += z5;
-		    
-		    tmp0 += z3;
-		    tmp1 += z4;
-		    tmp2 = z2 + z3;
-		    tmp3 = z1 + z4;
-		}
-	    }
-	} else {
-	    if (d3) {
-		if (d1) {
-		    /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
-		    z1 = d7 + d1;
-		    z3 = d7 + d3;
-		    z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
-		    
-		    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
-		    tmp2 = MULTIPLY(d3, FIX_3_072711026);
-		    tmp3 = MULTIPLY(d1, FIX_1_501321110);
-		    z1 = MULTIPLY(-z1, FIX_0_899976223);
-		    z2 = MULTIPLY(-d3, FIX_2_562915447);
-		    z3 = MULTIPLY(-z3, FIX_1_961570560);
-		    z4 = MULTIPLY(-d1, FIX_0_390180644);
-		    
-		    z3 += z5;
-		    z4 += z5;
-		    
-		    tmp0 += z1 + z3;
-		    tmp1 = z2 + z4;
-		    tmp2 += z2 + z3;
-		    tmp3 += z1 + z4;
-		} else {
-		    /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
-		    z3 = d7 + d3;
-		    
-		    tmp0 = MULTIPLY(-d7, FIX_0_601344887); 
-		    z1 = MULTIPLY(-d7, FIX_0_899976223);
-		    tmp2 = MULTIPLY(d3, FIX_0_509795579);
-		    z2 = MULTIPLY(-d3, FIX_2_562915447);
-		    z5 = MULTIPLY(z3, FIX_1_175875602);
-		    z3 = MULTIPLY(-z3, FIX_0_785694958);
-		    
-		    tmp0 += z3;
-		    tmp1 = z2 + z5;
-		    tmp2 += z3;
-		    tmp3 = z1 + z5;
-		}
-	    } else {
-		if (d1) {
-		    /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
-		    z1 = d7 + d1;
-		    z5 = MULTIPLY(z1, FIX_1_175875602);
-
-		    z1 = MULTIPLY(z1, FIX_0_275899380);
-		    z3 = MULTIPLY(-d7, FIX_1_961570560);
-		    tmp0 = MULTIPLY(-d7, FIX_1_662939225); 
-		    z4 = MULTIPLY(-d1, FIX_0_390180644);
-		    tmp3 = MULTIPLY(d1, FIX_1_111140466);
-
-		    tmp0 += z1;
-		    tmp1 = z4 + z5;
-		    tmp2 = z3 + z5;
-		    tmp3 += z1;
-		} else {
-		    /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
-		    tmp0 = MULTIPLY(-d7, FIX_1_387039845);
-		    tmp1 = MULTIPLY(d7, FIX_1_175875602);
-		    tmp2 = MULTIPLY(-d7, FIX_0_785694958);
-		    tmp3 = MULTIPLY(d7, FIX_0_275899380);
-		}
-	    }
-	}
+        if (d5) {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z2 = d5 + d3;
+                    z3 = d7 + d3;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
+                    z2 = d5 + d3;
+                    z3 = d7 + d3;
+                    z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 = z1 + z4;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(d7 + z4, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 = z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
+                    tmp0 = MULTIPLY(-d7, FIX_0_601344887);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+                    z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z3;
+                    tmp1 += z4;
+                    tmp2 = z2 + z3;
+                    tmp3 = z1 + z4;
+                }
+            }
+        } else {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z3 = d7 + d3;
+                    z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-d3, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-d1, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 = z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
+                    z3 = d7 + d3;
+
+                    tmp0 = MULTIPLY(-d7, FIX_0_601344887);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    tmp2 = MULTIPLY(d3, FIX_0_509795579);
+                    z2 = MULTIPLY(-d3, FIX_2_562915447);
+                    z5 = MULTIPLY(z3, FIX_1_175875602);
+                    z3 = MULTIPLY(-z3, FIX_0_785694958);
+
+                    tmp0 += z3;
+                    tmp1 = z2 + z5;
+                    tmp2 += z3;
+                    tmp3 = z1 + z5;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z5 = MULTIPLY(z1, FIX_1_175875602);
+
+                    z1 = MULTIPLY(z1, FIX_0_275899380);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    tmp0 = MULTIPLY(-d7, FIX_1_662939225);
+                    z4 = MULTIPLY(-d1, FIX_0_390180644);
+                    tmp3 = MULTIPLY(d1, FIX_1_111140466);
+
+                    tmp0 += z1;
+                    tmp1 = z4 + z5;
+                    tmp2 = z3 + z5;
+                    tmp3 += z1;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
+                    tmp0 = MULTIPLY(-d7, FIX_1_387039845);
+                    tmp1 = MULTIPLY(d7, FIX_1_175875602);
+                    tmp2 = MULTIPLY(-d7, FIX_0_785694958);
+                    tmp3 = MULTIPLY(d7, FIX_0_275899380);
+                }
+            }
+        }
     } else {
-	if (d5) {
-	    if (d3) {
-		if (d1) {
-		    /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
-		    z2 = d5 + d3;
-		    z4 = d5 + d1;
-		    z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
-		    
-		    tmp1 = MULTIPLY(d5, FIX_2_053119869);
-		    tmp2 = MULTIPLY(d3, FIX_3_072711026);
-		    tmp3 = MULTIPLY(d1, FIX_1_501321110);
-		    z1 = MULTIPLY(-d1, FIX_0_899976223);
-		    z2 = MULTIPLY(-z2, FIX_2_562915447);
-		    z3 = MULTIPLY(-d3, FIX_1_961570560);
-		    z4 = MULTIPLY(-z4, FIX_0_390180644);
-		    
-		    z3 += z5;
-		    z4 += z5;
-		    
-		    tmp0 = z1 + z3;
-		    tmp1 += z2 + z4;
-		    tmp2 += z2 + z3;
-		    tmp3 += z1 + z4;
-		} else {
-		    /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
-		    z2 = d5 + d3;
-		    
-		    z5 = MULTIPLY(z2, FIX_1_175875602);
-		    tmp1 = MULTIPLY(d5, FIX_1_662939225);
-		    z4 = MULTIPLY(-d5, FIX_0_390180644);
-		    z2 = MULTIPLY(-z2, FIX_1_387039845);
-		    tmp2 = MULTIPLY(d3, FIX_1_111140466);
-		    z3 = MULTIPLY(-d3, FIX_1_961570560);
-		    
-		    tmp0 = z3 + z5;
-		    tmp1 += z2;
-		    tmp2 += z2;
-		    tmp3 = z4 + z5;
-		}
-	    } else {
-		if (d1) {
-		    /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
-		    z4 = d5 + d1;
-		    
-		    z5 = MULTIPLY(z4, FIX_1_175875602);
-		    z1 = MULTIPLY(-d1, FIX_0_899976223);
-		    tmp3 = MULTIPLY(d1, FIX_0_601344887);
-		    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
-		    z2 = MULTIPLY(-d5, FIX_2_562915447);
-		    z4 = MULTIPLY(z4, FIX_0_785694958);
-		    
-		    tmp0 = z1 + z5;
-		    tmp1 += z4;
-		    tmp2 = z2 + z5;
-		    tmp3 += z4;
-		} else {
-		    /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
-		    tmp0 = MULTIPLY(d5, FIX_1_175875602);
-		    tmp1 = MULTIPLY(d5, FIX_0_275899380);
-		    tmp2 = MULTIPLY(-d5, FIX_1_387039845);
-		    tmp3 = MULTIPLY(d5, FIX_0_785694958);
-		}
-	    }
-	} else {
-	    if (d3) {
-		if (d1) {
-		    /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
-		    z5 = d1 + d3;
-		    tmp3 = MULTIPLY(d1, FIX_0_211164243);
-		    tmp2 = MULTIPLY(-d3, FIX_1_451774981);
-		    z1 = MULTIPLY(d1, FIX_1_061594337);
-		    z2 = MULTIPLY(-d3, FIX_2_172734803);
-		    z4 = MULTIPLY(z5, FIX_0_785694958);
-		    z5 = MULTIPLY(z5, FIX_1_175875602);
-		    
-		    tmp0 = z1 - z4;
-		    tmp1 = z2 + z4;
-		    tmp2 += z5;
-		    tmp3 += z5;
-		} else {
-		    /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
-		    tmp0 = MULTIPLY(-d3, FIX_0_785694958);
-		    tmp1 = MULTIPLY(-d3, FIX_1_387039845);
-		    tmp2 = MULTIPLY(-d3, FIX_0_275899380);
-		    tmp3 = MULTIPLY(d3, FIX_1_175875602);
-		}
-	    } else {
-		if (d1) {
-		    /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
-		    tmp0 = MULTIPLY(d1, FIX_0_275899380);
-		    tmp1 = MULTIPLY(d1, FIX_0_785694958);
-		    tmp2 = MULTIPLY(d1, FIX_1_175875602);
-		    tmp3 = MULTIPLY(d1, FIX_1_387039845);
-		} else {
-		    /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
-		    tmp0 = tmp1 = tmp2 = tmp3 = 0;
-		}
-	    }
-	}
+        if (d5) {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
+                    z2 = d5 + d3;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
+
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-d1, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-d3, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 = z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
+                    z2 = d5 + d3;
+
+                    z5 = MULTIPLY(z2, FIX_1_175875602);
+                    tmp1 = MULTIPLY(d5, FIX_1_662939225);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+                    z2 = MULTIPLY(-z2, FIX_1_387039845);
+                    tmp2 = MULTIPLY(d3, FIX_1_111140466);
+                    z3 = MULTIPLY(-d3, FIX_1_961570560);
+
+                    tmp0 = z3 + z5;
+                    tmp1 += z2;
+                    tmp2 += z2;
+                    tmp3 = z4 + z5;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
+                    z4 = d5 + d1;
+
+                    z5 = MULTIPLY(z4, FIX_1_175875602);
+                    z1 = MULTIPLY(-d1, FIX_0_899976223);
+                    tmp3 = MULTIPLY(d1, FIX_0_601344887);
+                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z4 = MULTIPLY(z4, FIX_0_785694958);
+
+                    tmp0 = z1 + z5;
+                    tmp1 += z4;
+                    tmp2 = z2 + z5;
+                    tmp3 += z4;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
+                    tmp0 = MULTIPLY(d5, FIX_1_175875602);
+                    tmp1 = MULTIPLY(d5, FIX_0_275899380);
+                    tmp2 = MULTIPLY(-d5, FIX_1_387039845);
+                    tmp3 = MULTIPLY(d5, FIX_0_785694958);
+                }
+            }
+        } else {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
+                    z5 = d1 + d3;
+                    tmp3 = MULTIPLY(d1, FIX_0_211164243);
+                    tmp2 = MULTIPLY(-d3, FIX_1_451774981);
+                    z1 = MULTIPLY(d1, FIX_1_061594337);
+                    z2 = MULTIPLY(-d3, FIX_2_172734803);
+                    z4 = MULTIPLY(z5, FIX_0_785694958);
+                    z5 = MULTIPLY(z5, FIX_1_175875602);
+
+                    tmp0 = z1 - z4;
+                    tmp1 = z2 + z4;
+                    tmp2 += z5;
+                    tmp3 += z5;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
+                    tmp0 = MULTIPLY(-d3, FIX_0_785694958);
+                    tmp1 = MULTIPLY(-d3, FIX_1_387039845);
+                    tmp2 = MULTIPLY(-d3, FIX_0_275899380);
+                    tmp3 = MULTIPLY(d3, FIX_1_175875602);
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
+                    tmp0 = MULTIPLY(d1, FIX_0_275899380);
+                    tmp1 = MULTIPLY(d1, FIX_0_785694958);
+                    tmp2 = MULTIPLY(d1, FIX_1_175875602);
+                    tmp3 = MULTIPLY(d1, FIX_1_387039845);
+                } else {
+                    /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
+                    tmp0 = tmp1 = tmp2 = tmp3 = 0;
+                }
+            }
+        }
     }
 }
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
@@ -552,7 +552,7 @@ void j_rev_dct(DCTBLOCK data)
     dataptr[3] = (DCTELEM) DESCALE(tmp13 + tmp0, CONST_BITS-PASS1_BITS);
     dataptr[4] = (DCTELEM) DESCALE(tmp13 - tmp0, CONST_BITS-PASS1_BITS);
 
-    dataptr += DCTSIZE;		/* advance pointer to next row */
+    dataptr += DCTSIZE;         /* advance pointer to next row */
   }
 
   /* Pass 2: process columns. */
@@ -581,334 +581,334 @@ void j_rev_dct(DCTBLOCK data)
     /* Even part: reverse the even part of the forward DCT. */
     /* The rotator is sqrt(2)*c(-6). */
     if (d6) {
-	    if (d2) {
-		    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
-		    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
-		    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
-		    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
-
-		    tmp0 = (d0 + d4) << CONST_BITS;
-		    tmp1 = (d0 - d4) << CONST_BITS;
-
-		    tmp10 = tmp0 + tmp3;
-		    tmp13 = tmp0 - tmp3;
-		    tmp11 = tmp1 + tmp2;
-		    tmp12 = tmp1 - tmp2;
-	    } else {
-		    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
-		    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
-		    tmp3 = MULTIPLY(d6, FIX_0_541196100);
-
-		    tmp0 = (d0 + d4) << CONST_BITS;
-		    tmp1 = (d0 - d4) << CONST_BITS;
-
-		    tmp10 = tmp0 + tmp3;
-		    tmp13 = tmp0 - tmp3;
-		    tmp11 = tmp1 + tmp2;
-		    tmp12 = tmp1 - tmp2;
-	    }
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
     } else {
-	    if (d2) {
-		    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
-		    tmp2 = MULTIPLY(d2, FIX_0_541196100);
-		    tmp3 = MULTIPLY(d2, FIX_1_306562965);
-
-		    tmp0 = (d0 + d4) << CONST_BITS;
-		    tmp1 = (d0 - d4) << CONST_BITS;
-
-		    tmp10 = tmp0 + tmp3;
-		    tmp13 = tmp0 - tmp3;
-		    tmp11 = tmp1 + tmp2;
-		    tmp12 = tmp1 - tmp2;
-	    } else {
-		    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
-		    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
-		    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
-	    }
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
+                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+            }
     }
 
     /* Odd part per figure 8; the matrix is unitary and hence its
      * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
      */
     if (d7) {
-	if (d5) {
-	    if (d3) {
-		if (d1) {
-		    /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
-		    z1 = d7 + d1;
-		    z2 = d5 + d3;
-		    z3 = d7 + d3;
-		    z4 = d5 + d1;
-		    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
-		    
-		    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
-		    tmp1 = MULTIPLY(d5, FIX_2_053119869);
-		    tmp2 = MULTIPLY(d3, FIX_3_072711026);
-		    tmp3 = MULTIPLY(d1, FIX_1_501321110);
-		    z1 = MULTIPLY(-z1, FIX_0_899976223);
-		    z2 = MULTIPLY(-z2, FIX_2_562915447);
-		    z3 = MULTIPLY(-z3, FIX_1_961570560);
-		    z4 = MULTIPLY(-z4, FIX_0_390180644);
-		    
-		    z3 += z5;
-		    z4 += z5;
-		    
-		    tmp0 += z1 + z3;
-		    tmp1 += z2 + z4;
-		    tmp2 += z2 + z3;
-		    tmp3 += z1 + z4;
-		} else {
-		    /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
-		    z1 = d7;
-		    z2 = d5 + d3;
-		    z3 = d7 + d3;
-		    z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
-		    
-		    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
-		    tmp1 = MULTIPLY(d5, FIX_2_053119869);
-		    tmp2 = MULTIPLY(d3, FIX_3_072711026);
-		    z1 = MULTIPLY(-d7, FIX_0_899976223);
-		    z2 = MULTIPLY(-z2, FIX_2_562915447);
-		    z3 = MULTIPLY(-z3, FIX_1_961570560);
-		    z4 = MULTIPLY(-d5, FIX_0_390180644);
-		    
-		    z3 += z5;
-		    z4 += z5;
-		    
-		    tmp0 += z1 + z3;
-		    tmp1 += z2 + z4;
-		    tmp2 += z2 + z3;
-		    tmp3 = z1 + z4;
-		}
-	    } else {
-		if (d1) {
-		    /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
-		    z1 = d7 + d1;
-		    z2 = d5;
-		    z3 = d7;
-		    z4 = d5 + d1;
-		    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
-		    
-		    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
-		    tmp1 = MULTIPLY(d5, FIX_2_053119869);
-		    tmp3 = MULTIPLY(d1, FIX_1_501321110);
-		    z1 = MULTIPLY(-z1, FIX_0_899976223);
-		    z2 = MULTIPLY(-d5, FIX_2_562915447);
-		    z3 = MULTIPLY(-d7, FIX_1_961570560);
-		    z4 = MULTIPLY(-z4, FIX_0_390180644);
-		    
-		    z3 += z5;
-		    z4 += z5;
-		    
-		    tmp0 += z1 + z3;
-		    tmp1 += z2 + z4;
-		    tmp2 = z2 + z3;
-		    tmp3 += z1 + z4;
-		} else {
-		    /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
-		    tmp0 = MULTIPLY(-d7, FIX_0_601344887); 
-		    z1 = MULTIPLY(-d7, FIX_0_899976223);
-		    z3 = MULTIPLY(-d7, FIX_1_961570560);
-		    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
-		    z2 = MULTIPLY(-d5, FIX_2_562915447);
-		    z4 = MULTIPLY(-d5, FIX_0_390180644);
-		    z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
-		    
-		    z3 += z5;
-		    z4 += z5;
-		    
-		    tmp0 += z3;
-		    tmp1 += z4;
-		    tmp2 = z2 + z3;
-		    tmp3 = z1 + z4;
-		}
-	    }
-	} else {
-	    if (d3) {
-		if (d1) {
-		    /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
-		    z1 = d7 + d1;
-		    z3 = d7 + d3;
-		    z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
-		    
-		    tmp0 = MULTIPLY(d7, FIX_0_298631336); 
-		    tmp2 = MULTIPLY(d3, FIX_3_072711026);
-		    tmp3 = MULTIPLY(d1, FIX_1_501321110);
-		    z1 = MULTIPLY(-z1, FIX_0_899976223);
-		    z2 = MULTIPLY(-d3, FIX_2_562915447);
-		    z3 = MULTIPLY(-z3, FIX_1_961570560);
-		    z4 = MULTIPLY(-d1, FIX_0_390180644);
-		    
-		    z3 += z5;
-		    z4 += z5;
-		    
-		    tmp0 += z1 + z3;
-		    tmp1 = z2 + z4;
-		    tmp2 += z2 + z3;
-		    tmp3 += z1 + z4;
-		} else {
-		    /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
-		    z3 = d7 + d3;
-		    
-		    tmp0 = MULTIPLY(-d7, FIX_0_601344887); 
-		    z1 = MULTIPLY(-d7, FIX_0_899976223);
-		    tmp2 = MULTIPLY(d3, FIX_0_509795579);
-		    z2 = MULTIPLY(-d3, FIX_2_562915447);
-		    z5 = MULTIPLY(z3, FIX_1_175875602);
-		    z3 = MULTIPLY(-z3, FIX_0_785694958);
-		    
-		    tmp0 += z3;
-		    tmp1 = z2 + z5;
-		    tmp2 += z3;
-		    tmp3 = z1 + z5;
-		}
-	    } else {
-		if (d1) {
-		    /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
-		    z1 = d7 + d1;
-		    z5 = MULTIPLY(z1, FIX_1_175875602);
-
-		    z1 = MULTIPLY(z1, FIX_0_275899380);
-		    z3 = MULTIPLY(-d7, FIX_1_961570560);
-		    tmp0 = MULTIPLY(-d7, FIX_1_662939225); 
-		    z4 = MULTIPLY(-d1, FIX_0_390180644);
-		    tmp3 = MULTIPLY(d1, FIX_1_111140466);
-
-		    tmp0 += z1;
-		    tmp1 = z4 + z5;
-		    tmp2 = z3 + z5;
-		    tmp3 += z1;
-		} else {
-		    /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
-		    tmp0 = MULTIPLY(-d7, FIX_1_387039845);
-		    tmp1 = MULTIPLY(d7, FIX_1_175875602);
-		    tmp2 = MULTIPLY(-d7, FIX_0_785694958);
-		    tmp3 = MULTIPLY(d7, FIX_0_275899380);
-		}
-	    }
-	}
+        if (d5) {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 != 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z2 = d5 + d3;
+                    z3 = d7 + d3;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 != 0, d7 != 0 */
+                    z1 = d7;
+                    z2 = d5 + d3;
+                    z3 = d7 + d3;
+                    z5 = MULTIPLY(z3 + d5, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 = z1 + z4;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 != 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z2 = d5;
+                    z3 = d7;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(z3 + z4, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 = z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 != 0, d7 != 0 */
+                    tmp0 = MULTIPLY(-d7, FIX_0_601344887);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+                    z5 = MULTIPLY(d5 + d7, FIX_1_175875602);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z3;
+                    tmp1 += z4;
+                    tmp2 = z2 + z3;
+                    tmp3 = z1 + z4;
+                }
+            }
+        } else {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 == 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z3 = d7 + d3;
+                    z5 = MULTIPLY(z3 + d1, FIX_1_175875602);
+
+                    tmp0 = MULTIPLY(d7, FIX_0_298631336);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-z1, FIX_0_899976223);
+                    z2 = MULTIPLY(-d3, FIX_2_562915447);
+                    z3 = MULTIPLY(-z3, FIX_1_961570560);
+                    z4 = MULTIPLY(-d1, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 += z1 + z3;
+                    tmp1 = z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 == 0, d7 != 0 */
+                    z3 = d7 + d3;
+
+                    tmp0 = MULTIPLY(-d7, FIX_0_601344887);
+                    z1 = MULTIPLY(-d7, FIX_0_899976223);
+                    tmp2 = MULTIPLY(d3, FIX_0_509795579);
+                    z2 = MULTIPLY(-d3, FIX_2_562915447);
+                    z5 = MULTIPLY(z3, FIX_1_175875602);
+                    z3 = MULTIPLY(-z3, FIX_0_785694958);
+
+                    tmp0 += z3;
+                    tmp1 = z2 + z5;
+                    tmp2 += z3;
+                    tmp3 = z1 + z5;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 == 0, d7 != 0 */
+                    z1 = d7 + d1;
+                    z5 = MULTIPLY(z1, FIX_1_175875602);
+
+                    z1 = MULTIPLY(z1, FIX_0_275899380);
+                    z3 = MULTIPLY(-d7, FIX_1_961570560);
+                    tmp0 = MULTIPLY(-d7, FIX_1_662939225);
+                    z4 = MULTIPLY(-d1, FIX_0_390180644);
+                    tmp3 = MULTIPLY(d1, FIX_1_111140466);
+
+                    tmp0 += z1;
+                    tmp1 = z4 + z5;
+                    tmp2 = z3 + z5;
+                    tmp3 += z1;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 == 0, d7 != 0 */
+                    tmp0 = MULTIPLY(-d7, FIX_1_387039845);
+                    tmp1 = MULTIPLY(d7, FIX_1_175875602);
+                    tmp2 = MULTIPLY(-d7, FIX_0_785694958);
+                    tmp3 = MULTIPLY(d7, FIX_0_275899380);
+                }
+            }
+        }
     } else {
-	if (d5) {
-	    if (d3) {
-		if (d1) {
-		    /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
-		    z2 = d5 + d3;
-		    z4 = d5 + d1;
-		    z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
-		    
-		    tmp1 = MULTIPLY(d5, FIX_2_053119869);
-		    tmp2 = MULTIPLY(d3, FIX_3_072711026);
-		    tmp3 = MULTIPLY(d1, FIX_1_501321110);
-		    z1 = MULTIPLY(-d1, FIX_0_899976223);
-		    z2 = MULTIPLY(-z2, FIX_2_562915447);
-		    z3 = MULTIPLY(-d3, FIX_1_961570560);
-		    z4 = MULTIPLY(-z4, FIX_0_390180644);
-		    
-		    z3 += z5;
-		    z4 += z5;
-		    
-		    tmp0 = z1 + z3;
-		    tmp1 += z2 + z4;
-		    tmp2 += z2 + z3;
-		    tmp3 += z1 + z4;
-		} else {
-		    /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
-		    z2 = d5 + d3;
-		    
-		    z5 = MULTIPLY(z2, FIX_1_175875602);
-		    tmp1 = MULTIPLY(d5, FIX_1_662939225);
-		    z4 = MULTIPLY(-d5, FIX_0_390180644);
-		    z2 = MULTIPLY(-z2, FIX_1_387039845);
-		    tmp2 = MULTIPLY(d3, FIX_1_111140466);
-		    z3 = MULTIPLY(-d3, FIX_1_961570560);
-		    
-		    tmp0 = z3 + z5;
-		    tmp1 += z2;
-		    tmp2 += z2;
-		    tmp3 = z4 + z5;
-		}
-	    } else {
-		if (d1) {
-		    /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
-		    z4 = d5 + d1;
-		    
-		    z5 = MULTIPLY(z4, FIX_1_175875602);
-		    z1 = MULTIPLY(-d1, FIX_0_899976223);
-		    tmp3 = MULTIPLY(d1, FIX_0_601344887);
-		    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
-		    z2 = MULTIPLY(-d5, FIX_2_562915447);
-		    z4 = MULTIPLY(z4, FIX_0_785694958);
-		    
-		    tmp0 = z1 + z5;
-		    tmp1 += z4;
-		    tmp2 = z2 + z5;
-		    tmp3 += z4;
-		} else {
-		    /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
-		    tmp0 = MULTIPLY(d5, FIX_1_175875602);
-		    tmp1 = MULTIPLY(d5, FIX_0_275899380);
-		    tmp2 = MULTIPLY(-d5, FIX_1_387039845);
-		    tmp3 = MULTIPLY(d5, FIX_0_785694958);
-		}
-	    }
-	} else {
-	    if (d3) {
-		if (d1) {
-		    /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
-		    z5 = d1 + d3;
-		    tmp3 = MULTIPLY(d1, FIX_0_211164243);
-		    tmp2 = MULTIPLY(-d3, FIX_1_451774981);
-		    z1 = MULTIPLY(d1, FIX_1_061594337);
-		    z2 = MULTIPLY(-d3, FIX_2_172734803);
-		    z4 = MULTIPLY(z5, FIX_0_785694958);
-		    z5 = MULTIPLY(z5, FIX_1_175875602);
-		    
-		    tmp0 = z1 - z4;
-		    tmp1 = z2 + z4;
-		    tmp2 += z5;
-		    tmp3 += z5;
-		} else {
-		    /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
-		    tmp0 = MULTIPLY(-d3, FIX_0_785694958);
-		    tmp1 = MULTIPLY(-d3, FIX_1_387039845);
-		    tmp2 = MULTIPLY(-d3, FIX_0_275899380);
-		    tmp3 = MULTIPLY(d3, FIX_1_175875602);
-		}
-	    } else {
-		if (d1) {
-		    /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
-		    tmp0 = MULTIPLY(d1, FIX_0_275899380);
-		    tmp1 = MULTIPLY(d1, FIX_0_785694958);
-		    tmp2 = MULTIPLY(d1, FIX_1_175875602);
-		    tmp3 = MULTIPLY(d1, FIX_1_387039845);
-		} else {
-		    /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
-		    tmp0 = tmp1 = tmp2 = tmp3 = 0;
-		}
-	    }
-	}
+        if (d5) {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 != 0, d7 == 0 */
+                    z2 = d5 + d3;
+                    z4 = d5 + d1;
+                    z5 = MULTIPLY(d3 + z4, FIX_1_175875602);
+
+                    tmp1 = MULTIPLY(d5, FIX_2_053119869);
+                    tmp2 = MULTIPLY(d3, FIX_3_072711026);
+                    tmp3 = MULTIPLY(d1, FIX_1_501321110);
+                    z1 = MULTIPLY(-d1, FIX_0_899976223);
+                    z2 = MULTIPLY(-z2, FIX_2_562915447);
+                    z3 = MULTIPLY(-d3, FIX_1_961570560);
+                    z4 = MULTIPLY(-z4, FIX_0_390180644);
+
+                    z3 += z5;
+                    z4 += z5;
+
+                    tmp0 = z1 + z3;
+                    tmp1 += z2 + z4;
+                    tmp2 += z2 + z3;
+                    tmp3 += z1 + z4;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 != 0, d7 == 0 */
+                    z2 = d5 + d3;
+
+                    z5 = MULTIPLY(z2, FIX_1_175875602);
+                    tmp1 = MULTIPLY(d5, FIX_1_662939225);
+                    z4 = MULTIPLY(-d5, FIX_0_390180644);
+                    z2 = MULTIPLY(-z2, FIX_1_387039845);
+                    tmp2 = MULTIPLY(d3, FIX_1_111140466);
+                    z3 = MULTIPLY(-d3, FIX_1_961570560);
+
+                    tmp0 = z3 + z5;
+                    tmp1 += z2;
+                    tmp2 += z2;
+                    tmp3 = z4 + z5;
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 != 0, d7 == 0 */
+                    z4 = d5 + d1;
+
+                    z5 = MULTIPLY(z4, FIX_1_175875602);
+                    z1 = MULTIPLY(-d1, FIX_0_899976223);
+                    tmp3 = MULTIPLY(d1, FIX_0_601344887);
+                    tmp1 = MULTIPLY(-d5, FIX_0_509795579);
+                    z2 = MULTIPLY(-d5, FIX_2_562915447);
+                    z4 = MULTIPLY(z4, FIX_0_785694958);
+
+                    tmp0 = z1 + z5;
+                    tmp1 += z4;
+                    tmp2 = z2 + z5;
+                    tmp3 += z4;
+                } else {
+                    /* d1 == 0, d3 == 0, d5 != 0, d7 == 0 */
+                    tmp0 = MULTIPLY(d5, FIX_1_175875602);
+                    tmp1 = MULTIPLY(d5, FIX_0_275899380);
+                    tmp2 = MULTIPLY(-d5, FIX_1_387039845);
+                    tmp3 = MULTIPLY(d5, FIX_0_785694958);
+                }
+            }
+        } else {
+            if (d3) {
+                if (d1) {
+                    /* d1 != 0, d3 != 0, d5 == 0, d7 == 0 */
+                    z5 = d1 + d3;
+                    tmp3 = MULTIPLY(d1, FIX_0_211164243);
+                    tmp2 = MULTIPLY(-d3, FIX_1_451774981);
+                    z1 = MULTIPLY(d1, FIX_1_061594337);
+                    z2 = MULTIPLY(-d3, FIX_2_172734803);
+                    z4 = MULTIPLY(z5, FIX_0_785694958);
+                    z5 = MULTIPLY(z5, FIX_1_175875602);
+
+                    tmp0 = z1 - z4;
+                    tmp1 = z2 + z4;
+                    tmp2 += z5;
+                    tmp3 += z5;
+                } else {
+                    /* d1 == 0, d3 != 0, d5 == 0, d7 == 0 */
+                    tmp0 = MULTIPLY(-d3, FIX_0_785694958);
+                    tmp1 = MULTIPLY(-d3, FIX_1_387039845);
+                    tmp2 = MULTIPLY(-d3, FIX_0_275899380);
+                    tmp3 = MULTIPLY(d3, FIX_1_175875602);
+                }
+            } else {
+                if (d1) {
+                    /* d1 != 0, d3 == 0, d5 == 0, d7 == 0 */
+                    tmp0 = MULTIPLY(d1, FIX_0_275899380);
+                    tmp1 = MULTIPLY(d1, FIX_0_785694958);
+                    tmp2 = MULTIPLY(d1, FIX_1_175875602);
+                    tmp3 = MULTIPLY(d1, FIX_1_387039845);
+                } else {
+                    /* d1 == 0, d3 == 0, d5 == 0, d7 == 0 */
+                    tmp0 = tmp1 = tmp2 = tmp3 = 0;
+                }
+            }
+        }
     }
 
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
 
     dataptr[DCTSIZE*0] = (DCTELEM) DESCALE(tmp10 + tmp3,
-					   CONST_BITS+PASS1_BITS+3);
+                                           CONST_BITS+PASS1_BITS+3);
     dataptr[DCTSIZE*7] = (DCTELEM) DESCALE(tmp10 - tmp3,
-					   CONST_BITS+PASS1_BITS+3);
+                                           CONST_BITS+PASS1_BITS+3);
     dataptr[DCTSIZE*1] = (DCTELEM) DESCALE(tmp11 + tmp2,
-					   CONST_BITS+PASS1_BITS+3);
+                                           CONST_BITS+PASS1_BITS+3);
     dataptr[DCTSIZE*6] = (DCTELEM) DESCALE(tmp11 - tmp2,
-					   CONST_BITS+PASS1_BITS+3);
+                                           CONST_BITS+PASS1_BITS+3);
     dataptr[DCTSIZE*2] = (DCTELEM) DESCALE(tmp12 + tmp1,
-					   CONST_BITS+PASS1_BITS+3);
+                                           CONST_BITS+PASS1_BITS+3);
     dataptr[DCTSIZE*5] = (DCTELEM) DESCALE(tmp12 - tmp1,
-					   CONST_BITS+PASS1_BITS+3);
+                                           CONST_BITS+PASS1_BITS+3);
     dataptr[DCTSIZE*3] = (DCTELEM) DESCALE(tmp13 + tmp0,
-					   CONST_BITS+PASS1_BITS+3);
+                                           CONST_BITS+PASS1_BITS+3);
     dataptr[DCTSIZE*4] = (DCTELEM) DESCALE(tmp13 - tmp0,
-					   CONST_BITS+PASS1_BITS+3);
-    
-    dataptr++;			/* advance pointer to next column */
+                                           CONST_BITS+PASS1_BITS+3);
+
+    dataptr++;                  /* advance pointer to next column */
   }
 }
 
@@ -930,7 +930,7 @@ void j_rev_dct4(DCTBLOCK data)
   /* furthermore, we scale the results by 2**PASS1_BITS. */
 
   data[0] += 4;
-  
+
   dataptr = data;
 
   for (rowctr = DCTSIZE-1; rowctr >= 0; rowctr--) {
@@ -953,65 +953,65 @@ void j_rev_dct4(DCTBLOCK data)
     if ((d2 | d4 | d6) == 0) {
       /* AC terms all zero */
       if (d0) {
-	  /* Compute a 32 bit value to assign. */
-	  DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
-	  register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
-	  
-	  idataptr[0] = v;
-	  idataptr[1] = v;
+          /* Compute a 32 bit value to assign. */
+          DCTELEM dcval = (DCTELEM) (d0 << PASS1_BITS);
+          register int v = (dcval & 0xffff) | ((dcval << 16) & 0xffff0000);
+
+          idataptr[0] = v;
+          idataptr[1] = v;
       }
-      
-      dataptr += DCTSTRIDE;	/* advance pointer to next row */
+
+      dataptr += DCTSTRIDE;     /* advance pointer to next row */
       continue;
     }
-    
+
     /* Even part: reverse the even part of the forward DCT. */
     /* The rotator is sqrt(2)*c(-6). */
     if (d6) {
-	    if (d2) {
-		    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
-		    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
-		    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
-		    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
-
-		    tmp0 = (d0 + d4) << CONST_BITS;
-		    tmp1 = (d0 - d4) << CONST_BITS;
-
-		    tmp10 = tmp0 + tmp3;
-		    tmp13 = tmp0 - tmp3;
-		    tmp11 = tmp1 + tmp2;
-		    tmp12 = tmp1 - tmp2;
-	    } else {
-		    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
-		    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
-		    tmp3 = MULTIPLY(d6, FIX_0_541196100);
-
-		    tmp0 = (d0 + d4) << CONST_BITS;
-		    tmp1 = (d0 - d4) << CONST_BITS;
-
-		    tmp10 = tmp0 + tmp3;
-		    tmp13 = tmp0 - tmp3;
-		    tmp11 = tmp1 + tmp2;
-		    tmp12 = tmp1 - tmp2;
-	    }
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
     } else {
-	    if (d2) {
-		    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
-		    tmp2 = MULTIPLY(d2, FIX_0_541196100);
-		    tmp3 = MULTIPLY(d2, FIX_1_306562965);
-
-		    tmp0 = (d0 + d4) << CONST_BITS;
-		    tmp1 = (d0 - d4) << CONST_BITS;
-
-		    tmp10 = tmp0 + tmp3;
-		    tmp13 = tmp0 - tmp3;
-		    tmp11 = tmp1 + tmp2;
-		    tmp12 = tmp1 - tmp2;
-	    } else {
-		    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
-		    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
-		    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
-	    }
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
+                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+            }
       }
 
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
@@ -1021,7 +1021,7 @@ void j_rev_dct4(DCTBLOCK data)
     dataptr[2] = (DCTELEM) DESCALE(tmp12, CONST_BITS-PASS1_BITS);
     dataptr[3] = (DCTELEM) DESCALE(tmp13, CONST_BITS-PASS1_BITS);
 
-    dataptr += DCTSTRIDE;		/* advance pointer to next row */
+    dataptr += DCTSTRIDE;       /* advance pointer to next row */
   }
 
   /* Pass 2: process columns. */
@@ -1046,50 +1046,50 @@ void j_rev_dct4(DCTBLOCK data)
     /* Even part: reverse the even part of the forward DCT. */
     /* The rotator is sqrt(2)*c(-6). */
     if (d6) {
-	    if (d2) {
-		    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
-		    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
-		    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
-		    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
-
-		    tmp0 = (d0 + d4) << CONST_BITS;
-		    tmp1 = (d0 - d4) << CONST_BITS;
-
-		    tmp10 = tmp0 + tmp3;
-		    tmp13 = tmp0 - tmp3;
-		    tmp11 = tmp1 + tmp2;
-		    tmp12 = tmp1 - tmp2;
-	    } else {
-		    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
-		    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
-		    tmp3 = MULTIPLY(d6, FIX_0_541196100);
-
-		    tmp0 = (d0 + d4) << CONST_BITS;
-		    tmp1 = (d0 - d4) << CONST_BITS;
-
-		    tmp10 = tmp0 + tmp3;
-		    tmp13 = tmp0 - tmp3;
-		    tmp11 = tmp1 + tmp2;
-		    tmp12 = tmp1 - tmp2;
-	    }
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 != 0 */
+                    z1 = MULTIPLY(d2 + d6, FIX_0_541196100);
+                    tmp2 = z1 + MULTIPLY(-d6, FIX_1_847759065);
+                    tmp3 = z1 + MULTIPLY(d2, FIX_0_765366865);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 != 0 */
+                    tmp2 = MULTIPLY(-d6, FIX_1_306562965);
+                    tmp3 = MULTIPLY(d6, FIX_0_541196100);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            }
     } else {
-	    if (d2) {
-		    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
-		    tmp2 = MULTIPLY(d2, FIX_0_541196100);
-		    tmp3 = MULTIPLY(d2, FIX_1_306562965);
-
-		    tmp0 = (d0 + d4) << CONST_BITS;
-		    tmp1 = (d0 - d4) << CONST_BITS;
-
-		    tmp10 = tmp0 + tmp3;
-		    tmp13 = tmp0 - tmp3;
-		    tmp11 = tmp1 + tmp2;
-		    tmp12 = tmp1 - tmp2;
-	    } else {
-		    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
-		    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
-		    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
-	    }
+            if (d2) {
+                    /* d0 != 0, d2 != 0, d4 != 0, d6 == 0 */
+                    tmp2 = MULTIPLY(d2, FIX_0_541196100);
+                    tmp3 = MULTIPLY(d2, FIX_1_306562965);
+
+                    tmp0 = (d0 + d4) << CONST_BITS;
+                    tmp1 = (d0 - d4) << CONST_BITS;
+
+                    tmp10 = tmp0 + tmp3;
+                    tmp13 = tmp0 - tmp3;
+                    tmp11 = tmp1 + tmp2;
+                    tmp12 = tmp1 - tmp2;
+            } else {
+                    /* d0 != 0, d2 == 0, d4 != 0, d6 == 0 */
+                    tmp10 = tmp13 = (d0 + d4) << CONST_BITS;
+                    tmp11 = tmp12 = (d0 - d4) << CONST_BITS;
+            }
     }
 
     /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
@@ -1098,8 +1098,8 @@ void j_rev_dct4(DCTBLOCK data)
     dataptr[DCTSTRIDE*1] = tmp11 >> (CONST_BITS+PASS1_BITS+3);
     dataptr[DCTSTRIDE*2] = tmp12 >> (CONST_BITS+PASS1_BITS+3);
     dataptr[DCTSTRIDE*3] = tmp13 >> (CONST_BITS+PASS1_BITS+3);
-    
-    dataptr++;			/* advance pointer to next column */
+
+    dataptr++;                  /* advance pointer to next column */
   }
 }
 
@@ -1111,7 +1111,7 @@ void j_rev_dct2(DCTBLOCK data){
   d01 = data[0+0*DCTSTRIDE] - data[1+0*DCTSTRIDE];
   d10 = data[0+1*DCTSTRIDE] + data[1+1*DCTSTRIDE];
   d11 = data[0+1*DCTSTRIDE] - data[1+1*DCTSTRIDE];
- 
+
   data[0+0*DCTSTRIDE]= (d00 + d10)>>3;
   data[1+0*DCTSTRIDE]= (d01 + d11)>>3;
   data[0+1*DCTSTRIDE]= (d00 - d10)>>3;
diff --git a/src/libffmpeg/libavcodec/lcl.c b/src/libffmpeg/libavcodec/lcl.c
index b93f31448..0bc118af2 100644
--- a/src/libffmpeg/libavcodec/lcl.c
+++ b/src/libffmpeg/libavcodec/lcl.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -81,8 +81,8 @@
  */
 typedef struct LclContext {
 
-	AVCodecContext *avctx;
-	AVFrame pic;
+        AVCodecContext *avctx;
+        AVFrame pic;
     PutBitContext pb;
 
     // Image type
@@ -152,7 +152,7 @@ static unsigned int mszh_decomp(unsigned char * srcptr, int srclen, unsigned cha
     unsigned char mask = 0;
     unsigned char maskbit = 0;
     unsigned int ofs, cnt;
-  
+
     while ((srclen > 0) && (destptr < destptr_end)) {
         if (maskbit == 0) {
             mask = *(srcptr++);
@@ -198,8 +198,8 @@ static unsigned int mszh_decomp(unsigned char * srcptr, int srclen, unsigned cha
  */
 static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8_t *buf, int buf_size)
 {
-	LclContext * const c = (LclContext *)avctx->priv_data;
-	unsigned char *encoded = (unsigned char *)buf;
+        LclContext * const c = (LclContext *)avctx->priv_data;
+        unsigned char *encoded = (unsigned char *)buf;
     unsigned int pixel_ptr;
     int row, col;
     unsigned char *outptr;
@@ -214,15 +214,15 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
 #endif
     unsigned int len = buf_size;
 
-	if(c->pic.data[0])
-		avctx->release_buffer(avctx, &c->pic);
+        if(c->pic.data[0])
+                avctx->release_buffer(avctx, &c->pic);
 
-	c->pic.reference = 0;
-	c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
-	if(avctx->get_buffer(avctx, &c->pic) < 0){
-		av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-		return -1;
-	}
+        c->pic.reference = 0;
+        c->pic.buffer_hints = FF_BUFFER_HINTS_VALID;
+        if(avctx->get_buffer(avctx, &c->pic) < 0){
+                av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
+                return -1;
+        }
 
     outptr = c->pic.data[0]; // Output image pointer
 
@@ -290,7 +290,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
                 c->zstream.next_in = encoded + 8;
                 c->zstream.avail_in = mthread_inlen;
                 c->zstream.next_out = c->decomp_buf;
-                c->zstream.avail_out = c->decomp_size;    
+                c->zstream.avail_out = c->decomp_size;
                 zret = inflate(&(c->zstream), Z_FINISH);
                 if ((zret != Z_OK) && (zret != Z_STREAM_END)) {
                     av_log(avctx, AV_LOG_ERROR, "Mthread1 inflate error: %d\n", zret);
@@ -309,7 +309,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
                 c->zstream.next_in = encoded + 8 + mthread_inlen;
                 c->zstream.avail_in = len - mthread_inlen;
                 c->zstream.next_out = c->decomp_buf + mthread_outlen;
-                c->zstream.avail_out = c->decomp_size - mthread_outlen;    
+                c->zstream.avail_out = c->decomp_size - mthread_outlen;
                 zret = inflate(&(c->zstream), Z_FINISH);
                 if ((zret != Z_OK) && (zret != Z_STREAM_END)) {
                     av_log(avctx, AV_LOG_ERROR, "Mthread2 inflate error: %d\n", zret);
@@ -358,7 +358,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
                     pixel_ptr = row * width * 3;
                     yq = encoded[pixel_ptr++];
                     uqvq = encoded[pixel_ptr++];
-              	  uqvq+=(encoded[pixel_ptr++] << 8);
+                    uqvq+=(encoded[pixel_ptr++] << 8);
                     for (col = 1; col < width; col++) {
                         encoded[pixel_ptr] = yq -= encoded[pixel_ptr];
                         uqvq -= (encoded[pixel_ptr+1] | (encoded[pixel_ptr+2]<<8));
@@ -565,11 +565,11 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
 #else
 
     init_put_bits(&c->pb, buf, buf_size);
-    
+
     *p = *pict;
     p->pict_type= FF_I_TYPE;
     p->key_frame= 1;
-    
+
     if(avctx->pix_fmt != PIX_FMT_BGR24){
         av_log(avctx, AV_LOG_ERROR, "Format not supported!\n");
         return -1;
@@ -588,8 +588,8 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
         c->zstream.avail_in = avctx->width*3;
         zret = deflate(&(c->zstream), Z_NO_FLUSH);
         if (zret != Z_OK) {
-    	    av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret);
-    	    return -1;
+            av_log(avctx, AV_LOG_ERROR, "Deflate error: %d\n", zret);
+            return -1;
         }
     }
     zret = deflate(&(c->zstream), Z_FINISH);
@@ -628,7 +628,7 @@ static int decode_init(AVCodecContext *avctx)
 
 #ifdef CONFIG_ZLIB
     // Needed if zlib unused or init aborted before inflateInit
-    memset(&(c->zstream), 0, sizeof(z_stream)); 
+    memset(&(c->zstream), 0, sizeof(z_stream));
 #endif
 
     if (avctx->extradata_size < 8) {
@@ -640,7 +640,7 @@ static int decode_init(AVCodecContext *avctx)
         return 1;
     }
 
-    /* Check codec type */ 
+    /* Check codec type */
     if (((avctx->codec_id == CODEC_ID_MSZH)  && (*((char *)avctx->extradata + 7) != CODEC_MSZH)) ||
         ((avctx->codec_id == CODEC_ID_ZLIB)  && (*((char *)avctx->extradata + 7) != CODEC_ZLIB))) {
         av_log(avctx, AV_LOG_ERROR, "Codec id and codec type mismatch. This should not happen.\n");
@@ -714,7 +714,7 @@ static int decode_init(AVCodecContext *avctx)
                     break;
                 default:
                     if ((c->compression < Z_NO_COMPRESSION) || (c->compression > Z_BEST_COMPRESSION)) {
-                	    av_log(avctx, AV_LOG_ERROR, "Unsupported compression level for ZLIB: (%d).\n", c->compression);
+                            av_log(avctx, AV_LOG_ERROR, "Unsupported compression level for ZLIB: (%d).\n", c->compression);
                         return 1;
                     }
                     av_log(avctx, AV_LOG_INFO, "Compression level for ZLIB: (%d).\n", c->compression);
@@ -736,8 +736,8 @@ static int decode_init(AVCodecContext *avctx)
             return 1;
         }
     }
-  
-    /* Detect flags */ 
+
+    /* Detect flags */
     c->flags = *((char *)avctx->extradata + 6);
     if (c->flags & FLAG_MULTITHREAD)
         av_log(avctx, AV_LOG_INFO, "Multithread encoder flag set.\n");
@@ -788,9 +788,9 @@ static int encode_init(AVCodecContext *avctx)
 #else
 
     c->avctx= avctx;
-        
+
     assert(avctx->width && avctx->height);
-    
+
     avctx->extradata= av_mallocz(8);
     avctx->coded_frame= &c->pic;
 
@@ -818,7 +818,7 @@ static int encode_init(AVCodecContext *avctx)
     ((uint8_t*)avctx->extradata)[6]= c->flags;
     ((uint8_t*)avctx->extradata)[7]= CODEC_ZLIB;
     c->avctx->extradata_size= 8;
-    
+
     c->zstream.zalloc = Z_NULL;
     c->zstream.zfree = Z_NULL;
     c->zstream.opaque = Z_NULL;
@@ -851,15 +851,15 @@ static int encode_init(AVCodecContext *avctx)
  */
 static int decode_end(AVCodecContext *avctx)
 {
-	LclContext * const c = (LclContext *)avctx->priv_data;
+        LclContext * const c = (LclContext *)avctx->priv_data;
 
-	if (c->pic.data[0])
-		avctx->release_buffer(avctx, &c->pic);
+        if (c->pic.data[0])
+                avctx->release_buffer(avctx, &c->pic);
 #ifdef CONFIG_ZLIB
     inflateEnd(&(c->zstream));
 #endif
 
-	return 0;
+        return 0;
 }
 
 
@@ -878,33 +878,33 @@ static int encode_end(AVCodecContext *avctx)
 #ifdef CONFIG_ZLIB
     deflateEnd(&(c->zstream));
 #endif
-    
+
     return 0;
 }
 
 AVCodec mszh_decoder = {
-	"mszh",
-	CODEC_TYPE_VIDEO,
-	CODEC_ID_MSZH,
-	sizeof(LclContext),
-	decode_init,
-	NULL,
-	decode_end,
-	decode_frame,
-	CODEC_CAP_DR1,
+        "mszh",
+        CODEC_TYPE_VIDEO,
+        CODEC_ID_MSZH,
+        sizeof(LclContext),
+        decode_init,
+        NULL,
+        decode_end,
+        decode_frame,
+        CODEC_CAP_DR1,
 };
 
 
 AVCodec zlib_decoder = {
-	"zlib",
-	CODEC_TYPE_VIDEO,
-	CODEC_ID_ZLIB,
-	sizeof(LclContext),
-	decode_init,
-	NULL,
-	decode_end,
-	decode_frame,
-	CODEC_CAP_DR1,
+        "zlib",
+        CODEC_TYPE_VIDEO,
+        CODEC_ID_ZLIB,
+        sizeof(LclContext),
+        decode_init,
+        NULL,
+        decode_end,
+        decode_frame,
+        CODEC_CAP_DR1,
 };
 
 #ifdef CONFIG_ENCODERS
diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess.c b/src/libffmpeg/libavcodec/libpostproc/postprocess.c
index 9f3e522ed..63d65fece 100644
--- a/src/libffmpeg/libavcodec/libpostproc/postprocess.c
+++ b/src/libffmpeg/libavcodec/libpostproc/postprocess.c
@@ -15,35 +15,35 @@
 
     You should have received a copy of the GNU General Public License
     along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 
 /**
  * @file postprocess.c
  * postprocessing.
  */
- 
+
 /*
-			C	MMX	MMX2	3DNow	AltiVec
-isVertDC		Ec	Ec			Ec
-isVertMinMaxOk		Ec	Ec			Ec
-doVertLowPass		E		e	e	Ec
-doVertDefFilter		Ec	Ec	e	e	Ec
-isHorizDC		Ec	Ec			Ec
-isHorizMinMaxOk		a	E			Ec
-doHorizLowPass		E		e	e	Ec
-doHorizDefFilter	Ec	Ec	e	e	Ec
-do_a_deblock		Ec	E	Ec	E
-deRing			E		e	e*	Ecp
-Vertical RKAlgo1	E		a	a
-Horizontal RKAlgo1			a	a
-Vertical X1#		a		E	E
-Horizontal X1#		a		E	E
-LinIpolDeinterlace	e		E	E*
-CubicIpolDeinterlace	a		e	e*
-LinBlendDeinterlace	e		E	E*
-MedianDeinterlace#	E	Ec	Ec
-TempDeNoiser#		E		e	e	Ec
+                        C       MMX     MMX2    3DNow   AltiVec
+isVertDC                Ec      Ec                      Ec
+isVertMinMaxOk          Ec      Ec                      Ec
+doVertLowPass           E               e       e       Ec
+doVertDefFilter         Ec      Ec      e       e       Ec
+isHorizDC               Ec      Ec                      Ec
+isHorizMinMaxOk         a       E                       Ec
+doHorizLowPass          E               e       e       Ec
+doHorizDefFilter        Ec      Ec      e       e       Ec
+do_a_deblock            Ec      E       Ec      E
+deRing                  E               e       e*      Ecp
+Vertical RKAlgo1        E               a       a
+Horizontal RKAlgo1                      a       a
+Vertical X1#            a               E       E
+Horizontal X1#          a               E       E
+LinIpolDeinterlace      e               E       E*
+CubicIpolDeinterlace    a               e       e*
+LinBlendDeinterlace     e               E       E*
+MedianDeinterlace#      E       Ec      Ec
+TempDeNoiser#           E               e       e       Ec
 
 * i dont have a 3dnow CPU -> its untested, but noone said it doesnt work so it seems to work
 # more or less selfinvented filters so the exactness isnt too meaningfull
@@ -61,7 +61,7 @@ unroll stuff if instructions depend too much on the prior one
 move YScale thing to the end instead of fixing QP
 write a faster and higher quality deblocking filter :)
 make the mainloop more flexible (variable number of blocks at once
-	(the if/else stuff per block is slowing things down)
+        (the if/else stuff per block is slowing things down)
 compare the quality & speed of all filters
 split this huge file
 optimize c versions
@@ -120,14 +120,14 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
 #endif
 
 #if defined(ARCH_X86) || defined(ARCH_X86_64)
-static uint64_t __attribute__((aligned(8))) attribute_used w05=		0x0005000500050005LL;
-static uint64_t __attribute__((aligned(8))) attribute_used w04=		0x0004000400040004LL;
-static uint64_t __attribute__((aligned(8))) attribute_used w20=		0x0020002000200020LL;
-static uint64_t __attribute__((aligned(8))) attribute_used b00= 		0x0000000000000000LL;
-static uint64_t __attribute__((aligned(8))) attribute_used b01= 		0x0101010101010101LL;
-static uint64_t __attribute__((aligned(8))) attribute_used b02= 		0x0202020202020202LL;
-static uint64_t __attribute__((aligned(8))) attribute_used b08= 		0x0808080808080808LL;
-static uint64_t __attribute__((aligned(8))) attribute_used b80= 		0x8080808080808080LL;
+static uint64_t __attribute__((aligned(8))) attribute_used w05= 0x0005000500050005LL;
+static uint64_t __attribute__((aligned(8))) attribute_used w04= 0x0004000400040004LL;
+static uint64_t __attribute__((aligned(8))) attribute_used w20= 0x0020002000200020LL;
+static uint64_t __attribute__((aligned(8))) attribute_used b00= 0x0000000000000000LL;
+static uint64_t __attribute__((aligned(8))) attribute_used b01= 0x0101010101010101LL;
+static uint64_t __attribute__((aligned(8))) attribute_used b02= 0x0202020202020202LL;
+static uint64_t __attribute__((aligned(8))) attribute_used b08= 0x0808080808080808LL;
+static uint64_t __attribute__((aligned(8))) attribute_used b80= 0x8080808080808080LL;
 #endif
 
 static uint8_t clip_table[3*256];
@@ -140,65 +140,65 @@ static const int attribute_used deringThreshold= 20;
 
 static struct PPFilter filters[]=
 {
-	{"hb", "hdeblock", 		1, 1, 3, H_DEBLOCK},
-	{"vb", "vdeblock", 		1, 2, 4, V_DEBLOCK},
-/*	{"hr", "rkhdeblock", 		1, 1, 3, H_RK1_FILTER},
-	{"vr", "rkvdeblock", 		1, 2, 4, V_RK1_FILTER},*/
-	{"h1", "x1hdeblock", 		1, 1, 3, H_X1_FILTER},
-	{"v1", "x1vdeblock", 		1, 2, 4, V_X1_FILTER},
-	{"ha", "ahdeblock", 		1, 1, 3, H_A_DEBLOCK},
-	{"va", "avdeblock", 		1, 2, 4, V_A_DEBLOCK},
-	{"dr", "dering", 		1, 5, 6, DERING},
-	{"al", "autolevels", 		0, 1, 2, LEVEL_FIX},
-	{"lb", "linblenddeint", 	1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
-	{"li", "linipoldeint", 		1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
-	{"ci", "cubicipoldeint",	1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
-	{"md", "mediandeint", 		1, 1, 4, MEDIAN_DEINT_FILTER},
-	{"fd", "ffmpegdeint", 		1, 1, 4, FFMPEG_DEINT_FILTER},
-	{"l5", "lowpass5", 		1, 1, 4, LOWPASS5_DEINT_FILTER},
-	{"tn", "tmpnoise", 		1, 7, 8, TEMP_NOISE_FILTER},
-	{"fq", "forcequant", 		1, 0, 0, FORCE_QUANT},
-	{NULL, NULL,0,0,0,0} //End Marker
+        {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
+        {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
+/*      {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
+        {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
+        {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
+        {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
+        {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
+        {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
+        {"dr", "dering",                1, 5, 6, DERING},
+        {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
+        {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
+        {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
+        {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
+        {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
+        {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
+        {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
+        {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
+        {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
+        {NULL, NULL,0,0,0,0} //End Marker
 };
 
 static char *replaceTable[]=
 {
-	"default", 	"hdeblock:a,vdeblock:a,dering:a",
-	"de", 		"hdeblock:a,vdeblock:a,dering:a",
-	"fast", 	"x1hdeblock:a,x1vdeblock:a,dering:a",
-	"fa", 		"x1hdeblock:a,x1vdeblock:a,dering:a",
-	"ac", 		"ha:a:128:7,va:a,dering:a",
-	NULL //End Marker
+        "default",      "hdeblock:a,vdeblock:a,dering:a",
+        "de",           "hdeblock:a,vdeblock:a,dering:a",
+        "fast",         "x1hdeblock:a,x1vdeblock:a,dering:a",
+        "fa",           "x1hdeblock:a,x1vdeblock:a,dering:a",
+        "ac",           "ha:a:128:7,va:a,dering:a",
+        NULL //End Marker
 };
 
 
 #if defined(ARCH_X86) || defined(ARCH_X86_64)
 static inline void prefetchnta(void *p)
 {
-	asm volatile(	"prefetchnta (%0)\n\t"
-		: : "r" (p)
-	);
+        asm volatile(   "prefetchnta (%0)\n\t"
+                : : "r" (p)
+        );
 }
 
 static inline void prefetcht0(void *p)
 {
-	asm volatile(	"prefetcht0 (%0)\n\t"
-		: : "r" (p)
-	);
+        asm volatile(   "prefetcht0 (%0)\n\t"
+                : : "r" (p)
+        );
 }
 
 static inline void prefetcht1(void *p)
 {
-	asm volatile(	"prefetcht1 (%0)\n\t"
-		: : "r" (p)
-	);
+        asm volatile(   "prefetcht1 (%0)\n\t"
+                : : "r" (p)
+        );
 }
 
 static inline void prefetcht2(void *p)
 {
-	asm volatile(	"prefetcht2 (%0)\n\t"
-		: : "r" (p)
-	);
+        asm volatile(   "prefetcht2 (%0)\n\t"
+                : : "r" (p)
+        );
 }
 #endif
 
@@ -209,171 +209,171 @@ static inline void prefetcht2(void *p)
  */
 static inline int isHorizDC_C(uint8_t src[], int stride, PPContext *c)
 {
-	int numEq= 0;
-	int y;
-	const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
-	const int dcThreshold= dcOffset*2 + 1;
-
-	for(y=0; y<BLOCK_SIZE; y++)
-	{
-		if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
-		src+= stride;
-	}
-	return numEq > c->ppMode.flatnessThreshold;
+        int numEq= 0;
+        int y;
+        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
+        const int dcThreshold= dcOffset*2 + 1;
+
+        for(y=0; y<BLOCK_SIZE; y++)
+        {
+                if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
+                src+= stride;
+        }
+        return numEq > c->ppMode.flatnessThreshold;
 }
 
 /**
  * Check if the middle 8x8 Block in the given 8x16 block is flat
  */
 static inline int isVertDC_C(uint8_t src[], int stride, PPContext *c){
-	int numEq= 0;
-	int y;
-	const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
-	const int dcThreshold= dcOffset*2 + 1;
-
-	src+= stride*4; // src points to begin of the 8x8 Block
-	for(y=0; y<BLOCK_SIZE-1; y++)
-	{
-		if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
-		src+= stride;
-	}
-	return numEq > c->ppMode.flatnessThreshold;
+        int numEq= 0;
+        int y;
+        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
+        const int dcThreshold= dcOffset*2 + 1;
+
+        src+= stride*4; // src points to begin of the 8x8 Block
+        for(y=0; y<BLOCK_SIZE-1; y++)
+        {
+                if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
+                src+= stride;
+        }
+        return numEq > c->ppMode.flatnessThreshold;
 }
 
 static inline int isHorizMinMaxOk_C(uint8_t src[], int stride, int QP)
 {
-	int i;
+        int i;
 #if 1
-	for(i=0; i<2; i++){
-		if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
-		src += stride;
-		if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
-		src += stride;
-		if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
-		src += stride;
-		if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
-		src += stride;
-	}
-#else        
-	for(i=0; i<8; i++){
-		if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
-		src += stride;
-	}
+        for(i=0; i<2; i++){
+                if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
+                src += stride;
+                if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
+                src += stride;
+                if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
+                src += stride;
+                if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
+                src += stride;
+        }
+#else
+        for(i=0; i<8; i++){
+                if((unsigned)(src[0] - src[7] + 2*QP) > 4*QP) return 0;
+                src += stride;
+        }
 #endif
-	return 1;
+        return 1;
 }
 
 static inline int isVertMinMaxOk_C(uint8_t src[], int stride, int QP)
 {
 #if 1
 #if 1
-	int x;
-	src+= stride*4;
-	for(x=0; x<BLOCK_SIZE; x+=4)
-	{
-		if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
-		if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
-		if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
-		if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
-	}
+        int x;
+        src+= stride*4;
+        for(x=0; x<BLOCK_SIZE; x+=4)
+        {
+                if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
+                if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
+                if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
+                if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
+        }
 #else
-	int x;
-	src+= stride*3;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
-	}
+        int x;
+        src+= stride*3;
+        for(x=0; x<BLOCK_SIZE; x++)
+        {
+                if((unsigned)(src[x + stride] - src[x + (stride<<3)] + 2*QP) > 4*QP) return 0;
+        }
 #endif
-	return 1;
+        return 1;
 #else
-	int x;
-	src+= stride*4;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		int min=255;
-		int max=0;
-		int y;
-		for(y=0; y<8; y++){
-			int v= src[x + y*stride];
-			if(v>max) max=v;
-			if(v<min) min=v;
-		}
-		if(max-min > 2*QP) return 0;
-	}
-	return 1;
+        int x;
+        src+= stride*4;
+        for(x=0; x<BLOCK_SIZE; x++)
+        {
+                int min=255;
+                int max=0;
+                int y;
+                for(y=0; y<8; y++){
+                        int v= src[x + y*stride];
+                        if(v>max) max=v;
+                        if(v<min) min=v;
+                }
+                if(max-min > 2*QP) return 0;
+        }
+        return 1;
 #endif
 }
 
 static inline int horizClassify_C(uint8_t src[], int stride, PPContext *c){
-	if( isHorizDC_C(src, stride, c) ){
-		if( isHorizMinMaxOk_C(src, stride, c->QP) )
-			return 1;
-		else
-			return 0;
-	}else{
-		return 2;
-	}
+        if( isHorizDC_C(src, stride, c) ){
+                if( isHorizMinMaxOk_C(src, stride, c->QP) )
+                        return 1;
+                else
+                        return 0;
+        }else{
+                return 2;
+        }
 }
 
 static inline int vertClassify_C(uint8_t src[], int stride, PPContext *c){
-	if( isVertDC_C(src, stride, c) ){
-		if( isVertMinMaxOk_C(src, stride, c->QP) )
-			return 1;
-		else
-			return 0;
-	}else{
-		return 2;
-	}
+        if( isVertDC_C(src, stride, c) ){
+                if( isVertMinMaxOk_C(src, stride, c->QP) )
+                        return 1;
+                else
+                        return 0;
+        }else{
+                return 2;
+        }
 }
 
 static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
 {
-	int y;
-	for(y=0; y<BLOCK_SIZE; y++)
-	{
-		const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
-
-		if(ABS(middleEnergy) < 8*c->QP)
-		{
-			const int q=(dst[3] - dst[4])/2;
-			const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
-			const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
-
-			int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
-			d= MAX(d, 0);
-
-			d= (5*d + 32) >> 6;
-			d*= SIGN(-middleEnergy);
-
-			if(q>0)
-			{
-				d= d<0 ? 0 : d;
-				d= d>q ? q : d;
-			}
-			else
-			{
-				d= d>0 ? 0 : d;
-				d= d<q ? q : d;
-			}
-
-        		dst[3]-= d;
-	        	dst[4]+= d;
-		}
-		dst+= stride;
-	}
+        int y;
+        for(y=0; y<BLOCK_SIZE; y++)
+        {
+                const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
+
+                if(ABS(middleEnergy) < 8*c->QP)
+                {
+                        const int q=(dst[3] - dst[4])/2;
+                        const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
+                        const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
+
+                        int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
+                        d= MAX(d, 0);
+
+                        d= (5*d + 32) >> 6;
+                        d*= SIGN(-middleEnergy);
+
+                        if(q>0)
+                        {
+                                d= d<0 ? 0 : d;
+                                d= d>q ? q : d;
+                        }
+                        else
+                        {
+                                d= d>0 ? 0 : d;
+                                d= d<q ? q : d;
+                        }
+
+                        dst[3]-= d;
+                        dst[4]+= d;
+                }
+                dst+= stride;
+        }
 }
 
 /**
@@ -382,35 +382,35 @@ static inline void doHorizDefFilter_C(uint8_t dst[], int stride, PPContext *c)
  */
 static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
 {
-	int y;
-	for(y=0; y<BLOCK_SIZE; y++)
-	{
-		const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
-		const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
-
-		int sums[10];
-		sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
-		sums[1] = sums[0] - first  + dst[3];
-		sums[2] = sums[1] - first  + dst[4];
-		sums[3] = sums[2] - first  + dst[5];
-		sums[4] = sums[3] - first  + dst[6];
-		sums[5] = sums[4] - dst[0] + dst[7];
-		sums[6] = sums[5] - dst[1] + last;
-		sums[7] = sums[6] - dst[2] + last;
-		sums[8] = sums[7] - dst[3] + last;
-		sums[9] = sums[8] - dst[4] + last;
-
-		dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
-		dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
-		dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
-		dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
-		dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
-		dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
-		dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
-		dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
-
-		dst+= stride;
-	}
+        int y;
+        for(y=0; y<BLOCK_SIZE; y++)
+        {
+                const int first= ABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
+                const int last= ABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
+
+                int sums[10];
+                sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
+                sums[1] = sums[0] - first  + dst[3];
+                sums[2] = sums[1] - first  + dst[4];
+                sums[3] = sums[2] - first  + dst[5];
+                sums[4] = sums[3] - first  + dst[6];
+                sums[5] = sums[4] - dst[0] + dst[7];
+                sums[6] = sums[5] - dst[1] + last;
+                sums[7] = sums[6] - dst[2] + last;
+                sums[8] = sums[7] - dst[3] + last;
+                sums[9] = sums[8] - dst[4] + last;
+
+                dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
+                dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
+                dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
+                dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
+                dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
+                dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
+                dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
+                dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
+
+                dst+= stride;
+        }
 }
 
 /**
@@ -423,161 +423,161 @@ static inline void doHorizLowPass_C(uint8_t dst[], int stride, PPContext *c)
  */
 static inline void horizX1Filter(uint8_t *src, int stride, int QP)
 {
-	int y;
-	static uint64_t *lut= NULL;
-	if(lut==NULL)
-	{
-		int i;
-		lut= (uint64_t*)memalign(8, 256*8);
-		for(i=0; i<256; i++)
-		{
-			int v= i < 128 ? 2*i : 2*(i-256);
+        int y;
+        static uint64_t *lut= NULL;
+        if(lut==NULL)
+        {
+                int i;
+                lut= (uint64_t*)memalign(8, 256*8);
+                for(i=0; i<256; i++)
+                {
+                        int v= i < 128 ? 2*i : 2*(i-256);
 /*
 //Simulate 112242211 9-Tap filter
-			uint64_t a= (v/16) & 0xFF;
-			uint64_t b= (v/8) & 0xFF;
-			uint64_t c= (v/4) & 0xFF;
-			uint64_t d= (3*v/8) & 0xFF;
+                        uint64_t a= (v/16) & 0xFF;
+                        uint64_t b= (v/8) & 0xFF;
+                        uint64_t c= (v/4) & 0xFF;
+                        uint64_t d= (3*v/8) & 0xFF;
 */
 //Simulate piecewise linear interpolation
-			uint64_t a= (v/16) & 0xFF;
-			uint64_t b= (v*3/16) & 0xFF;
-			uint64_t c= (v*5/16) & 0xFF;
-			uint64_t d= (7*v/16) & 0xFF;
-			uint64_t A= (0x100 - a)&0xFF;
-			uint64_t B= (0x100 - b)&0xFF;
-			uint64_t C= (0x100 - c)&0xFF;
-			uint64_t D= (0x100 - c)&0xFF;
-
-			lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
-				(D<<24) | (C<<16) | (B<<8) | (A);
-			//lut[i] = (v<<32) | (v<<24);
-		}
-	}
-
-	for(y=0; y<BLOCK_SIZE; y++)
-	{
-		int a= src[1] - src[2];
-		int b= src[3] - src[4];
-		int c= src[5] - src[6];
-
-		int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
-
-		if(d < QP)
-		{
-			int v = d * SIGN(-b);
-
-			src[1] +=v/8;
-			src[2] +=v/4;
-			src[3] +=3*v/8;
-			src[4] -=3*v/8;
-			src[5] -=v/4;
-			src[6] -=v/8;
-
-		}
-		src+=stride;
-	}
+                        uint64_t a= (v/16) & 0xFF;
+                        uint64_t b= (v*3/16) & 0xFF;
+                        uint64_t c= (v*5/16) & 0xFF;
+                        uint64_t d= (7*v/16) & 0xFF;
+                        uint64_t A= (0x100 - a)&0xFF;
+                        uint64_t B= (0x100 - b)&0xFF;
+                        uint64_t C= (0x100 - c)&0xFF;
+                        uint64_t D= (0x100 - c)&0xFF;
+
+                        lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
+                                (D<<24) | (C<<16) | (B<<8) | (A);
+                        //lut[i] = (v<<32) | (v<<24);
+                }
+        }
+
+        for(y=0; y<BLOCK_SIZE; y++)
+        {
+                int a= src[1] - src[2];
+                int b= src[3] - src[4];
+                int c= src[5] - src[6];
+
+                int d= MAX(ABS(b) - (ABS(a) + ABS(c))/2, 0);
+
+                if(d < QP)
+                {
+                        int v = d * SIGN(-b);
+
+                        src[1] +=v/8;
+                        src[2] +=v/4;
+                        src[3] +=3*v/8;
+                        src[4] -=3*v/8;
+                        src[5] -=v/4;
+                        src[6] -=v/8;
+
+                }
+                src+=stride;
+        }
 }
 
 /**
  * accurate deblock filter
  */
 static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPContext *c){
-	int y;
-	const int QP= c->QP;
-	const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
-	const int dcThreshold= dcOffset*2 + 1;
+        int y;
+        const int QP= c->QP;
+        const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
+        const int dcThreshold= dcOffset*2 + 1;
 //START_TIMER
-	src+= step*4; // src points to begin of the 8x8 Block
-	for(y=0; y<8; y++){
-		int numEq= 0;
-
-		if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
-		if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
-		if(numEq > c->ppMode.flatnessThreshold){
-			int min, max, x;
-			
-			if(src[0] > src[step]){
-			    max= src[0];
-			    min= src[step];
-			}else{
-			    max= src[step];
-			    min= src[0];
-			}
-			for(x=2; x<8; x+=2){
-				if(src[x*step] > src[(x+1)*step]){
-					if(src[x    *step] > max) max= src[ x   *step];
-					if(src[(x+1)*step] < min) min= src[(x+1)*step];
-				}else{
-					if(src[(x+1)*step] > max) max= src[(x+1)*step];
-					if(src[ x   *step] < min) min= src[ x   *step];
-				}
-			}
-			if(max-min < 2*QP){
-				const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
-				const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
-				
-				int sums[10];
-				sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
-				sums[1] = sums[0] - first       + src[3*step];
-				sums[2] = sums[1] - first       + src[4*step];
-				sums[3] = sums[2] - first       + src[5*step];
-				sums[4] = sums[3] - first       + src[6*step];
-				sums[5] = sums[4] - src[0*step] + src[7*step];
-				sums[6] = sums[5] - src[1*step] + last;
-				sums[7] = sums[6] - src[2*step] + last;
-				sums[8] = sums[7] - src[3*step] + last;
-				sums[9] = sums[8] - src[4*step] + last;
-
-				src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
-				src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
-				src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
-				src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
-				src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
-				src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
-				src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
-				src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
-			}
-		}else{
-			const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
-
-			if(ABS(middleEnergy) < 8*QP)
-			{
-				const int q=(src[3*step] - src[4*step])/2;
-				const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
-				const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
-
-				int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
-				d= MAX(d, 0);
-	
-				d= (5*d + 32) >> 6;
-				d*= SIGN(-middleEnergy);
-	
-				if(q>0)
-				{
-					d= d<0 ? 0 : d;
-					d= d>q ? q : d;
-				}
-				else
-				{
-					d= d>0 ? 0 : d;
-					d= d<q ? q : d;
-				}
-	
-				src[3*step]-= d;
-				src[4*step]+= d;
-			}
-		}
-
-		src += stride;
-	}
+        src+= step*4; // src points to begin of the 8x8 Block
+        for(y=0; y<8; y++){
+                int numEq= 0;
+
+                if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
+                if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
+                if(numEq > c->ppMode.flatnessThreshold){
+                        int min, max, x;
+
+                        if(src[0] > src[step]){
+                            max= src[0];
+                            min= src[step];
+                        }else{
+                            max= src[step];
+                            min= src[0];
+                        }
+                        for(x=2; x<8; x+=2){
+                                if(src[x*step] > src[(x+1)*step]){
+                                        if(src[x    *step] > max) max= src[ x   *step];
+                                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
+                                }else{
+                                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
+                                        if(src[ x   *step] < min) min= src[ x   *step];
+                                }
+                        }
+                        if(max-min < 2*QP){
+                                const int first= ABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
+                                const int last= ABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
+
+                                int sums[10];
+                                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
+                                sums[1] = sums[0] - first       + src[3*step];
+                                sums[2] = sums[1] - first       + src[4*step];
+                                sums[3] = sums[2] - first       + src[5*step];
+                                sums[4] = sums[3] - first       + src[6*step];
+                                sums[5] = sums[4] - src[0*step] + src[7*step];
+                                sums[6] = sums[5] - src[1*step] + last;
+                                sums[7] = sums[6] - src[2*step] + last;
+                                sums[8] = sums[7] - src[3*step] + last;
+                                sums[9] = sums[8] - src[4*step] + last;
+
+                                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
+                                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
+                                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
+                                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
+                                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
+                                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
+                                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
+                                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
+                        }
+                }else{
+                        const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
+
+                        if(ABS(middleEnergy) < 8*QP)
+                        {
+                                const int q=(src[3*step] - src[4*step])/2;
+                                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
+                                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
+
+                                int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
+                                d= MAX(d, 0);
+
+                                d= (5*d + 32) >> 6;
+                                d*= SIGN(-middleEnergy);
+
+                                if(q>0)
+                                {
+                                        d= d<0 ? 0 : d;
+                                        d= d>q ? q : d;
+                                }
+                                else
+                                {
+                                        d= d>0 ? 0 : d;
+                                        d= d<q ? q : d;
+                                }
+
+                                src[3*step]-= d;
+                                src[4*step]+= d;
+                        }
+                }
+
+                src += stride;
+        }
 /*if(step==16){
     STOP_TIMER("step16")
 }else{
@@ -668,290 +668,290 @@ static always_inline void do_a_deblock_C(uint8_t *src, int step, int stride, PPC
 // minor note: the HAVE_xyz is messed up after that line so dont use it
 
 static inline void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
-	QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
+        QP_STORE_T QPs[], int QPStride, int isColor, pp_mode_t *vm, pp_context_t *vc)
 {
-	PPContext *c= (PPContext *)vc;
-	PPMode *ppMode= (PPMode *)vm;
-	c->ppMode= *ppMode; //FIXME
+        PPContext *c= (PPContext *)vc;
+        PPMode *ppMode= (PPMode *)vm;
+        c->ppMode= *ppMode; //FIXME
 
-	// useing ifs here as they are faster than function pointers allthough the
-	// difference wouldnt be messureable here but its much better because
-	// someone might exchange the cpu whithout restarting mplayer ;)
+        // useing ifs here as they are faster than function pointers allthough the
+        // difference wouldnt be messureable here but its much better because
+        // someone might exchange the cpu whithout restarting mplayer ;)
 #ifdef RUNTIME_CPUDETECT
 #if defined(ARCH_X86) || defined(ARCH_X86_64)
-	// ordered per speed fasterst first
-	if(c->cpuCaps & PP_CPU_CAPS_MMX2)
-		postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-	else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
-		postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-	else if(c->cpuCaps & PP_CPU_CAPS_MMX)
-		postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
-	else
-		postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+        // ordered per speed fasterst first
+        if(c->cpuCaps & PP_CPU_CAPS_MMX2)
+                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+        else if(c->cpuCaps & PP_CPU_CAPS_3DNOW)
+                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+        else if(c->cpuCaps & PP_CPU_CAPS_MMX)
+                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+        else
+                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #else
 #ifdef ARCH_POWERPC
 #ifdef HAVE_ALTIVEC
         if(c->cpuCaps & PP_CPU_CAPS_ALTIVEC)
-		postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
         else
 #endif
 #endif
-		postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #endif
 #else //RUNTIME_CPUDETECT
 #ifdef HAVE_MMX2
-		postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+                postProcess_MMX2(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #elif defined (HAVE_3DNOW)
-		postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+                postProcess_3DNow(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #elif defined (HAVE_MMX)
-		postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+                postProcess_MMX(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #elif defined (HAVE_ALTIVEC)
-		postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+                postProcess_altivec(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #else
-		postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
+                postProcess_C(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
 #endif
 #endif //!RUNTIME_CPUDETECT
 }
 
 //static void postProcess(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
-//	QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
+//        QP_STORE_T QPs[], int QPStride, int isColor, struct PPMode *ppMode);
 
 /* -pp Command line Help
 */
 char *pp_help=
 "Available postprocessing filters:\n"
-"Filters			Options\n"
-"short	long name	short	long option	Description\n"
-"*	*		a	autoq		CPU power dependent enabler\n"
-"			c	chrom		chrominance filtering enabled\n"
-"			y	nochrom		chrominance filtering disabled\n"
-"			n	noluma		luma filtering disabled\n"
-"hb	hdeblock	(2 threshold)		horizontal deblocking filter\n"
-"	1. difference factor: default=32, higher -> more deblocking\n"
-"	2. flatness threshold: default=39, lower -> more deblocking\n"
-"			the h & v deblocking filters share these\n"
-"			so you can't set different thresholds for h / v\n"
-"vb	vdeblock	(2 threshold)		vertical deblocking filter\n"
-"ha	hadeblock	(2 threshold)		horizontal deblocking filter\n"
-"va	vadeblock	(2 threshold)		vertical deblocking filter\n"
-"h1	x1hdeblock				experimental h deblock filter 1\n"
-"v1	x1vdeblock				experimental v deblock filter 1\n"
-"dr	dering					deringing filter\n"
-"al	autolevels				automatic brightness / contrast\n"
-"			f	fullyrange	stretch luminance to (0..255)\n"
-"lb	linblenddeint				linear blend deinterlacer\n"
-"li	linipoldeint				linear interpolating deinterlace\n"
-"ci	cubicipoldeint				cubic interpolating deinterlacer\n"
-"md	mediandeint				median deinterlacer\n"
-"fd	ffmpegdeint				ffmpeg deinterlacer\n"
-"l5	lowpass5				FIR lowpass deinterlacer\n"
-"de	default					hb:a,vb:a,dr:a\n"
-"fa	fast					h1:a,v1:a,dr:a\n"
-"ac						ha:a:128:7,va:a,dr:a\n"
-"tn	tmpnoise	(3 threshold)		temporal noise reducer\n"
-"			1. <= 2. <= 3.		larger -> stronger filtering\n"
-"fq	forceQuant	<quantizer>		force quantizer\n"
+"Filters                        Options\n"
+"short  long name       short   long option     Description\n"
+"*      *               a       autoq           CPU power dependent enabler\n"
+"                       c       chrom           chrominance filtering enabled\n"
+"                       y       nochrom         chrominance filtering disabled\n"
+"                       n       noluma          luma filtering disabled\n"
+"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
+"       1. difference factor: default=32, higher -> more deblocking\n"
+"       2. flatness threshold: default=39, lower -> more deblocking\n"
+"                       the h & v deblocking filters share these\n"
+"                       so you can't set different thresholds for h / v\n"
+"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
+"ha     hadeblock       (2 threshold)           horizontal deblocking filter\n"
+"va     vadeblock       (2 threshold)           vertical deblocking filter\n"
+"h1     x1hdeblock                              experimental h deblock filter 1\n"
+"v1     x1vdeblock                              experimental v deblock filter 1\n"
+"dr     dering                                  deringing filter\n"
+"al     autolevels                              automatic brightness / contrast\n"
+"                       f        fullyrange     stretch luminance to (0..255)\n"
+"lb     linblenddeint                           linear blend deinterlacer\n"
+"li     linipoldeint                            linear interpolating deinterlace\n"
+"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
+"md     mediandeint                             median deinterlacer\n"
+"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
+"l5     lowpass5                                FIR lowpass deinterlacer\n"
+"de     default                                 hb:a,vb:a,dr:a\n"
+"fa     fast                                    h1:a,v1:a,dr:a\n"
+"ac                                             ha:a:128:7,va:a,dr:a\n"
+"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
+"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
+"fq     forceQuant      <quantizer>             force quantizer\n"
 "Usage:\n"
 "<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
 "long form example:\n"
-"vdeblock:autoq/hdeblock:autoq/linblenddeint	default,-vdeblock\n"
+"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
 "short form example:\n"
-"vb:a/hb:a/lb					de,-vb\n"
+"vb:a/hb:a/lb                                   de,-vb\n"
 "more examples:\n"
 "tn:64:128:256\n"
 ;
 
 pp_mode_t *pp_get_mode_by_name_and_quality(char *name, int quality)
 {
-	char temp[GET_MODE_BUFFER_SIZE];
-	char *p= temp;
-	char *filterDelimiters= ",/";
-	char *optionDelimiters= ":";
-	struct PPMode *ppMode;
-	char *filterToken;
-
-	ppMode= memalign(8, sizeof(PPMode));
-	
-	ppMode->lumMode= 0;
-	ppMode->chromMode= 0;
-	ppMode->maxTmpNoise[0]= 700;
-	ppMode->maxTmpNoise[1]= 1500;
-	ppMode->maxTmpNoise[2]= 3000;
-	ppMode->maxAllowedY= 234;
-	ppMode->minAllowedY= 16;
-	ppMode->baseDcDiff= 256/8;
-	ppMode->flatnessThreshold= 56-16-1;
-	ppMode->maxClippedThreshold= 0.01;
-	ppMode->error=0;
-
-	strncpy(temp, name, GET_MODE_BUFFER_SIZE);
-
-	if(verbose>1) printf("pp: %s\n", name);
-
-	for(;;){
-		char *filterName;
-		int q= 1000000; //PP_QUALITY_MAX;
-		int chrom=-1;
-		int luma=-1;
-		char *option;
-		char *options[OPTIONS_ARRAY_SIZE];
-		int i;
-		int filterNameOk=0;
-		int numOfUnknownOptions=0;
-		int enable=1; //does the user want us to enabled or disabled the filter
-
-		filterToken= strtok(p, filterDelimiters);
-		if(filterToken == NULL) break;
-		p+= strlen(filterToken) + 1; // p points to next filterToken
-		filterName= strtok(filterToken, optionDelimiters);
-		if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
-
-		if(*filterName == '-')
-		{
-			enable=0;
-			filterName++;
-		}
-
-		for(;;){ //for all options
-			option= strtok(NULL, optionDelimiters);
-			if(option == NULL) break;
-
-			if(verbose>1) printf("pp: option: %s\n", option);
-			if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
-			else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
-			else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
-			else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
-			else
-			{
-				options[numOfUnknownOptions] = option;
-				numOfUnknownOptions++;
-			}
-			if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
-		}
-		options[numOfUnknownOptions] = NULL;
-
-		/* replace stuff from the replace Table */
-		for(i=0; replaceTable[2*i]!=NULL; i++)
-		{
-			if(!strcmp(replaceTable[2*i], filterName))
-			{
-				int newlen= strlen(replaceTable[2*i + 1]);
-				int plen;
-				int spaceLeft;
-
-				if(p==NULL) p= temp, *p=0; 	//last filter
-				else p--, *p=',';		//not last filter
-
-				plen= strlen(p);
-				spaceLeft= p - temp + plen;
-				if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
-				{
-					ppMode->error++;
-					break;
-				}
-				memmove(p + newlen, p, plen+1);
-				memcpy(p, replaceTable[2*i + 1], newlen);
-				filterNameOk=1;
-			}
-		}
-
-		for(i=0; filters[i].shortName!=NULL; i++)
-		{
-//			printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
-			if(   !strcmp(filters[i].longName, filterName)
-			   || !strcmp(filters[i].shortName, filterName))
-			{
-				ppMode->lumMode &= ~filters[i].mask;
-				ppMode->chromMode &= ~filters[i].mask;
-
-				filterNameOk=1;
-				if(!enable) break; // user wants to disable it
-
-				if(q >= filters[i].minLumQuality && luma)
-					ppMode->lumMode|= filters[i].mask;
-				if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
-					if(q >= filters[i].minChromQuality)
-						ppMode->chromMode|= filters[i].mask;
-
-				if(filters[i].mask == LEVEL_FIX)
-				{
-					int o;
-					ppMode->minAllowedY= 16;
-					ppMode->maxAllowedY= 234;
-					for(o=0; options[o]!=NULL; o++)
-					{
-						if(  !strcmp(options[o],"fullyrange")
-						   ||!strcmp(options[o],"f"))
-						{
-							ppMode->minAllowedY= 0;
-							ppMode->maxAllowedY= 255;
-							numOfUnknownOptions--;
-						}
-					}
-				}
-				else if(filters[i].mask == TEMP_NOISE_FILTER)
-				{
-					int o;
-					int numOfNoises=0;
-
-					for(o=0; options[o]!=NULL; o++)
-					{
-						char *tail;
-						ppMode->maxTmpNoise[numOfNoises]=
-							strtol(options[o], &tail, 0);
-						if(tail!=options[o])
-						{
-							numOfNoises++;
-							numOfUnknownOptions--;
-							if(numOfNoises >= 3) break;
-						}
-					}
-				}
-				else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK 
-				     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
-				{
-					int o;
-
-					for(o=0; options[o]!=NULL && o<2; o++)
-					{
-						char *tail;
-						int val= strtol(options[o], &tail, 0);
-						if(tail==options[o]) break;
-
-						numOfUnknownOptions--;
-						if(o==0) ppMode->baseDcDiff= val;
-						else ppMode->flatnessThreshold= val;
-					}
-				}
-				else if(filters[i].mask == FORCE_QUANT)
-				{
-					int o;
-					ppMode->forcedQuant= 15;
-
-					for(o=0; options[o]!=NULL && o<1; o++)
-					{
-						char *tail;
-						int val= strtol(options[o], &tail, 0);
-						if(tail==options[o]) break;
-
-						numOfUnknownOptions--;
-						ppMode->forcedQuant= val;
-					}
-				}
-			}
-		}
-		if(!filterNameOk) ppMode->error++;
-		ppMode->error += numOfUnknownOptions;
-	}
-
-	if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
-	if(ppMode->error)
-	{
-		fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
-		free(ppMode);
-		return NULL;
-	}
-	return ppMode;
+        char temp[GET_MODE_BUFFER_SIZE];
+        char *p= temp;
+        char *filterDelimiters= ",/";
+        char *optionDelimiters= ":";
+        struct PPMode *ppMode;
+        char *filterToken;
+
+        ppMode= memalign(8, sizeof(PPMode));
+
+        ppMode->lumMode= 0;
+        ppMode->chromMode= 0;
+        ppMode->maxTmpNoise[0]= 700;
+        ppMode->maxTmpNoise[1]= 1500;
+        ppMode->maxTmpNoise[2]= 3000;
+        ppMode->maxAllowedY= 234;
+        ppMode->minAllowedY= 16;
+        ppMode->baseDcDiff= 256/8;
+        ppMode->flatnessThreshold= 56-16-1;
+        ppMode->maxClippedThreshold= 0.01;
+        ppMode->error=0;
+
+        strncpy(temp, name, GET_MODE_BUFFER_SIZE);
+
+        if(verbose>1) printf("pp: %s\n", name);
+
+        for(;;){
+                char *filterName;
+                int q= 1000000; //PP_QUALITY_MAX;
+                int chrom=-1;
+                int luma=-1;
+                char *option;
+                char *options[OPTIONS_ARRAY_SIZE];
+                int i;
+                int filterNameOk=0;
+                int numOfUnknownOptions=0;
+                int enable=1; //does the user want us to enabled or disabled the filter
+
+                filterToken= strtok(p, filterDelimiters);
+                if(filterToken == NULL) break;
+                p+= strlen(filterToken) + 1; // p points to next filterToken
+                filterName= strtok(filterToken, optionDelimiters);
+                if(verbose>1) printf("pp: %s::%s\n", filterToken, filterName);
+
+                if(*filterName == '-')
+                {
+                        enable=0;
+                        filterName++;
+                }
+
+                for(;;){ //for all options
+                        option= strtok(NULL, optionDelimiters);
+                        if(option == NULL) break;
+
+                        if(verbose>1) printf("pp: option: %s\n", option);
+                        if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
+                        else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
+                        else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
+                        else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
+                        else
+                        {
+                                options[numOfUnknownOptions] = option;
+                                numOfUnknownOptions++;
+                        }
+                        if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
+                }
+                options[numOfUnknownOptions] = NULL;
+
+                /* replace stuff from the replace Table */
+                for(i=0; replaceTable[2*i]!=NULL; i++)
+                {
+                        if(!strcmp(replaceTable[2*i], filterName))
+                        {
+                                int newlen= strlen(replaceTable[2*i + 1]);
+                                int plen;
+                                int spaceLeft;
+
+                                if(p==NULL) p= temp, *p=0;      //last filter
+                                else p--, *p=',';               //not last filter
+
+                                plen= strlen(p);
+                                spaceLeft= p - temp + plen;
+                                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE)
+                                {
+                                        ppMode->error++;
+                                        break;
+                                }
+                                memmove(p + newlen, p, plen+1);
+                                memcpy(p, replaceTable[2*i + 1], newlen);
+                                filterNameOk=1;
+                        }
+                }
+
+                for(i=0; filters[i].shortName!=NULL; i++)
+                {
+//                        printf("Compareing %s, %s, %s\n", filters[i].shortName,filters[i].longName, filterName);
+                        if(   !strcmp(filters[i].longName, filterName)
+                           || !strcmp(filters[i].shortName, filterName))
+                        {
+                                ppMode->lumMode &= ~filters[i].mask;
+                                ppMode->chromMode &= ~filters[i].mask;
+
+                                filterNameOk=1;
+                                if(!enable) break; // user wants to disable it
+
+                                if(q >= filters[i].minLumQuality && luma)
+                                        ppMode->lumMode|= filters[i].mask;
+                                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
+                                        if(q >= filters[i].minChromQuality)
+                                                ppMode->chromMode|= filters[i].mask;
+
+                                if(filters[i].mask == LEVEL_FIX)
+                                {
+                                        int o;
+                                        ppMode->minAllowedY= 16;
+                                        ppMode->maxAllowedY= 234;
+                                        for(o=0; options[o]!=NULL; o++)
+                                        {
+                                                if(  !strcmp(options[o],"fullyrange")
+                                                   ||!strcmp(options[o],"f"))
+                                                {
+                                                        ppMode->minAllowedY= 0;
+                                                        ppMode->maxAllowedY= 255;
+                                                        numOfUnknownOptions--;
+                                                }
+                                        }
+                                }
+                                else if(filters[i].mask == TEMP_NOISE_FILTER)
+                                {
+                                        int o;
+                                        int numOfNoises=0;
+
+                                        for(o=0; options[o]!=NULL; o++)
+                                        {
+                                                char *tail;
+                                                ppMode->maxTmpNoise[numOfNoises]=
+                                                        strtol(options[o], &tail, 0);
+                                                if(tail!=options[o])
+                                                {
+                                                        numOfNoises++;
+                                                        numOfUnknownOptions--;
+                                                        if(numOfNoises >= 3) break;
+                                                }
+                                        }
+                                }
+                                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
+                                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK)
+                                {
+                                        int o;
+
+                                        for(o=0; options[o]!=NULL && o<2; o++)
+                                        {
+                                                char *tail;
+                                                int val= strtol(options[o], &tail, 0);
+                                                if(tail==options[o]) break;
+
+                                                numOfUnknownOptions--;
+                                                if(o==0) ppMode->baseDcDiff= val;
+                                                else ppMode->flatnessThreshold= val;
+                                        }
+                                }
+                                else if(filters[i].mask == FORCE_QUANT)
+                                {
+                                        int o;
+                                        ppMode->forcedQuant= 15;
+
+                                        for(o=0; options[o]!=NULL && o<1; o++)
+                                        {
+                                                char *tail;
+                                                int val= strtol(options[o], &tail, 0);
+                                                if(tail==options[o]) break;
+
+                                                numOfUnknownOptions--;
+                                                ppMode->forcedQuant= val;
+                                        }
+                                }
+                        }
+                }
+                if(!filterNameOk) ppMode->error++;
+                ppMode->error += numOfUnknownOptions;
+        }
+
+        if(verbose>1) printf("pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
+        if(ppMode->error)
+        {
+                fprintf(stderr, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
+                free(ppMode);
+                return NULL;
+        }
+        return ppMode;
 }
 
 void pp_free_mode(pp_mode_t *mode){
@@ -959,199 +959,199 @@ void pp_free_mode(pp_mode_t *mode){
 }
 
 static void reallocAlign(void **p, int alignment, int size){
-	if(*p) free(*p);
-	*p= memalign(alignment, size);
-	memset(*p, 0, size);
+        if(*p) free(*p);
+        *p= memalign(alignment, size);
+        memset(*p, 0, size);
 }
 
 static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
-	int mbWidth = (width+15)>>4;
-	int mbHeight= (height+15)>>4;
-	int i;
-
-	c->stride= stride;
-	c->qpStride= qpStride;
-
-	reallocAlign((void **)&c->tempDst, 8, stride*24);
-	reallocAlign((void **)&c->tempSrc, 8, stride*24);
-	reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
-	reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
-	for(i=0; i<256; i++)
-		c->yHistogram[i]= width*height/64*15/256;
-
-	for(i=0; i<3; i++)
-	{
-		//Note:the +17*1024 is just there so i dont have to worry about r/w over te end
-		reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
-		reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
-	}
-
-	reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
-	reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
-	reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
-	reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
+        int mbWidth = (width+15)>>4;
+        int mbHeight= (height+15)>>4;
+        int i;
+
+        c->stride= stride;
+        c->qpStride= qpStride;
+
+        reallocAlign((void **)&c->tempDst, 8, stride*24);
+        reallocAlign((void **)&c->tempSrc, 8, stride*24);
+        reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
+        reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
+        for(i=0; i<256; i++)
+                c->yHistogram[i]= width*height/64*15/256;
+
+        for(i=0; i<3; i++)
+        {
+                //Note:the +17*1024 is just there so i dont have to worry about r/w over te end
+                reallocAlign((void **)&c->tempBlured[i], 8, stride*mbHeight*16 + 17*1024);
+                reallocAlign((void **)&c->tempBluredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
+        }
+
+        reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
+        reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
+        reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
+        reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
 }
 
 static void global_init(void){
-	int i;
-	memset(clip_table, 0, 256);
-	for(i=256; i<512; i++)
-		clip_table[i]= i;
-	memset(clip_table+512, 0, 256);
+        int i;
+        memset(clip_table, 0, 256);
+        for(i=256; i<512; i++)
+                clip_table[i]= i;
+        memset(clip_table+512, 0, 256);
 }
 
 pp_context_t *pp_get_context(int width, int height, int cpuCaps){
-	PPContext *c= memalign(32, sizeof(PPContext));
-	int stride= (width+15)&(~15); //assumed / will realloc if needed
-	int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
-        
-	global_init();
-
-	memset(c, 0, sizeof(PPContext));
-	c->cpuCaps= cpuCaps;
-	if(cpuCaps&PP_FORMAT){
-		c->hChromaSubSample= cpuCaps&0x3;
-		c->vChromaSubSample= (cpuCaps>>4)&0x3;
-	}else{
-		c->hChromaSubSample= 1;
-		c->vChromaSubSample= 1;
-	}
-
-	reallocBuffers(c, width, height, stride, qpStride);
-        
-	c->frameNum=-1;
-
-	return c;
+        PPContext *c= memalign(32, sizeof(PPContext));
+        int stride= (width+15)&(~15);    //assumed / will realloc if needed
+        int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
+
+        global_init();
+
+        memset(c, 0, sizeof(PPContext));
+        c->cpuCaps= cpuCaps;
+        if(cpuCaps&PP_FORMAT){
+                c->hChromaSubSample= cpuCaps&0x3;
+                c->vChromaSubSample= (cpuCaps>>4)&0x3;
+        }else{
+                c->hChromaSubSample= 1;
+                c->vChromaSubSample= 1;
+        }
+
+        reallocBuffers(c, width, height, stride, qpStride);
+
+        c->frameNum=-1;
+
+        return c;
 }
 
 void pp_free_context(void *vc){
-	PPContext *c = (PPContext*)vc;
-	int i;
-	
-	for(i=0; i<3; i++) free(c->tempBlured[i]);
-	for(i=0; i<3; i++) free(c->tempBluredPast[i]);
-	
-	free(c->tempBlocks);
-	free(c->yHistogram);
-	free(c->tempDst);
-	free(c->tempSrc);
-	free(c->deintTemp);
-	free(c->stdQPTable);
-	free(c->nonBQPTable);
-	free(c->forcedQPTable);
-        
-	memset(c, 0, sizeof(PPContext));
-
-	free(c);
+        PPContext *c = (PPContext*)vc;
+        int i;
+
+        for(i=0; i<3; i++) free(c->tempBlured[i]);
+        for(i=0; i<3; i++) free(c->tempBluredPast[i]);
+
+        free(c->tempBlocks);
+        free(c->yHistogram);
+        free(c->tempDst);
+        free(c->tempSrc);
+        free(c->deintTemp);
+        free(c->stdQPTable);
+        free(c->nonBQPTable);
+        free(c->forcedQPTable);
+
+        memset(c, 0, sizeof(PPContext));
+
+        free(c);
 }
 
 void  pp_postprocess(uint8_t * src[3], int srcStride[3],
                  uint8_t * dst[3], int dstStride[3],
                  int width, int height,
                  QP_STORE_T *QP_store,  int QPStride,
-		 pp_mode_t *vm,  void *vc, int pict_type)
+                 pp_mode_t *vm,  void *vc, int pict_type)
 {
-	int mbWidth = (width+15)>>4;
-	int mbHeight= (height+15)>>4;
-	PPMode *mode = (PPMode*)vm;
-	PPContext *c = (PPContext*)vc;
-	int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
-	int absQPStride = ABS(QPStride);
-
-	// c->stride and c->QPStride are always positive
-	if(c->stride < minStride || c->qpStride < absQPStride)
-		reallocBuffers(c, width, height, 
-				MAX(minStride, c->stride), 
-				MAX(c->qpStride, absQPStride));
-
-	if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)) 
-	{
-		int i;
-		QP_store= c->forcedQPTable;
-		absQPStride = QPStride = 0;
-		if(mode->lumMode & FORCE_QUANT)
-			for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
-		else
-			for(i=0; i<mbWidth; i++) QP_store[i]= 1;
-	}
+        int mbWidth = (width+15)>>4;
+        int mbHeight= (height+15)>>4;
+        PPMode *mode = (PPMode*)vm;
+        PPContext *c = (PPContext*)vc;
+        int minStride= MAX(ABS(srcStride[0]), ABS(dstStride[0]));
+        int absQPStride = ABS(QPStride);
+
+        // c->stride and c->QPStride are always positive
+        if(c->stride < minStride || c->qpStride < absQPStride)
+                reallocBuffers(c, width, height,
+                                MAX(minStride, c->stride),
+                                MAX(c->qpStride, absQPStride));
+
+        if(QP_store==NULL || (mode->lumMode & FORCE_QUANT))
+        {
+                int i;
+                QP_store= c->forcedQPTable;
+                absQPStride = QPStride = 0;
+                if(mode->lumMode & FORCE_QUANT)
+                        for(i=0; i<mbWidth; i++) QP_store[i]= mode->forcedQuant;
+                else
+                        for(i=0; i<mbWidth; i++) QP_store[i]= 1;
+        }
 //printf("pict_type:%d\n", pict_type);
 
-	if(pict_type & PP_PICT_TYPE_QP2){
-		int i;
-		const int count= mbHeight * absQPStride;
-		for(i=0; i<(count>>2); i++){
-			((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
-		}
-		for(i<<=2; i<count; i++){
-			c->stdQPTable[i] = QP_store[i]>>1;
-		}
+        if(pict_type & PP_PICT_TYPE_QP2){
+                int i;
+                const int count= mbHeight * absQPStride;
+                for(i=0; i<(count>>2); i++){
+                        ((uint32_t*)c->stdQPTable)[i] = (((uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
+                }
+                for(i<<=2; i<count; i++){
+                        c->stdQPTable[i] = QP_store[i]>>1;
+                }
                 QP_store= c->stdQPTable;
-		QPStride= absQPStride;		
-	}
+                QPStride= absQPStride;
+        }
 
 if(0){
 int x,y;
 for(y=0; y<mbHeight; y++){
-	for(x=0; x<mbWidth; x++){
-		printf("%2d ", QP_store[x + y*QPStride]);
-	}
-	printf("\n");
+        for(x=0; x<mbWidth; x++){
+                printf("%2d ", QP_store[x + y*QPStride]);
+        }
+        printf("\n");
 }
-	printf("\n");
+        printf("\n");
 }
 
-	if((pict_type&7)!=3)
-	{
-		if (QPStride >= 0) {
-			int i;
-			const int count= mbHeight * QPStride;
-			for(i=0; i<(count>>2); i++){
-				((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
-			}
-			for(i<<=2; i<count; i++){
-				c->nonBQPTable[i] = QP_store[i] & 0x3F;
-			}
-		} else {
-			int i,j;
-			for(i=0; i<mbHeight; i++) {
-		    		for(j=0; j<absQPStride; j++) {
-					c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
-				}
-			}
-		}
-	}
-
-	if(verbose>2)
-	{
-		printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
-	}
-
-	postProcess(src[0], srcStride[0], dst[0], dstStride[0],
-		width, height, QP_store, QPStride, 0, mode, c);
-
-	width  = (width )>>c->hChromaSubSample;
-	height = (height)>>c->vChromaSubSample;
-
-	if(mode->chromMode)
-	{
-		postProcess(src[1], srcStride[1], dst[1], dstStride[1],
-			width, height, QP_store, QPStride, 1, mode, c);
-		postProcess(src[2], srcStride[2], dst[2], dstStride[2],
-			width, height, QP_store, QPStride, 2, mode, c);
-	}
-	else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
-	{
-		linecpy(dst[1], src[1], height, srcStride[1]);
-		linecpy(dst[2], src[2], height, srcStride[2]);
-	}
-	else
-	{
-		int y;
-		for(y=0; y<height; y++)
-		{
-			memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
-			memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
-		}
-	}
+        if((pict_type&7)!=3)
+        {
+                if (QPStride >= 0) {
+                        int i;
+                        const int count= mbHeight * QPStride;
+                        for(i=0; i<(count>>2); i++){
+                                ((uint32_t*)c->nonBQPTable)[i] = ((uint32_t*)QP_store)[i] & 0x3F3F3F3F;
+                        }
+                        for(i<<=2; i<count; i++){
+                                c->nonBQPTable[i] = QP_store[i] & 0x3F;
+                        }
+                } else {
+                        int i,j;
+                        for(i=0; i<mbHeight; i++) {
+                                    for(j=0; j<absQPStride; j++) {
+                                        c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
+                                }
+                        }
+                }
+        }
+
+        if(verbose>2)
+        {
+                printf("using npp filters 0x%X/0x%X\n", mode->lumMode, mode->chromMode);
+        }
+
+        postProcess(src[0], srcStride[0], dst[0], dstStride[0],
+                width, height, QP_store, QPStride, 0, mode, c);
+
+        width  = (width )>>c->hChromaSubSample;
+        height = (height)>>c->vChromaSubSample;
+
+        if(mode->chromMode)
+        {
+                postProcess(src[1], srcStride[1], dst[1], dstStride[1],
+                        width, height, QP_store, QPStride, 1, mode, c);
+                postProcess(src[2], srcStride[2], dst[2], dstStride[2],
+                        width, height, QP_store, QPStride, 2, mode, c);
+        }
+        else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2])
+        {
+                linecpy(dst[1], src[1], height, srcStride[1]);
+                linecpy(dst[2], src[2], height, srcStride[2]);
+        }
+        else
+        {
+                int y;
+                for(y=0; y<height; y++)
+                {
+                        memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
+                        memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
+                }
+        }
 }
 
diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess.h b/src/libffmpeg/libavcodec/libpostproc/postprocess.h
index b5d4fa319..114c88a38 100644
--- a/src/libffmpeg/libavcodec/libpostproc/postprocess.h
+++ b/src/libffmpeg/libavcodec/libpostproc/postprocess.h
@@ -13,7 +13,7 @@
 
     You should have received a copy of the GNU General Public License
     along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 
 #ifndef NEWPOSTPROCESS_H
@@ -21,7 +21,7 @@
 
 /**
  * @file postprocess.h
- * @brief 
+ * @brief
  *     external api for the pp stuff
  */
 
@@ -29,6 +29,12 @@
 extern "C" {
 #endif
 
+#define LIBPOSTPROC_VERSION_INT ((51<<16)+(1<<8)+0)
+#define LIBPOSTPROC_VERSION     51.1.0
+#define LIBPOSTPROC_BUILD       LIBPOSTPROC_VERSION_INT
+
+#define LIBPOSTPROC_IDENT       "postproc" AV_STRINGIFY(LIBPOSTPROC_VERSION)
+
 #define PP_QUALITY_MAX 6
 
 #define QP_STORE_T int8_t
@@ -42,7 +48,7 @@ void  pp_postprocess(uint8_t * src[3], int srcStride[3],
                  uint8_t * dst[3], int dstStride[3],
                  int horizontalSize, int verticalSize,
                  QP_STORE_T *QP_store,  int QP_stride,
-		 pp_mode_t *mode, pp_context_t *ppContext, int pict_type);
+                 pp_mode_t *mode, pp_context_t *ppContext, int pict_type);
 
 
 /**
diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess_altivec_template.c b/src/libffmpeg/libavcodec/libpostproc/postprocess_altivec_template.c
index 1c59b9465..38adeb32d 100644
--- a/src/libffmpeg/libavcodec/libpostproc/postprocess_altivec_template.c
+++ b/src/libffmpeg/libavcodec/libpostproc/postprocess_altivec_template.c
@@ -15,7 +15,7 @@
 
     You should have received a copy of the GNU General Public License
     along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 
 
@@ -26,35 +26,35 @@
 #endif
 
 #define ALTIVEC_TRANSPOSE_8x8_SHORT(src_a,src_b,src_c,src_d,src_e,src_f,src_g,src_h) \
-  do {									\
-    __typeof__(src_a) tempA1, tempB1, tempC1, tempD1;			\
-    __typeof__(src_a) tempE1, tempF1, tempG1, tempH1;			\
-    __typeof__(src_a) tempA2, tempB2, tempC2, tempD2;			\
-    __typeof__(src_a) tempE2, tempF2, tempG2, tempH2;			\
-    tempA1 = vec_mergeh (src_a, src_e);					\
-    tempB1 = vec_mergel (src_a, src_e);					\
-    tempC1 = vec_mergeh (src_b, src_f);					\
-    tempD1 = vec_mergel (src_b, src_f);					\
-    tempE1 = vec_mergeh (src_c, src_g);					\
-    tempF1 = vec_mergel (src_c, src_g);					\
-    tempG1 = vec_mergeh (src_d, src_h);					\
-    tempH1 = vec_mergel (src_d, src_h);					\
-    tempA2 = vec_mergeh (tempA1, tempE1);				\
-    tempB2 = vec_mergel (tempA1, tempE1);				\
-    tempC2 = vec_mergeh (tempB1, tempF1);				\
-    tempD2 = vec_mergel (tempB1, tempF1);				\
-    tempE2 = vec_mergeh (tempC1, tempG1);				\
-    tempF2 = vec_mergel (tempC1, tempG1);				\
-    tempG2 = vec_mergeh (tempD1, tempH1);				\
-    tempH2 = vec_mergel (tempD1, tempH1);				\
-    src_a = vec_mergeh (tempA2, tempE2);				\
-    src_b = vec_mergel (tempA2, tempE2);				\
-    src_c = vec_mergeh (tempB2, tempF2);				\
-    src_d = vec_mergel (tempB2, tempF2);				\
-    src_e = vec_mergeh (tempC2, tempG2);				\
-    src_f = vec_mergel (tempC2, tempG2);				\
-    src_g = vec_mergeh (tempD2, tempH2);				\
-    src_h = vec_mergel (tempD2, tempH2);				\
+  do {                                                                  \
+    __typeof__(src_a) tempA1, tempB1, tempC1, tempD1;                   \
+    __typeof__(src_a) tempE1, tempF1, tempG1, tempH1;                   \
+    __typeof__(src_a) tempA2, tempB2, tempC2, tempD2;                   \
+    __typeof__(src_a) tempE2, tempF2, tempG2, tempH2;                   \
+    tempA1 = vec_mergeh (src_a, src_e);                                 \
+    tempB1 = vec_mergel (src_a, src_e);                                 \
+    tempC1 = vec_mergeh (src_b, src_f);                                 \
+    tempD1 = vec_mergel (src_b, src_f);                                 \
+    tempE1 = vec_mergeh (src_c, src_g);                                 \
+    tempF1 = vec_mergel (src_c, src_g);                                 \
+    tempG1 = vec_mergeh (src_d, src_h);                                 \
+    tempH1 = vec_mergel (src_d, src_h);                                 \
+    tempA2 = vec_mergeh (tempA1, tempE1);                               \
+    tempB2 = vec_mergel (tempA1, tempE1);                               \
+    tempC2 = vec_mergeh (tempB1, tempF1);                               \
+    tempD2 = vec_mergel (tempB1, tempF1);                               \
+    tempE2 = vec_mergeh (tempC1, tempG1);                               \
+    tempF2 = vec_mergel (tempC1, tempG1);                               \
+    tempG2 = vec_mergeh (tempD1, tempH1);                               \
+    tempH2 = vec_mergel (tempD1, tempH1);                               \
+    src_a = vec_mergeh (tempA2, tempE2);                                \
+    src_b = vec_mergel (tempA2, tempE2);                                \
+    src_c = vec_mergeh (tempB2, tempF2);                                \
+    src_d = vec_mergel (tempB2, tempF2);                                \
+    src_e = vec_mergeh (tempC2, tempG2);                                \
+    src_f = vec_mergel (tempC2, tempG2);                                \
+    src_g = vec_mergeh (tempD2, tempH2);                                \
+    src_h = vec_mergel (tempD2, tempH2);                                \
   } while (0)
 
 
@@ -79,7 +79,7 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
   const vector signed int zero = vec_splat_s32(0);
   const vector signed short mask = vec_splat_s16(1);
   vector signed int v_numEq = vec_splat_s32(0);
-	
+
   data[0] = ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
   data[1] = data[0] * 2 + 1;
   data[2] = c->QP * 2;
@@ -94,25 +94,25 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
 
   vector signed short v_srcAss0, v_srcAss1, v_srcAss2, v_srcAss3, v_srcAss4, v_srcAss5, v_srcAss6, v_srcAss7;
 
-#define LOAD_LINE(i)							\
-  register int j##i = i * stride;					\
-  vector unsigned char perm##i = vec_lvsl(j##i, src2);			\
-  const vector unsigned char v_srcA1##i = vec_ld(j##i, src2);		\
-  vector unsigned char v_srcA2##i;					\
-  if (two_vectors)							\
-    v_srcA2##i = vec_ld(j##i + 16, src2);				\
-  const vector unsigned char v_srcA##i =				\
-    vec_perm(v_srcA1##i, v_srcA2##i, perm##i);				\
+#define LOAD_LINE(i)                                                    \
+  register int j##i = i * stride;                                       \
+  vector unsigned char perm##i = vec_lvsl(j##i, src2);                  \
+  const vector unsigned char v_srcA1##i = vec_ld(j##i, src2);           \
+  vector unsigned char v_srcA2##i;                                      \
+  if (two_vectors)                                                      \
+    v_srcA2##i = vec_ld(j##i + 16, src2);                               \
+  const vector unsigned char v_srcA##i =                                \
+    vec_perm(v_srcA1##i, v_srcA2##i, perm##i);                          \
   v_srcAss##i =                                                         \
-    (vector signed short)vec_mergeh((vector signed char)zero,		\
-				    (vector signed char)v_srcA##i)
+    (vector signed short)vec_mergeh((vector signed char)zero,           \
+                                    (vector signed char)v_srcA##i)
 
 #define LOAD_LINE_ALIGNED(i)                                            \
   register int j##i = i * stride;                                       \
   const vector unsigned char v_srcA##i = vec_ld(j##i, src2);            \
   v_srcAss##i =                                                         \
-    (vector signed short)vec_mergeh((vector signed char)zero,		\
-				    (vector signed char)v_srcA##i)
+    (vector signed short)vec_mergeh((vector signed char)zero,           \
+                                    (vector signed char)v_srcA##i)
 
     // special casing the aligned case is worthwhile, as all call from
     // the (transposed) horizontable deblocks will be aligned, i naddition
@@ -139,15 +139,15 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
 #undef LOAD_LINE
 #undef LOAD_LINE_ALIGNED
 
-#define ITER(i, j)							\
-  const vector signed short v_diff##i =					\
-    vec_sub(v_srcAss##i, v_srcAss##j);					\
-  const vector signed short v_sum##i =					\
-    vec_add(v_diff##i, v_dcOffset);					\
-  const vector signed short v_comp##i =					\
-    (vector signed short)vec_cmplt((vector unsigned short)v_sum##i,	\
-				   v_dcThreshold);			\
-  const vector signed short v_part##i = vec_and(mask, v_comp##i);	\
+#define ITER(i, j)                                                      \
+  const vector signed short v_diff##i =                                 \
+    vec_sub(v_srcAss##i, v_srcAss##j);                                  \
+  const vector signed short v_sum##i =                                  \
+    vec_add(v_diff##i, v_dcOffset);                                     \
+  const vector signed short v_comp##i =                                 \
+    (vector signed short)vec_cmplt((vector unsigned short)v_sum##i,     \
+                                   v_dcThreshold);                      \
+  const vector signed short v_part##i = vec_and(mask, v_comp##i);       \
   v_numEq = vec_sum4s(v_part##i, v_numEq);
 
   ITER(0, 1);
@@ -160,21 +160,21 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
 #undef ITER
 
   v_numEq = vec_sums(v_numEq, zero);
-	
+
   v_numEq = vec_splat(v_numEq, 3);
   vec_ste(v_numEq, 0, &numEq);
 
   if (numEq > c->ppMode.flatnessThreshold)
     {
       const vector unsigned char mmoP1 = (const vector unsigned char)
-	AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
-	    0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B);
+        AVV(0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f,
+            0x00, 0x01, 0x12, 0x13, 0x08, 0x09, 0x1A, 0x1B);
       const vector unsigned char mmoP2 = (const vector unsigned char)
-	AVV(0x04, 0x05, 0x16, 0x17, 0x0C, 0x0D, 0x1E, 0x1F,
-	    0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f);
+        AVV(0x04, 0x05, 0x16, 0x17, 0x0C, 0x0D, 0x1E, 0x1F,
+            0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f);
       const vector unsigned char mmoP = (const vector unsigned char)
-	vec_lvsl(8, (unsigned char*)0);
-      
+        vec_lvsl(8, (unsigned char*)0);
+
       vector signed short mmoL1 = vec_perm(v_srcAss0, v_srcAss2, mmoP1);
       vector signed short mmoL2 = vec_perm(v_srcAss4, v_srcAss6, mmoP2);
       vector signed short mmoL = vec_perm(mmoL1, mmoL2, mmoP);
@@ -183,13 +183,13 @@ static inline int vertClassify_altivec(uint8_t src[], int stride, PPContext *c)
       vector signed short mmoR = vec_perm(mmoR1, mmoR2, mmoP);
       vector signed short mmoDiff = vec_sub(mmoL, mmoR);
       vector unsigned short mmoSum = (vector unsigned short)vec_add(mmoDiff, v2QP);
-      
+
       if (vec_any_gt(mmoSum, v4QP))
-	return 0;
+        return 0;
       else
-	return 1;
+        return 1;
     }
-  else return 2; 
+  else return 2;
 }
 
 static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c) {
@@ -209,30 +209,30 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
   qp[0] = c->QP;
   vector signed short vqp = vec_ld(0, qp);
   vqp = vec_splat(vqp, 0);
-	
+
   src2 += stride*3;
 
   vector signed short vb0, vb1, vb2, vb3, vb4, vb5, vb6, vb7, vb8, vb9;
   vector unsigned char vbA0, vbA1, vbA2, vbA3, vbA4, vbA5, vbA6, vbA7, vbA8, vbA9;
   vector unsigned char vbB0, vbB1, vbB2, vbB3, vbB4, vbB5, vbB6, vbB7, vbB8, vbB9;
   vector unsigned char vbT0, vbT1, vbT2, vbT3, vbT4, vbT5, vbT6, vbT7, vbT8, vbT9;
-	
+
 #define LOAD_LINE(i)                                                    \
-  const vector unsigned char perml##i =					\
-    vec_lvsl(i * stride, src2);						\
+  const vector unsigned char perml##i =                                 \
+    vec_lvsl(i * stride, src2);                                         \
   vbA##i = vec_ld(i * stride, src2);                                    \
   vbB##i = vec_ld(i * stride + 16, src2);                               \
   vbT##i = vec_perm(vbA##i, vbB##i, perml##i);                          \
   vb##i =                                                               \
-    (vector signed short)vec_mergeh((vector unsigned char)zero,		\
-				    (vector unsigned char)vbT##i)
+    (vector signed short)vec_mergeh((vector unsigned char)zero,         \
+                                    (vector unsigned char)vbT##i)
 
 #define LOAD_LINE_ALIGNED(i)                                            \
   register int j##i = i * stride;                                       \
   vbT##i = vec_ld(j##i, src2);                                          \
   vb##i =                                                               \
-    (vector signed short)vec_mergeh((vector signed char)zero,		\
-				    (vector signed char)vbT##i)
+    (vector signed short)vec_mergeh((vector signed char)zero,           \
+                                    (vector signed char)vbT##i)
 
     // special casing the aligned case is worthwhile, as all call from
     // the (transposed) horizontable deblocks will be aligned, in addition
@@ -275,7 +275,7 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
   const vector unsigned short v_cmp89 =
     (const vector unsigned short) vec_cmplt(vec_abs(v_diff89), vqp);
   const vector signed short v_last = vec_sel(vb8, vb9, v_cmp89);
-  
+
   const vector signed short temp01 = vec_mladd(v_first, (vector signed short)v_4, vb1);
   const vector signed short temp02 = vec_add(vb2, vb3);
   const vector signed short temp03 = vec_add(temp01, (vector signed short)v_4);
@@ -308,11 +308,11 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
   const vector signed short temp91 = vec_sub(v_sumsB8, vb5);
   const vector signed short v_sumsB9 = vec_add(temp91, v_last);
 
-#define COMPUTE_VR(i, j, k)						\
-  const vector signed short temps1##i =					\
-    vec_add(v_sumsB##i, v_sumsB##k);					\
-  const vector signed short temps2##i =					\
-    vec_mladd(vb##j, (vector signed short)v_2, temps1##i);		\
+#define COMPUTE_VR(i, j, k)                                             \
+  const vector signed short temps1##i =                                 \
+    vec_add(v_sumsB##i, v_sumsB##k);                                    \
+  const vector signed short temps2##i =                                 \
+    vec_mladd(vb##j, (vector signed short)v_2, temps1##i);              \
   const vector signed short  vr##j = vec_sra(temps2##i, v_4)
 
   COMPUTE_VR(0, 1, 2);
@@ -326,31 +326,31 @@ static inline void doVertLowPass_altivec(uint8_t *src, int stride, PPContext *c)
 
   const vector signed char neg1 = vec_splat_s8(-1);
   const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-								      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
-
-#define PACK_AND_STORE(i)					\
-  const vector unsigned char perms##i =				\
-    vec_lvsr(i * stride, src2);					\
-  const vector unsigned char vf##i =				\
-    vec_packsu(vr##i, (vector signed short)zero);		\
-  const vector unsigned char vg##i =				\
-    vec_perm(vf##i, vbT##i, permHH);				\
-  const vector unsigned char mask##i =				\
-    vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i);	\
-  const vector unsigned char vg2##i =				\
-    vec_perm(vg##i, vg##i, perms##i);				\
-  const vector unsigned char svA##i =				\
-    vec_sel(vbA##i, vg2##i, mask##i);				\
-  const vector unsigned char svB##i =				\
-    vec_sel(vg2##i, vbB##i, mask##i);				\
-  vec_st(svA##i, i * stride, src2);				\
+                                                                      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
+
+#define PACK_AND_STORE(i)                                       \
+  const vector unsigned char perms##i =                         \
+    vec_lvsr(i * stride, src2);                                 \
+  const vector unsigned char vf##i =                            \
+    vec_packsu(vr##i, (vector signed short)zero);               \
+  const vector unsigned char vg##i =                            \
+    vec_perm(vf##i, vbT##i, permHH);                            \
+  const vector unsigned char mask##i =                          \
+    vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
+  const vector unsigned char vg2##i =                           \
+    vec_perm(vg##i, vg##i, perms##i);                           \
+  const vector unsigned char svA##i =                           \
+    vec_sel(vbA##i, vg2##i, mask##i);                           \
+  const vector unsigned char svB##i =                           \
+    vec_sel(vg2##i, vbB##i, mask##i);                           \
+  vec_st(svA##i, i * stride, src2);                             \
   vec_st(svB##i, i * stride + 16, src2)
 
-#define PACK_AND_STORE_ALIGNED(i)				\
-  const vector unsigned char vf##i =				\
-    vec_packsu(vr##i, (vector signed short)zero);		\
-  const vector unsigned char vg##i =				\
-    vec_perm(vf##i, vbT##i, permHH);				\
+#define PACK_AND_STORE_ALIGNED(i)                               \
+  const vector unsigned char vf##i =                            \
+    vec_packsu(vr##i, (vector signed short)zero);               \
+  const vector unsigned char vg##i =                            \
+    vec_perm(vf##i, vbT##i, permHH);                            \
   vec_st(vg##i, i * stride, src2)
 
   // special casing the aligned case is worthwhile, as all call from
@@ -398,20 +398,20 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
   vqp = vec_splat(vqp, 0);
 
 #define LOAD_LINE(i)                                                    \
-  const vector unsigned char perm##i =					\
-    vec_lvsl(i * stride, src2);						\
-  const vector unsigned char vbA##i =					\
-    vec_ld(i * stride, src2);						\
-  const vector unsigned char vbB##i =					\
-    vec_ld(i * stride + 16, src2);					\
-  const vector unsigned char vbT##i =					\
-    vec_perm(vbA##i, vbB##i, perm##i);					\
-  const vector signed short vb##i =					\
-    (vector signed short)vec_mergeh((vector unsigned char)zero,		\
-				    (vector unsigned char)vbT##i)
-  
+  const vector unsigned char perm##i =                                  \
+    vec_lvsl(i * stride, src2);                                         \
+  const vector unsigned char vbA##i =                                   \
+    vec_ld(i * stride, src2);                                           \
+  const vector unsigned char vbB##i =                                   \
+    vec_ld(i * stride + 16, src2);                                      \
+  const vector unsigned char vbT##i =                                   \
+    vec_perm(vbA##i, vbB##i, perm##i);                                  \
+  const vector signed short vb##i =                                     \
+    (vector signed short)vec_mergeh((vector unsigned char)zero,         \
+                                    (vector unsigned char)vbT##i)
+
   src2 += stride*3;
-  
+
   LOAD_LINE(1);
   LOAD_LINE(2);
   LOAD_LINE(3);
@@ -421,12 +421,12 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
   LOAD_LINE(7);
   LOAD_LINE(8);
 #undef LOAD_LINE
-  
+
   const vector signed short v_1 = vec_splat_s16(1);
   const vector signed short v_2 = vec_splat_s16(2);
   const vector signed short v_5 = vec_splat_s16(5);
   const vector signed short v_32 = vec_sl(v_1,
-					  (vector unsigned short)v_5);
+                                          (vector unsigned short)v_5);
   /* middle energy */
   const vector signed short l3minusl6 = vec_sub(vb3, vb6);
   const vector signed short l5minusl4 = vec_sub(vb5, vb4);
@@ -480,27 +480,27 @@ static inline void doVertDefFilter_altivec(uint8_t src[], int stride, PPContext
   /* finally, stores */
   const vector unsigned char st4 = vec_packsu(vb4minusd, (vector signed short)zero);
   const vector unsigned char st5 = vec_packsu(vb5plusd, (vector signed short)zero);
-  
+
   const vector signed char neg1 = vec_splat_s8(-1);
   const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-								      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
-	
-#define STORE(i)						\
-  const vector unsigned char perms##i =				\
-    vec_lvsr(i * stride, src2);					\
-  const vector unsigned char vg##i =				\
-    vec_perm(st##i, vbT##i, permHH);				\
-  const vector unsigned char mask##i =				\
-    vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i);	\
-  const vector unsigned char vg2##i =				\
-    vec_perm(vg##i, vg##i, perms##i);				\
-  const vector unsigned char svA##i =				\
-    vec_sel(vbA##i, vg2##i, mask##i);				\
-  const vector unsigned char svB##i =				\
-    vec_sel(vg2##i, vbB##i, mask##i);				\
-  vec_st(svA##i, i * stride, src2);				\
+                                                                      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
+
+#define STORE(i)                                                \
+  const vector unsigned char perms##i =                         \
+    vec_lvsr(i * stride, src2);                                 \
+  const vector unsigned char vg##i =                            \
+    vec_perm(st##i, vbT##i, permHH);                            \
+  const vector unsigned char mask##i =                          \
+    vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##i); \
+  const vector unsigned char vg2##i =                           \
+    vec_perm(vg##i, vg##i, perms##i);                           \
+  const vector unsigned char svA##i =                           \
+    vec_sel(vbA##i, vg2##i, mask##i);                           \
+  const vector unsigned char svB##i =                           \
+    vec_sel(vg2##i, vbB##i, mask##i);                           \
+  vec_st(svA##i, i * stride, src2);                             \
   vec_st(svB##i, i * stride + 16, src2)
-	
+
   STORE(4);
   STORE(5);
 }
@@ -522,13 +522,13 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
   dt[0] = deringThreshold;
   v_dt = vec_splat(vec_ld(0, dt), 0);
 
-#define LOAD_LINE(i)							\
-  const vector unsigned char perm##i =					\
-    vec_lvsl(i * stride, srcCopy);					\
-  vector unsigned char sA##i = vec_ld(i * stride, srcCopy);		\
-  vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy);	\
+#define LOAD_LINE(i)                                                    \
+  const vector unsigned char perm##i =                                  \
+    vec_lvsl(i * stride, srcCopy);                                      \
+  vector unsigned char sA##i = vec_ld(i * stride, srcCopy);             \
+  vector unsigned char sB##i = vec_ld(i * stride + 16, srcCopy);        \
   vector unsigned char src##i = vec_perm(sA##i, sB##i, perm##i)
-	
+
   LOAD_LINE(0);
   LOAD_LINE(1);
   LOAD_LINE(2);
@@ -545,13 +545,13 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
   {
     const vector unsigned char trunc_perm = (vector unsigned char)
       AVV(0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
-	  0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18);
+          0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18);
     const vector unsigned char trunc_src12 = vec_perm(src1, src2, trunc_perm);
     const vector unsigned char trunc_src34 = vec_perm(src3, src4, trunc_perm);
     const vector unsigned char trunc_src56 = vec_perm(src5, src6, trunc_perm);
     const vector unsigned char trunc_src78 = vec_perm(src7, src8, trunc_perm);
-	  
-#define EXTRACT(op) do {						\
+
+#define EXTRACT(op) do {                                                \
       const vector unsigned char s##op##_1 = vec_##op(trunc_src12, trunc_src34); \
       const vector unsigned char s##op##_2 = vec_##op(trunc_src56, trunc_src78); \
       const vector unsigned char s##op##_6 = vec_##op(s##op##_1, s##op##_2); \
@@ -567,48 +567,48 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
       const vector unsigned char s##op##_11h = vec_mergeh(s##op##_11, s##op##_11); \
       const vector unsigned char s##op##_11l = vec_mergel(s##op##_11, s##op##_11); \
       v_##op = vec_##op(s##op##_11h, s##op##_11l); } while (0)
-	  
+
     vector unsigned char v_min;
     vector unsigned char v_max;
     EXTRACT(min);
     EXTRACT(max);
 #undef EXTRACT
-	  
+
     if (vec_all_lt(vec_sub(v_max, v_min), v_dt))
       return;
-	  
+
     v_avg = vec_avg(v_min, v_max);
   }
-	
+
   signed int __attribute__((aligned(16))) S[8];
   {
     const vector unsigned short mask1 = (vector unsigned short)
       AVV(0x0001, 0x0002, 0x0004, 0x0008,
-	  0x0010, 0x0020, 0x0040, 0x0080);
+          0x0010, 0x0020, 0x0040, 0x0080);
     const vector unsigned short mask2 = (vector unsigned short)
       AVV(0x0100, 0x0200, 0x0000, 0x0000,
-	  0x0000, 0x0000, 0x0000, 0x0000);
-	  
+          0x0000, 0x0000, 0x0000, 0x0000);
+
     const vector unsigned int vuint32_16 = vec_sl(vec_splat_u32(1), vec_splat_u32(4));
     const vector unsigned int vuint32_1 = vec_splat_u32(1);
-	  
-#define COMPARE(i)							\
-    vector signed int sum##i;						\
-    do {								\
-      const vector unsigned char cmp##i =				\
-	(vector unsigned char)vec_cmpgt(src##i, v_avg);			\
-      const vector unsigned short cmpHi##i =				\
-	(vector unsigned short)vec_mergeh(cmp##i, cmp##i);		\
-      const vector unsigned short cmpLi##i =				\
-	(vector unsigned short)vec_mergel(cmp##i, cmp##i);		\
-      const vector signed short cmpHf##i =				\
-	(vector signed short)vec_and(cmpHi##i, mask1);			\
-      const vector signed short cmpLf##i =				\
-	(vector signed short)vec_and(cmpLi##i, mask2);			\
-      const vector signed int sump##i = vec_sum4s(cmpHf##i, zero);	\
-      const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i);	\
+
+#define COMPARE(i)                                                      \
+    vector signed int sum##i;                                           \
+    do {                                                                \
+      const vector unsigned char cmp##i =                               \
+        (vector unsigned char)vec_cmpgt(src##i, v_avg);                 \
+      const vector unsigned short cmpHi##i =                            \
+        (vector unsigned short)vec_mergeh(cmp##i, cmp##i);              \
+      const vector unsigned short cmpLi##i =                            \
+        (vector unsigned short)vec_mergel(cmp##i, cmp##i);              \
+      const vector signed short cmpHf##i =                              \
+        (vector signed short)vec_and(cmpHi##i, mask1);                  \
+      const vector signed short cmpLf##i =                              \
+        (vector signed short)vec_and(cmpLi##i, mask2);                  \
+      const vector signed int sump##i = vec_sum4s(cmpHf##i, zero);      \
+      const vector signed int sumq##i = vec_sum4s(cmpLf##i, sump##i);   \
       sum##i  = vec_sums(sumq##i, zero); } while (0)
-	  
+
     COMPARE(0);
     COMPARE(1);
     COMPARE(2);
@@ -620,22 +620,22 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
     COMPARE(8);
     COMPARE(9);
 #undef COMPARE
-	  
+
     vector signed int sumA2;
     vector signed int sumB2;
     {
       const vector signed int sump02 = vec_mergel(sum0, sum2);
       const vector signed int sump13 = vec_mergel(sum1, sum3);
       const vector signed int sumA = vec_mergel(sump02, sump13);
-	      
+
       const vector signed int sump46 = vec_mergel(sum4, sum6);
       const vector signed int sump57 = vec_mergel(sum5, sum7);
       const vector signed int sumB = vec_mergel(sump46, sump57);
-	      
+
       const vector signed int sump8A = vec_mergel(sum8, zero);
       const vector signed int sump9B = vec_mergel(sum9, zero);
       const vector signed int sumC = vec_mergel(sump8A, sump9B);
-	      
+
       const vector signed int tA = vec_sl(vec_nor(zero, sumA), vuint32_16);
       const vector signed int tB = vec_sl(vec_nor(zero, sumB), vuint32_16);
       const vector signed int tC = vec_sl(vec_nor(zero, sumC), vuint32_16);
@@ -643,15 +643,15 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
       const vector signed int t2B = vec_or(sumB, tB);
       const vector signed int t2C = vec_or(sumC, tC);
       const vector signed int t3A = vec_and(vec_sra(t2A, vuint32_1),
-					    vec_sl(t2A, vuint32_1));
+                                            vec_sl(t2A, vuint32_1));
       const vector signed int t3B = vec_and(vec_sra(t2B, vuint32_1),
-					    vec_sl(t2B, vuint32_1));
+                                            vec_sl(t2B, vuint32_1));
       const vector signed int t3C = vec_and(vec_sra(t2C, vuint32_1),
-					    vec_sl(t2C, vuint32_1));
+                                            vec_sl(t2C, vuint32_1));
       const vector signed int yA = vec_and(t2A, t3A);
       const vector signed int yB = vec_and(t2B, t3B);
       const vector signed int yC = vec_and(t2C, t3C);
-	      
+
       const vector unsigned char strangeperm1 = vec_lvsl(4, (unsigned char*)0);
       const vector unsigned char strangeperm2 = vec_lvsl(8, (unsigned char*)0);
       const vector signed int sumAd4 = vec_perm(yA, yB, strangeperm1);
@@ -659,23 +659,23 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
       const vector signed int sumBd4 = vec_perm(yB, yC, strangeperm1);
       const vector signed int sumBd8 = vec_perm(yB, yC, strangeperm2);
       const vector signed int sumAp = vec_and(yA,
-					      vec_and(sumAd4,sumAd8));
+                                              vec_and(sumAd4,sumAd8));
       const vector signed int sumBp = vec_and(yB,
-					      vec_and(sumBd4,sumBd8));
+                                              vec_and(sumBd4,sumBd8));
       sumA2 = vec_or(sumAp,
-		     vec_sra(sumAp,
-			     vuint32_16));
+                     vec_sra(sumAp,
+                             vuint32_16));
       sumB2  = vec_or(sumBp,
-		      vec_sra(sumBp,
-			      vuint32_16));
-    }	
+                      vec_sra(sumBp,
+                              vuint32_16));
+    }
     vec_st(sumA2, 0, S);
     vec_st(sumB2, 16, S);
   }
 
   /* I'm not sure the following is actually faster
      than straight, unvectorized C code :-( */
-	
+
   int __attribute__((aligned(16))) tQP2[4];
   tQP2[0]= c->QP/2 + 1;
   vector signed int vQP2 = vec_ld(0, tQP2);
@@ -686,84 +686,84 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
 
   const vector unsigned char permA1 = (vector unsigned char)
     AVV(0x00, 0x01, 0x02, 0x10, 0x11, 0x12, 0x1F, 0x1F,
-	0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
+        0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
   const vector unsigned char permA2 = (vector unsigned char)
     AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x11,
-	0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
+        0x12, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F);
   const vector unsigned char permA1inc = (vector unsigned char)
     AVV(0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
   const vector unsigned char permA2inc = (vector unsigned char)
     AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x01,
-	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
   const vector unsigned char magic = (vector unsigned char)
     AVV(0x01, 0x02, 0x01, 0x02, 0x04, 0x02, 0x01, 0x02,
-	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
   const vector unsigned char extractPerm = (vector unsigned char)
     AVV(0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01,
-	0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01);
+        0x10, 0x10, 0x10, 0x01, 0x10, 0x10, 0x10, 0x01);
   const vector unsigned char extractPermInc = (vector unsigned char)
     AVV(0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01,
-	0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01);
+        0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01);
   const vector unsigned char identity = vec_lvsl(0,(unsigned char *)0);
   const vector unsigned char tenRight = (vector unsigned char)
     AVV(0x00, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
   const vector unsigned char eightLeft = (vector unsigned char)
     AVV(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
-	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08);
+        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08);
 
 
-#define F_INIT(i)					\
-  vector unsigned char tenRightM##i = tenRight;		\
-  vector unsigned char permA1M##i = permA1;		\
-  vector unsigned char permA2M##i = permA2;		\
+#define F_INIT(i)                                       \
+  vector unsigned char tenRightM##i = tenRight;         \
+  vector unsigned char permA1M##i = permA1;             \
+  vector unsigned char permA2M##i = permA2;             \
   vector unsigned char extractPermM##i = extractPerm
 
-#define F2(i, j, k, l)							\
-  if (S[i] & (1 << (l+1))) {						\
-    const vector unsigned char a_##j##_A##l =				\
-      vec_perm(src##i, src##j, permA1M##i);				\
-    const vector unsigned char a_##j##_B##l =				\
-      vec_perm(a_##j##_A##l, src##k, permA2M##i);			\
-    const vector signed int a_##j##_sump##l =				\
-      (vector signed int)vec_msum(a_##j##_B##l, magic,			\
-				  (vector unsigned int)zero);		\
-    vector signed int F_##j##_##l =					\
-      vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4);		\
-    F_##j##_##l = vec_splat(F_##j##_##l, 3);				\
-    const vector signed int p_##j##_##l =				\
-      (vector signed int)vec_perm(src##j,				\
-				  (vector unsigned char)zero,		\
-				  extractPermM##i);			\
-    const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2); \
-    const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2); \
-    vector signed int newpm_##j##_##l;					\
-    if (vec_all_lt(sum_##j##_##l, F_##j##_##l))				\
-      newpm_##j##_##l = sum_##j##_##l;					\
-    else if (vec_all_gt(diff_##j##_##l, F_##j##_##l))			\
-      newpm_##j##_##l = diff_##j##_##l;					\
-    else newpm_##j##_##l = F_##j##_##l;					\
-    const vector unsigned char newpm2_##j##_##l =			\
-      vec_splat((vector unsigned char)newpm_##j##_##l, 15);		\
-    const vector unsigned char mask##j##l = vec_add(identity,		\
-						    tenRightM##i);	\
-    src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l);		\
-  }									\
-  permA1M##i = vec_add(permA1M##i, permA1inc);				\
-  permA2M##i = vec_add(permA2M##i, permA2inc);				\
-  tenRightM##i = vec_sro(tenRightM##i, eightLeft);			\
+#define F2(i, j, k, l)                                                  \
+  if (S[i] & (1 << (l+1))) {                                            \
+    const vector unsigned char a_##j##_A##l =                           \
+      vec_perm(src##i, src##j, permA1M##i);                             \
+    const vector unsigned char a_##j##_B##l =                           \
+      vec_perm(a_##j##_A##l, src##k, permA2M##i);                       \
+    const vector signed int a_##j##_sump##l =                           \
+      (vector signed int)vec_msum(a_##j##_B##l, magic,                  \
+                                  (vector unsigned int)zero);           \
+    vector signed int F_##j##_##l =                                     \
+      vec_sr(vec_sums(a_##j##_sump##l, vsint32_8), vuint32_4);          \
+    F_##j##_##l = vec_splat(F_##j##_##l, 3);                            \
+    const vector signed int p_##j##_##l =                               \
+      (vector signed int)vec_perm(src##j,                               \
+                                  (vector unsigned char)zero,           \
+                                  extractPermM##i);                     \
+    const vector signed int sum_##j##_##l = vec_add( p_##j##_##l, vQP2);\
+    const vector signed int diff_##j##_##l = vec_sub( p_##j##_##l, vQP2);\
+    vector signed int newpm_##j##_##l;                                  \
+    if (vec_all_lt(sum_##j##_##l, F_##j##_##l))                         \
+      newpm_##j##_##l = sum_##j##_##l;                                  \
+    else if (vec_all_gt(diff_##j##_##l, F_##j##_##l))                   \
+      newpm_##j##_##l = diff_##j##_##l;                                 \
+    else newpm_##j##_##l = F_##j##_##l;                                 \
+    const vector unsigned char newpm2_##j##_##l =                       \
+      vec_splat((vector unsigned char)newpm_##j##_##l, 15);             \
+    const vector unsigned char mask##j##l = vec_add(identity,           \
+                                                    tenRightM##i);      \
+    src##j = vec_perm(src##j, newpm2_##j##_##l, mask##j##l);            \
+  }                                                                     \
+  permA1M##i = vec_add(permA1M##i, permA1inc);                          \
+  permA2M##i = vec_add(permA2M##i, permA2inc);                          \
+  tenRightM##i = vec_sro(tenRightM##i, eightLeft);                      \
   extractPermM##i = vec_add(extractPermM##i, extractPermInc)
 
-#define ITER(i, j, k)				\
-  F_INIT(i);					\
-  F2(i, j, k, 0);				\
-  F2(i, j, k, 1);				\
-  F2(i, j, k, 2);				\
-  F2(i, j, k, 3);				\
-  F2(i, j, k, 4);				\
-  F2(i, j, k, 5);				\
-  F2(i, j, k, 6);				\
+#define ITER(i, j, k)                           \
+  F_INIT(i);                                    \
+  F2(i, j, k, 0);                               \
+  F2(i, j, k, 1);                               \
+  F2(i, j, k, 2);                               \
+  F2(i, j, k, 3);                               \
+  F2(i, j, k, 4);                               \
+  F2(i, j, k, 5);                               \
+  F2(i, j, k, 6);                               \
   F2(i, j, k, 7)
 
   ITER(0, 1, 2);
@@ -776,19 +776,19 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
   ITER(7, 8, 9);
 
   const vector signed char neg1 = vec_splat_s8(-1);
-	
-#define STORE_LINE(i)					\
-  const vector unsigned char permST##i =		\
-    vec_lvsr(i * stride, srcCopy);			\
-  const vector unsigned char maskST##i =		\
-    vec_perm((vector unsigned char)zero,		\
-	     (vector unsigned char)neg1, permST##i);	\
-  src##i = vec_perm(src##i ,src##i, permST##i);		\
-  sA##i= vec_sel(sA##i, src##i, maskST##i);		\
-  sB##i= vec_sel(src##i, sB##i, maskST##i);		\
-  vec_st(sA##i, i * stride, srcCopy);			\
+
+#define STORE_LINE(i)                                   \
+  const vector unsigned char permST##i =                \
+    vec_lvsr(i * stride, srcCopy);                      \
+  const vector unsigned char maskST##i =                \
+    vec_perm((vector unsigned char)zero,                \
+             (vector unsigned char)neg1, permST##i);    \
+  src##i = vec_perm(src##i ,src##i, permST##i);         \
+  sA##i= vec_sel(sA##i, src##i, maskST##i);             \
+  sB##i= vec_sel(src##i, sB##i, maskST##i);             \
+  vec_st(sA##i, i * stride, srcCopy);                   \
   vec_st(sB##i, i * stride + 16, srcCopy)
-	
+
   STORE_LINE(1);
   STORE_LINE(2);
   STORE_LINE(3);
@@ -808,29 +808,29 @@ static inline void dering_altivec(uint8_t src[], int stride, PPContext *c) {
 #define do_a_deblock_altivec(a...) do_a_deblock_C(a)
 
 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
-				    uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
+                                    uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
 {
   const vector signed int zero = vec_splat_s32(0);
   const vector signed short vsint16_1 = vec_splat_s16(1);
   vector signed int v_dp = zero;
   vector signed int v_sysdp = zero;
   int d, sysd, i;
-  
+
   tempBluredPast[127]= maxNoise[0];
   tempBluredPast[128]= maxNoise[1];
   tempBluredPast[129]= maxNoise[2];
 
-#define LOAD_LINE(src, i)						\
-  register int j##src##i = i * stride;					\
-  vector unsigned char perm##src##i = vec_lvsl(j##src##i, src);		\
-  const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src);	\
+#define LOAD_LINE(src, i)                                               \
+  register int j##src##i = i * stride;                                  \
+  vector unsigned char perm##src##i = vec_lvsl(j##src##i, src);         \
+  const vector unsigned char v_##src##A1##i = vec_ld(j##src##i, src);   \
   const vector unsigned char v_##src##A2##i = vec_ld(j##src##i + 16, src); \
-  const vector unsigned char v_##src##A##i =				\
-    vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i);		\
-  vector signed short v_##src##Ass##i =					\
-    (vector signed short)vec_mergeh((vector signed char)zero,		\
-				    (vector signed char)v_##src##A##i)
-  
+  const vector unsigned char v_##src##A##i =                            \
+    vec_perm(v_##src##A1##i, v_##src##A2##i, perm##src##i);             \
+  vector signed short v_##src##Ass##i =                                 \
+    (vector signed short)vec_mergeh((vector signed char)zero,           \
+                                    (vector signed char)v_##src##A##i)
+
   LOAD_LINE(src, 0);
   LOAD_LINE(src, 1);
   LOAD_LINE(src, 2);
@@ -850,10 +850,10 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
   LOAD_LINE(tempBlured, 7);
 #undef LOAD_LINE
 
-#define ACCUMULATE_DIFFS(i)					\
-  vector signed short v_d##i = vec_sub(v_tempBluredAss##i,	\
-				       v_srcAss##i);		\
-  v_dp = vec_msums(v_d##i, v_d##i, v_dp);			\
+#define ACCUMULATE_DIFFS(i)                                     \
+  vector signed short v_d##i = vec_sub(v_tempBluredAss##i,      \
+                                       v_srcAss##i);            \
+  v_dp = vec_msums(v_d##i, v_d##i, v_dp);                       \
   v_sysdp = vec_msums(v_d##i, vsint16_1, v_sysdp)
 
   ACCUMULATE_DIFFS(0);
@@ -871,7 +871,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
 
   v_dp = vec_splat(v_dp, 3);
   v_sysdp = vec_splat(v_sysdp, 3);
-  
+
   vec_ste(v_dp, 0, &d);
   vec_ste(v_sysdp, 0, &sysd);
 
@@ -915,13 +915,13 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
       const vector signed short vsint16_7 = vec_splat_s16(7);
       const vector signed short vsint16_4 = vec_splat_s16(4);
       const vector unsigned short vuint16_3 = vec_splat_u16(3);
-      
-#define OP(i)								\
-      const vector signed short v_temp##i =				\
-	vec_mladd(v_tempBluredAss##i,					\
-		  vsint16_7, v_srcAss##i);				\
-      const vector signed short v_temp2##i =				\
-	vec_add(v_temp##i, vsint16_4);					\
+
+#define OP(i)                                                   \
+      const vector signed short v_temp##i =                     \
+        vec_mladd(v_tempBluredAss##i,                           \
+                  vsint16_7, v_srcAss##i);                      \
+      const vector signed short v_temp2##i =                    \
+        vec_add(v_temp##i, vsint16_4);                          \
       v_tempBluredAss##i = vec_sr(v_temp2##i, vuint16_3)
 
       OP(0);
@@ -936,13 +936,13 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
     } else {
       const vector signed short vsint16_3 = vec_splat_s16(3);
       const vector signed short vsint16_2 = vec_splat_s16(2);
-      
-#define OP(i)								\
-      const vector signed short v_temp##i =				\
-	vec_mladd(v_tempBluredAss##i,					\
-		  vsint16_3, v_srcAss##i);				\
-      const vector signed short v_temp2##i =				\
-	vec_add(v_temp##i, vsint16_2);					\
+
+#define OP(i)                                                   \
+      const vector signed short v_temp##i =                     \
+        vec_mladd(v_tempBluredAss##i,                           \
+                  vsint16_3, v_srcAss##i);                      \
+      const vector signed short v_temp2##i =                    \
+        vec_add(v_temp##i, vsint16_2);                          \
       v_tempBluredAss##i = vec_sr(v_temp2##i, (vector unsigned short)vsint16_2)
 
       OP(0);
@@ -959,24 +959,24 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
 
   const vector signed char neg1 = vec_splat_s8(-1);
   const vector unsigned char permHH = (const vector unsigned char)AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-								      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
-
-#define PACK_AND_STORE(src, i)						\
-  const vector unsigned char perms##src##i =				\
-    vec_lvsr(i * stride, src);						\
-  const vector unsigned char vf##src##i =				\
-    vec_packsu(v_tempBluredAss##i, (vector signed short)zero);		\
-  const vector unsigned char vg##src##i =				\
-    vec_perm(vf##src##i, v_##src##A##i, permHH);			\
-  const vector unsigned char mask##src##i =				\
+                                                                      0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
+
+#define PACK_AND_STORE(src, i)                                  \
+  const vector unsigned char perms##src##i =                    \
+    vec_lvsr(i * stride, src);                                  \
+  const vector unsigned char vf##src##i =                       \
+    vec_packsu(v_tempBluredAss##i, (vector signed short)zero);  \
+  const vector unsigned char vg##src##i =                       \
+    vec_perm(vf##src##i, v_##src##A##i, permHH);                \
+  const vector unsigned char mask##src##i =                     \
     vec_perm((vector unsigned char)zero, (vector unsigned char)neg1, perms##src##i); \
-  const vector unsigned char vg2##src##i =				\
-    vec_perm(vg##src##i, vg##src##i, perms##src##i);			\
-  const vector unsigned char svA##src##i =				\
-    vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i);			\
-  const vector unsigned char svB##src##i =				\
-    vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i);			\
-  vec_st(svA##src##i, i * stride, src);					\
+  const vector unsigned char vg2##src##i =                      \
+    vec_perm(vg##src##i, vg##src##i, perms##src##i);            \
+  const vector unsigned char svA##src##i =                      \
+    vec_sel(v_##src##A1##i, vg2##src##i, mask##src##i);         \
+  const vector unsigned char svB##src##i =                      \
+    vec_sel(vg2##src##i, v_##src##A2##i, mask##src##i);         \
+  vec_st(svA##src##i, i * stride, src);                         \
   vec_st(svB##src##i, i * stride + 16, src)
 
   PACK_AND_STORE(src, 0);
@@ -1001,16 +1001,16 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
 static inline void transpose_16x8_char_toPackedAlign_altivec(unsigned char* dst, unsigned char* src, int stride) {
   const vector unsigned char zero = vec_splat_u8(0);
 
-#define LOAD_DOUBLE_LINE(i, j)						\
-  vector unsigned char perm1##i = vec_lvsl(i * stride, src);		\
-  vector unsigned char perm2##i = vec_lvsl(j * stride, src);		\
-  vector unsigned char srcA##i = vec_ld(i * stride, src);		\
+#define LOAD_DOUBLE_LINE(i, j)                                          \
+  vector unsigned char perm1##i = vec_lvsl(i * stride, src);            \
+  vector unsigned char perm2##i = vec_lvsl(j * stride, src);            \
+  vector unsigned char srcA##i = vec_ld(i * stride, src);               \
   vector unsigned char srcB##i = vec_ld(i * stride + 16, src);          \
-  vector unsigned char srcC##i = vec_ld(j * stride, src);		\
+  vector unsigned char srcC##i = vec_ld(j * stride, src);               \
   vector unsigned char srcD##i = vec_ld(j * stride+ 16, src);           \
-  vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i);	\
+  vector unsigned char src##i = vec_perm(srcA##i, srcB##i, perm1##i);   \
   vector unsigned char src##j = vec_perm(srcC##i, srcD##i, perm2##i)
-  
+
   LOAD_DOUBLE_LINE(0, 1);
   LOAD_DOUBLE_LINE(2, 3);
   LOAD_DOUBLE_LINE(4, 5);
@@ -1107,10 +1107,10 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds
   const vector unsigned char zero = vec_splat_u8(0);
   const vector unsigned char magic_perm = (const vector unsigned char)
     AVV(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
-	0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
-  
-#define LOAD_DOUBLE_LINE(i, j)			    		\
-  vector unsigned char src##i = vec_ld(i * 16, src);		\
+        0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F);
+
+#define LOAD_DOUBLE_LINE(i, j)                                  \
+  vector unsigned char src##i = vec_ld(i * 16, src);            \
   vector unsigned char src##j = vec_ld(j * 16, src)
 
   LOAD_DOUBLE_LINE(0, 1);
@@ -1169,24 +1169,24 @@ static inline void transpose_8x16_char_fromPackedAlign_altivec(unsigned char* ds
 
 
   const vector signed char neg1 = vec_splat_s8(-1);
-#define STORE_DOUBLE_LINE(i, j)						\
-  vector unsigned char dstA##i = vec_ld(i * stride, dst);		\
-  vector unsigned char dstB##i = vec_ld(i * stride + 16, dst);		\
-  vector unsigned char dstA##j = vec_ld(j * stride, dst);		\
-  vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst);		\
-  vector unsigned char align##i = vec_lvsr(i * stride, dst);		\
-  vector unsigned char align##j = vec_lvsr(j * stride, dst);		\
-  vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i);	\
-  vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j);	\
-  vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i);	\
-  vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j);	\
-  vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i);	\
-  vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i);	\
-  vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j);	\
-  vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j);	\
-  vec_st(dstAF##i, i * stride, dst);					\
-  vec_st(dstBF##i, i * stride + 16, dst);				\
-  vec_st(dstAF##j, j * stride, dst);					\
+#define STORE_DOUBLE_LINE(i, j)                                         \
+  vector unsigned char dstA##i = vec_ld(i * stride, dst);               \
+  vector unsigned char dstB##i = vec_ld(i * stride + 16, dst);          \
+  vector unsigned char dstA##j = vec_ld(j * stride, dst);               \
+  vector unsigned char dstB##j = vec_ld(j * stride+ 16, dst);           \
+  vector unsigned char align##i = vec_lvsr(i * stride, dst);            \
+  vector unsigned char align##j = vec_lvsr(j * stride, dst);            \
+  vector unsigned char mask##i = vec_perm(zero, (vector unsigned char)neg1, align##i); \
+  vector unsigned char mask##j = vec_perm(zero, (vector unsigned char)neg1, align##j); \
+  vector unsigned char dstR##i = vec_perm(temp##i, temp##i, align##i);  \
+  vector unsigned char dstR##j = vec_perm(temp##j, temp##j, align##j);  \
+  vector unsigned char dstAF##i = vec_sel(dstA##i, dstR##i, mask##i);   \
+  vector unsigned char dstBF##i = vec_sel(dstR##i, dstB##i, mask##i);   \
+  vector unsigned char dstAF##j = vec_sel(dstA##j, dstR##j, mask##j);   \
+  vector unsigned char dstBF##j = vec_sel(dstR##j, dstB##j, mask##j);   \
+  vec_st(dstAF##i, i * stride, dst);                                    \
+  vec_st(dstBF##i, i * stride + 16, dst);                               \
+  vec_st(dstAF##j, j * stride, dst);                                    \
   vec_st(dstBF##j, j * stride + 16, dst)
 
   STORE_DOUBLE_LINE(0,1);
diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h b/src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h
index 01d4679ad..bab4c841c 100644
--- a/src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h
+++ b/src/libffmpeg/libavcodec/libpostproc/postprocess_internal.h
@@ -13,7 +13,7 @@
 
     You should have received a copy of the GNU General Public License
     along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 
 /**
@@ -21,42 +21,42 @@
  * internal api header.
  */
 
-#define V_DEBLOCK	0x01
-#define H_DEBLOCK	0x02
-#define DERING		0x04
-#define LEVEL_FIX	0x08 ///< Brightness & Contrast
+#define V_DEBLOCK       0x01
+#define H_DEBLOCK       0x02
+#define DERING          0x04
+#define LEVEL_FIX       0x08 ///< Brightness & Contrast
 
-#define LUM_V_DEBLOCK	V_DEBLOCK		//   1
-#define LUM_H_DEBLOCK	H_DEBLOCK		//   2
-#define CHROM_V_DEBLOCK	(V_DEBLOCK<<4)		//  16
-#define CHROM_H_DEBLOCK	(H_DEBLOCK<<4)		//  32
-#define LUM_DERING	DERING			//   4
-#define CHROM_DERING	(DERING<<4)		//  64
-#define LUM_LEVEL_FIX	LEVEL_FIX		//   8
-#define CHROM_LEVEL_FIX	(LEVEL_FIX<<4)		// 128 (not implemented yet)
+#define LUM_V_DEBLOCK   V_DEBLOCK               //   1
+#define LUM_H_DEBLOCK   H_DEBLOCK               //   2
+#define CHROM_V_DEBLOCK (V_DEBLOCK<<4)          //  16
+#define CHROM_H_DEBLOCK (H_DEBLOCK<<4)          //  32
+#define LUM_DERING      DERING                  //   4
+#define CHROM_DERING    (DERING<<4)             //  64
+#define LUM_LEVEL_FIX   LEVEL_FIX               //   8
+#define CHROM_LEVEL_FIX (LEVEL_FIX<<4)          // 128 (not implemented yet)
 
 // Experimental vertical filters
-#define V_X1_FILTER	0x0200			// 512
-#define V_A_DEBLOCK	0x0400
+#define V_X1_FILTER     0x0200                  // 512
+#define V_A_DEBLOCK     0x0400
 
 // Experimental horizontal filters
-#define H_X1_FILTER	0x2000			// 8192
-#define H_A_DEBLOCK	0x4000
+#define H_X1_FILTER     0x2000                  // 8192
+#define H_A_DEBLOCK     0x4000
 
 /// select between full y range (255-0) or standart one (234-16)
-#define FULL_Y_RANGE	0x8000			// 32768
+#define FULL_Y_RANGE    0x8000                  // 32768
 
 //Deinterlacing Filters
-#define	LINEAR_IPOL_DEINT_FILTER	0x10000	// 65536
-#define	LINEAR_BLEND_DEINT_FILTER	0x20000	// 131072
-#define	CUBIC_BLEND_DEINT_FILTER	0x8000	// (not implemented yet)
-#define	CUBIC_IPOL_DEINT_FILTER		0x40000	// 262144
-#define	MEDIAN_DEINT_FILTER		0x80000	// 524288
-#define	FFMPEG_DEINT_FILTER		0x400000
-#define	LOWPASS5_DEINT_FILTER		0x800000
+#define        LINEAR_IPOL_DEINT_FILTER         0x10000 // 65536
+#define        LINEAR_BLEND_DEINT_FILTER        0x20000 // 131072
+#define        CUBIC_BLEND_DEINT_FILTER         0x8000  // (not implemented yet)
+#define        CUBIC_IPOL_DEINT_FILTER          0x40000 // 262144
+#define        MEDIAN_DEINT_FILTER              0x80000 // 524288
+#define        FFMPEG_DEINT_FILTER              0x400000
+#define        LOWPASS5_DEINT_FILTER            0x800000
 
-#define TEMP_NOISE_FILTER		0x100000
-#define FORCE_QUANT			0x200000
+#define TEMP_NOISE_FILTER               0x100000
+#define FORCE_QUANT                     0x200000
 
 //use if u want a faster postprocessing code
 //cant differentiate between chroma & luma filters (both on or both off)
@@ -66,8 +66,8 @@
 
 #if 1
 static inline int CLIP(int a){
-	if(a&256) return ((a)>>31)^(-1);
-	else      return a;
+        if(a&256) return ((a)>>31)^(-1);
+        else      return a;
 }
 //#define CLIP(a) (((a)&256) ? ((a)>>31)^(-1) : (a))
 #elif 0
@@ -79,92 +79,92 @@ static inline int CLIP(int a){
  * Postprocessng filter.
  */
 struct PPFilter{
-	char *shortName;
-	char *longName;
-	int chromDefault; 	///< is chrominance filtering on by default if this filter is manually activated
-	int minLumQuality; 	///< minimum quality to turn luminance filtering on
-	int minChromQuality;	///< minimum quality to turn chrominance filtering on
-	int mask; 		///< Bitmask to turn this filter on
+        char *shortName;
+        char *longName;
+        int chromDefault;       ///< is chrominance filtering on by default if this filter is manually activated
+        int minLumQuality;      ///< minimum quality to turn luminance filtering on
+        int minChromQuality;    ///< minimum quality to turn chrominance filtering on
+        int mask;               ///< Bitmask to turn this filter on
 };
 
 /**
  * Postprocessng mode.
  */
 typedef struct PPMode{
-	int lumMode; 			///< acivates filters for luminance
-	int chromMode; 			///< acivates filters for chrominance
-	int error; 			///< non zero on error
+        int lumMode;                    ///< activates filters for luminance
+        int chromMode;                  ///< activates filters for chrominance
+        int error;                      ///< non zero on error
 
-	int minAllowedY; 		///< for brigtness correction
-	int maxAllowedY; 		///< for brihtness correction
-	float maxClippedThreshold;	///< amount of "black" u r willing to loose to get a brightness corrected picture
+        int minAllowedY;                ///< for brightness correction
+        int maxAllowedY;                ///< for brightness correction
+        float maxClippedThreshold;      ///< amount of "black" you are willing to lose to get a brightness corrected picture
 
-	int maxTmpNoise[3]; 		///< for Temporal Noise Reducing filter (Maximal sum of abs differences)
+        int maxTmpNoise[3];             ///< for Temporal Noise Reducing filter (Maximal sum of abs differences)
 
-	int baseDcDiff;
-	int flatnessThreshold;
+        int baseDcDiff;
+        int flatnessThreshold;
 
-	int forcedQuant; 		///< quantizer if FORCE_QUANT is used
+        int forcedQuant;                ///< quantizer if FORCE_QUANT is used
 } PPMode;
 
 /**
  * postprocess context.
  */
 typedef struct PPContext{
-	uint8_t *tempBlocks; ///<used for the horizontal code
+        uint8_t *tempBlocks; ///<used for the horizontal code
 
-	/**
-	 * luma histogram.         
-	 * we need 64bit here otherwise we'll going to have a problem
-	 * after watching a black picture for 5 hours
-	 */
-	uint64_t *yHistogram;
+        /**
+         * luma histogram.
+         * we need 64 bits here, otherwise we are going to have a problem
+         * after watching a black picture for 5 hours
+         */
+        uint64_t *yHistogram;
 
-	uint64_t __attribute__((aligned(8))) packedYOffset;
-	uint64_t __attribute__((aligned(8))) packedYScale;
+        uint64_t __attribute__((aligned(8))) packedYOffset;
+        uint64_t __attribute__((aligned(8))) packedYScale;
 
-	/** Temporal noise reducing buffers */
-	uint8_t *tempBlured[3];
-	int32_t *tempBluredPast[3];
+        /** Temporal noise reducing buffers */
+        uint8_t *tempBlured[3];
+        int32_t *tempBluredPast[3];
 
-	/** Temporary buffers for handling the last row(s) */
-	uint8_t *tempDst;
-	uint8_t *tempSrc;
+        /** Temporary buffers for handling the last row(s) */
+        uint8_t *tempDst;
+        uint8_t *tempSrc;
 
-	uint8_t *deintTemp;
+        uint8_t *deintTemp;
 
-	uint64_t __attribute__((aligned(8))) pQPb;
-	uint64_t __attribute__((aligned(8))) pQPb2;
+        uint64_t __attribute__((aligned(8))) pQPb;
+        uint64_t __attribute__((aligned(8))) pQPb2;
 
-	uint64_t __attribute__((aligned(8))) mmxDcOffset[64];
-	uint64_t __attribute__((aligned(8))) mmxDcThreshold[64];
+        uint64_t __attribute__((aligned(8))) mmxDcOffset[64];
+        uint64_t __attribute__((aligned(8))) mmxDcThreshold[64];
 
-	QP_STORE_T *stdQPTable;       ///< used to fix MPEG2 style qscale
-	QP_STORE_T *nonBQPTable;
-	QP_STORE_T *forcedQPTable;
+        QP_STORE_T *stdQPTable;       ///< used to fix MPEG2 style qscale
+        QP_STORE_T *nonBQPTable;
+        QP_STORE_T *forcedQPTable;
 
-	int QP;
-	int nonBQP;
+        int QP;
+        int nonBQP;
 
-	int frameNum;
-	
-	int cpuCaps;
-        
-	int qpStride; ///<size of qp buffers (needed to realloc them if needed)
-	int stride;   ///<size of some buffers (needed to realloc them if needed)
-        
-	int hChromaSubSample;
-	int vChromaSubSample;
+        int frameNum;
 
-	PPMode ppMode;
+        int cpuCaps;
+
+        int qpStride; ///<size of qp buffers (needed to realloc them if needed)
+        int stride;   ///<size of some buffers (needed to realloc them if needed)
+
+        int hChromaSubSample;
+        int vChromaSubSample;
+
+        PPMode ppMode;
 } PPContext;
 
 
 static inline void linecpy(void *dest, void *src, int lines, int stride)
 {
-	if (stride > 0) {
-		memcpy(dest, src, lines*stride);
-	} else {
-		memcpy(dest+(lines-1)*stride, src+(lines-1)*stride, -lines*stride);
-	}
+        if (stride > 0) {
+                memcpy(dest, src, lines*stride);
+        } else {
+                memcpy(dest+(lines-1)*stride, src+(lines-1)*stride, -lines*stride);
+        }
 }
diff --git a/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c b/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c
index 8f225636e..1171bd2aa 100644
--- a/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c
+++ b/src/libffmpeg/libavcodec/libpostproc/postprocess_template.c
@@ -13,7 +13,7 @@
 
     You should have received a copy of the GNU General Public License
     along with this program; if not, write to the Free Software
-    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
 
 /**
@@ -58,17 +58,17 @@
 #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t"
 #elif defined (HAVE_MMX)
 #define PMINUB(b,a,t) \
-	"movq " #a ", " #t " \n\t"\
-	"psubusb " #b ", " #t " \n\t"\
-	"psubb " #t ", " #a " \n\t"
+        "movq " #a ", " #t " \n\t"\
+        "psubusb " #b ", " #t " \n\t"\
+        "psubb " #t ", " #a " \n\t"
 #endif
 
 #ifdef HAVE_MMX2
 #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t"
 #elif defined (HAVE_MMX)
 #define PMAXUB(a,b) \
-	"psubusb " #a ", " #b " \n\t"\
-	"paddb " #a ", " #b " \n\t"
+        "psubusb " #a ", " #b " \n\t"\
+        "paddb " #a ", " #b " \n\t"
 #endif
 
 //FIXME? |255-0| = 1 (shouldnt be a problem ...)
@@ -77,116 +77,116 @@
  * Check if the middle 8x8 Block in the given 8x16 block is flat
  */
 static inline int RENAME(vertClassify)(uint8_t src[], int stride, PPContext *c){
-	int numEq= 0, dcOk;
-	src+= stride*4; // src points to begin of the 8x8 Block
+        int numEq= 0, dcOk;
+        src+= stride*4; // src points to begin of the 8x8 Block
 asm volatile(
-		"movq %0, %%mm7					\n\t" 
-		"movq %1, %%mm6					\n\t" 
+                "movq %0, %%mm7                         \n\t"
+                "movq %1, %%mm6                         \n\t"
                 : : "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP])
                 );
-                
+
 asm volatile(
-		"lea (%2, %3), %%"REG_a"			\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%1	eax	eax+%2	eax+2%2	%1+4%2	ecx	ecx+%2	ecx+2%2	%1+8%2	ecx+4%2
-
-		"movq (%2), %%mm0				\n\t"
-		"movq (%%"REG_a"), %%mm1			\n\t"
-                "movq %%mm0, %%mm3				\n\t"
-                "movq %%mm0, %%mm4				\n\t"
+                "lea (%2, %3), %%"REG_a"                \n\t"
+//      0       1       2       3       4       5       6       7       8       9
+//      %1      eax     eax+%2  eax+2%2 %1+4%2  ecx     ecx+%2  ecx+2%2 %1+8%2  ecx+4%2
+
+                "movq (%2), %%mm0                       \n\t"
+                "movq (%%"REG_a"), %%mm1                \n\t"
+                "movq %%mm0, %%mm3                      \n\t"
+                "movq %%mm0, %%mm4                      \n\t"
                 PMAXUB(%%mm1, %%mm4)
                 PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm0				\n\t" // mm0 = differnece
-		"paddb %%mm7, %%mm0				\n\t"
-		"pcmpgtb %%mm6, %%mm0				\n\t"
+                "psubb %%mm1, %%mm0                     \n\t" // mm0 = differnece
+                "paddb %%mm7, %%mm0                     \n\t"
+                "pcmpgtb %%mm6, %%mm0                   \n\t"
 
-		"movq (%%"REG_a",%3), %%mm2			\n\t"
+                "movq (%%"REG_a",%3), %%mm2             \n\t"
                 PMAXUB(%%mm2, %%mm4)
                 PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
+                "psubb %%mm2, %%mm1                     \n\t"
+                "paddb %%mm7, %%mm1                     \n\t"
+                "pcmpgtb %%mm6, %%mm1                   \n\t"
+                "paddb %%mm1, %%mm0                     \n\t"
 
-		"movq (%%"REG_a", %3, 2), %%mm1			\n\t"
+                "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
                 PMAXUB(%%mm1, %%mm4)
                 PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
-		
-		"lea (%%"REG_a", %3, 4), %%"REG_a"		\n\t"
-
-		"movq (%2, %3, 4), %%mm2			\n\t"
+                "psubb %%mm1, %%mm2                     \n\t"
+                "paddb %%mm7, %%mm2                     \n\t"
+                "pcmpgtb %%mm6, %%mm2                   \n\t"
+                "paddb %%mm2, %%mm0                     \n\t"
+
+                "lea (%%"REG_a", %3, 4), %%"REG_a"      \n\t"
+
+                "movq (%2, %3, 4), %%mm2                \n\t"
                 PMAXUB(%%mm2, %%mm4)
                 PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
+                "psubb %%mm2, %%mm1                     \n\t"
+                "paddb %%mm7, %%mm1                     \n\t"
+                "pcmpgtb %%mm6, %%mm1                   \n\t"
+                "paddb %%mm1, %%mm0                     \n\t"
 
-		"movq (%%"REG_a"), %%mm1			\n\t"
+                "movq (%%"REG_a"), %%mm1                \n\t"
                 PMAXUB(%%mm1, %%mm4)
                 PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
+                "psubb %%mm1, %%mm2                     \n\t"
+                "paddb %%mm7, %%mm2                     \n\t"
+                "pcmpgtb %%mm6, %%mm2                   \n\t"
+                "paddb %%mm2, %%mm0                     \n\t"
 
-		"movq (%%"REG_a", %3), %%mm2			\n\t"
+                "movq (%%"REG_a", %3), %%mm2            \n\t"
                 PMAXUB(%%mm2, %%mm4)
                 PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
+                "psubb %%mm2, %%mm1                     \n\t"
+                "paddb %%mm7, %%mm1                     \n\t"
+                "pcmpgtb %%mm6, %%mm1                   \n\t"
+                "paddb %%mm1, %%mm0                     \n\t"
 
-		"movq (%%"REG_a", %3, 2), %%mm1			\n\t"
+                "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
                 PMAXUB(%%mm1, %%mm4)
                 PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
-		"psubusb %%mm3, %%mm4				\n\t"
+                "psubb %%mm1, %%mm2                     \n\t"
+                "paddb %%mm7, %%mm2                     \n\t"
+                "pcmpgtb %%mm6, %%mm2                   \n\t"
+                "paddb %%mm2, %%mm0                     \n\t"
+                "psubusb %%mm3, %%mm4                   \n\t"
 
-		"						\n\t"
+                "                                       \n\t"
 #ifdef HAVE_MMX2
-		"pxor %%mm7, %%mm7				\n\t"
-		"psadbw %%mm7, %%mm0				\n\t"
+                "pxor %%mm7, %%mm7                      \n\t"
+                "psadbw %%mm7, %%mm0                    \n\t"
 #else
-		"movq %%mm0, %%mm1				\n\t"
-		"psrlw $8, %%mm0				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
-		"movq %%mm0, %%mm1				\n\t"
-		"psrlq $16, %%mm0				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
-		"movq %%mm0, %%mm1				\n\t"
-		"psrlq $32, %%mm0				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
+                "movq %%mm0, %%mm1                      \n\t"
+                "psrlw $8, %%mm0                        \n\t"
+                "paddb %%mm1, %%mm0                     \n\t"
+                "movq %%mm0, %%mm1                      \n\t"
+                "psrlq $16, %%mm0                       \n\t"
+                "paddb %%mm1, %%mm0                     \n\t"
+                "movq %%mm0, %%mm1                      \n\t"
+                "psrlq $32, %%mm0                       \n\t"
+                "paddb %%mm1, %%mm0                     \n\t"
 #endif
-                "movq %4, %%mm7					\n\t" // QP,..., QP
-		"paddusb %%mm7, %%mm7				\n\t" // 2QP ... 2QP
-		"psubusb %%mm7, %%mm4				\n\t" // Diff <= 2QP -> 0
-		"packssdw %%mm4, %%mm4				\n\t"
-		"movd %%mm0, %0					\n\t"
-		"movd %%mm4, %1					\n\t"
-
-		: "=r" (numEq), "=r" (dcOk)
-		: "r" (src), "r" ((long)stride), "m" (c->pQPb)
-		: "%"REG_a
-		);
-
-	numEq= (-numEq) &0xFF;
-	if(numEq > c->ppMode.flatnessThreshold){
+                "movq %4, %%mm7                         \n\t" // QP,..., QP
+                "paddusb %%mm7, %%mm7                   \n\t" // 2QP ... 2QP
+                "psubusb %%mm7, %%mm4                   \n\t" // Diff <= 2QP -> 0
+                "packssdw %%mm4, %%mm4                  \n\t"
+                "movd %%mm0, %0                         \n\t"
+                "movd %%mm4, %1                         \n\t"
+
+                : "=r" (numEq), "=r" (dcOk)
+                : "r" (src), "r" ((long)stride), "m" (c->pQPb)
+                : "%"REG_a
+                );
+
+        numEq= (-numEq) &0xFF;
+        if(numEq > c->ppMode.flatnessThreshold){
             if(dcOk) return 0;
             else     return 1;
         }else{
             return 2;
         }
 }
-#endif
+#endif //HAVE_MMX
 
 /**
  * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle)
@@ -196,173 +196,173 @@ asm volatile(
 static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
 {
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
-	src+= stride*3;
-	asm volatile(	//"movv %0 %1 %2\n\t"
-		"movq %2, %%mm0			\n\t"  // QP,..., QP
-		"pxor %%mm4, %%mm4				\n\t"
-
-		"movq (%0), %%mm6				\n\t"
-		"movq (%0, %1), %%mm5				\n\t"
-		"movq %%mm5, %%mm1				\n\t"
-		"movq %%mm6, %%mm2				\n\t"
-		"psubusb %%mm6, %%mm5				\n\t"
-		"psubusb %%mm1, %%mm2				\n\t"
-		"por %%mm5, %%mm2				\n\t" // ABS Diff of lines
-		"psubusb %%mm0, %%mm2				\n\t" // diff <= QP -> 0
-		"pcmpeqb %%mm4, %%mm2			\n\t" // diff <= QP -> FF
-
-		"pand %%mm2, %%mm6				\n\t"
-		"pandn %%mm1, %%mm2				\n\t"
-		"por %%mm2, %%mm6				\n\t"// First Line to Filter
-
-		"movq (%0, %1, 8), %%mm5			\n\t"
-		"lea (%0, %1, 4), %%"REG_a"			\n\t"
-		"lea (%0, %1, 8), %%"REG_c"			\n\t"
-		"sub %1, %%"REG_c"				\n\t"
-		"add %1, %0					\n\t" // %0 points to line 1 not 0
-		"movq (%0, %1, 8), %%mm7			\n\t"
-		"movq %%mm5, %%mm1				\n\t"
-		"movq %%mm7, %%mm2				\n\t"
-		"psubusb %%mm7, %%mm5				\n\t"
-		"psubusb %%mm1, %%mm2				\n\t"
-		"por %%mm5, %%mm2				\n\t" // ABS Diff of lines
-		"psubusb %%mm0, %%mm2				\n\t" // diff <= QP -> 0
-		"pcmpeqb %%mm4, %%mm2			\n\t" // diff <= QP -> FF
-
-		"pand %%mm2, %%mm7				\n\t"
-		"pandn %%mm1, %%mm2				\n\t"
-		"por %%mm2, %%mm7				\n\t" // First Line to Filter
-
-
-		// 	1	2	3	4	5	6	7	8
-		//	%0	%0+%1	%0+2%1	eax	%0+4%1	eax+2%1	ecx	eax+4%1
-		// 6 4 2 2 1 1
-		// 6 4 4 2
-		// 6 8 2
-
-		"movq (%0, %1), %%mm0				\n\t" //  1
-		"movq %%mm0, %%mm1				\n\t" //  1
-		PAVGB(%%mm6, %%mm0)				      //1 1	/2
-		PAVGB(%%mm6, %%mm0)				      //3 1	/4
-
-		"movq (%0, %1, 4), %%mm2			\n\t" //     1
-		"movq %%mm2, %%mm5				\n\t" //     1
-		PAVGB((%%REGa), %%mm2)				      //    11	/2
-		PAVGB((%0, %1, 2), %%mm2)			      //   211	/4
-		"movq %%mm2, %%mm3				\n\t" //   211	/4
-		"movq (%0), %%mm4				\n\t" // 1
-		PAVGB(%%mm4, %%mm3)				      // 4 211	/8
-		PAVGB(%%mm0, %%mm3)				      //642211	/16
-		"movq %%mm3, (%0)				\n\t" // X
-		// mm1=2 mm2=3(211) mm4=1 mm5=5 mm6=0 mm7=9
-		"movq %%mm1, %%mm0				\n\t" //  1
-		PAVGB(%%mm6, %%mm0)				      //1 1	/2
-		"movq %%mm4, %%mm3				\n\t" // 1
-		PAVGB((%0,%1,2), %%mm3)				      // 1 1	/2
-		PAVGB((%%REGa,%1,2), %%mm5)			      //     11	/2
-		PAVGB((%%REGa), %%mm5)				      //    211 /4
-		PAVGB(%%mm5, %%mm3)				      // 2 2211 /8
-		PAVGB(%%mm0, %%mm3)				      //4242211 /16
-		"movq %%mm3, (%0,%1)				\n\t" //  X
-		// mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9
-		PAVGB(%%mm4, %%mm6)				      //11	/2
-		"movq (%%"REG_c"), %%mm0			\n\t" //       1
-		PAVGB((%%REGa, %1, 2), %%mm0)			      //      11/2
-		"movq %%mm0, %%mm3				\n\t" //      11/2
-		PAVGB(%%mm1, %%mm0)				      //  2   11/4
-		PAVGB(%%mm6, %%mm0)				      //222   11/8
-		PAVGB(%%mm2, %%mm0)				      //22242211/16
-		"movq (%0, %1, 2), %%mm2			\n\t" //   1
-		"movq %%mm0, (%0, %1, 2)			\n\t" //   X
-		// mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9
-		"movq (%%"REG_a", %1, 4), %%mm0			\n\t" //        1
-		PAVGB((%%REGc), %%mm0)				      //       11	/2
-		PAVGB(%%mm0, %%mm6)				      //11     11	/4
-		PAVGB(%%mm1, %%mm4)				      // 11		/2
-		PAVGB(%%mm2, %%mm1)				      //  11		/2
-		PAVGB(%%mm1, %%mm6)				      //1122   11	/8
-		PAVGB(%%mm5, %%mm6)				      //112242211	/16
-		"movq (%%"REG_a"), %%mm5			\n\t" //    1
-		"movq %%mm6, (%%"REG_a")			\n\t" //    X
-		// mm0=7(11) mm1=2(11) mm2=3 mm3=6(11) mm4=1(11) mm5=4 mm7=9
-		"movq (%%"REG_a", %1, 4), %%mm6			\n\t" //        1
-		PAVGB(%%mm7, %%mm6)				      //        11	/2
-		PAVGB(%%mm4, %%mm6)				      // 11     11	/4
-		PAVGB(%%mm3, %%mm6)				      // 11   2211	/8
-		PAVGB(%%mm5, %%mm2)				      //   11		/2
-		"movq (%0, %1, 4), %%mm4			\n\t" //     1
-		PAVGB(%%mm4, %%mm2)				      //   112		/4
-		PAVGB(%%mm2, %%mm6)				      // 112242211	/16
-		"movq %%mm6, (%0, %1, 4)			\n\t" //     X
-		// mm0=7(11) mm1=2(11) mm2=3(112) mm3=6(11) mm4=5 mm5=4 mm7=9
-		PAVGB(%%mm7, %%mm1)				      //  11     2	/4
-		PAVGB(%%mm4, %%mm5)				      //    11		/2
-		PAVGB(%%mm5, %%mm0)				      //    11 11	/4
-		"movq (%%"REG_a", %1, 2), %%mm6			\n\t" //      1
-		PAVGB(%%mm6, %%mm1)				      //  11  4  2	/8
-		PAVGB(%%mm0, %%mm1)				      //  11224222	/16
-		"movq %%mm1, (%%"REG_a", %1, 2)			\n\t" //      X
-		// mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9
-		PAVGB((%%REGc), %%mm2)				      //   112 4	/8
-		"movq (%%"REG_a", %1, 4), %%mm0			\n\t" //        1
-		PAVGB(%%mm0, %%mm6)				      //      1 1	/2
-		PAVGB(%%mm7, %%mm6)				      //      1 12	/4
-		PAVGB(%%mm2, %%mm6)				      //   1122424	/4
-		"movq %%mm6, (%%"REG_c")			\n\t" //       X
-		// mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9
-		PAVGB(%%mm7, %%mm5)				      //    11   2	/4
-		PAVGB(%%mm7, %%mm5)				      //    11   6	/8
-
-		PAVGB(%%mm3, %%mm0)				      //      112	/4
-		PAVGB(%%mm0, %%mm5)				      //    112246	/16
-		"movq %%mm5, (%%"REG_a", %1, 4)			\n\t" //        X
-		"sub %1, %0					\n\t"
-
-		:
-		: "r" (src), "r" ((long)stride), "m" (c->pQPb)
-		: "%"REG_a, "%"REG_c
-	);
-#else
-	const int l1= stride;
-	const int l2= stride + l1;
-	const int l3= stride + l2;
-	const int l4= stride + l3;
-	const int l5= stride + l4;
-	const int l6= stride + l5;
-	const int l7= stride + l6;
-	const int l8= stride + l7;
-	const int l9= stride + l8;
-	int x;
-	src+= stride*3;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		const int first= ABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
-		const int last= ABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
-
-		int sums[10];
-		sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
-		sums[1] = sums[0] - first  + src[l4];
-		sums[2] = sums[1] - first  + src[l5];
-		sums[3] = sums[2] - first  + src[l6];
-		sums[4] = sums[3] - first  + src[l7];
-		sums[5] = sums[4] - src[l1] + src[l8];
-		sums[6] = sums[5] - src[l2] + last;
-		sums[7] = sums[6] - src[l3] + last;
-		sums[8] = sums[7] - src[l4] + last;
-		sums[9] = sums[8] - src[l5] + last;
-
-		src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
-		src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
-		src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
-		src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
-		src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
-		src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
-		src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
-		src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
-
-		src++;
-	}
-#endif
+        src+= stride*3;
+        asm volatile(        //"movv %0 %1 %2\n\t"
+                "movq %2, %%mm0                         \n\t"  // QP,..., QP
+                "pxor %%mm4, %%mm4                      \n\t"
+
+                "movq (%0), %%mm6                       \n\t"
+                "movq (%0, %1), %%mm5                   \n\t"
+                "movq %%mm5, %%mm1                      \n\t"
+                "movq %%mm6, %%mm2                      \n\t"
+                "psubusb %%mm6, %%mm5                   \n\t"
+                "psubusb %%mm1, %%mm2                   \n\t"
+                "por %%mm5, %%mm2                       \n\t" // ABS Diff of lines
+                "psubusb %%mm0, %%mm2                   \n\t" // diff <= QP -> 0
+                "pcmpeqb %%mm4, %%mm2                   \n\t" // diff <= QP -> FF
+
+                "pand %%mm2, %%mm6                      \n\t"
+                "pandn %%mm1, %%mm2                     \n\t"
+                "por %%mm2, %%mm6                       \n\t"// First Line to Filter
+
+                "movq (%0, %1, 8), %%mm5                \n\t"
+                "lea (%0, %1, 4), %%"REG_a"             \n\t"
+                "lea (%0, %1, 8), %%"REG_c"             \n\t"
+                "sub %1, %%"REG_c"                      \n\t"
+                "add %1, %0                             \n\t" // %0 points to line 1 not 0
+                "movq (%0, %1, 8), %%mm7                \n\t"
+                "movq %%mm5, %%mm1                      \n\t"
+                "movq %%mm7, %%mm2                      \n\t"
+                "psubusb %%mm7, %%mm5                   \n\t"
+                "psubusb %%mm1, %%mm2                   \n\t"
+                "por %%mm5, %%mm2                       \n\t" // ABS Diff of lines
+                "psubusb %%mm0, %%mm2                   \n\t" // diff <= QP -> 0
+                "pcmpeqb %%mm4, %%mm2                   \n\t" // diff <= QP -> FF
+
+                "pand %%mm2, %%mm7                      \n\t"
+                "pandn %%mm1, %%mm2                     \n\t"
+                "por %%mm2, %%mm7                       \n\t" // First Line to Filter
+
+
+                //      1       2       3       4       5       6       7       8
+                //      %0      %0+%1   %0+2%1  eax     %0+4%1  eax+2%1 ecx     eax+4%1
+                // 6 4 2 2 1 1
+                // 6 4 4 2
+                // 6 8 2
+
+                "movq (%0, %1), %%mm0                   \n\t" //  1
+                "movq %%mm0, %%mm1                      \n\t" //  1
+                PAVGB(%%mm6, %%mm0)                           //1 1        /2
+                PAVGB(%%mm6, %%mm0)                           //3 1        /4
+
+                "movq (%0, %1, 4), %%mm2                \n\t" //     1
+                "movq %%mm2, %%mm5                      \n\t" //     1
+                PAVGB((%%REGa), %%mm2)                        //    11        /2
+                PAVGB((%0, %1, 2), %%mm2)                     //   211        /4
+                "movq %%mm2, %%mm3                      \n\t" //   211        /4
+                "movq (%0), %%mm4                       \n\t" // 1
+                PAVGB(%%mm4, %%mm3)                           // 4 211        /8
+                PAVGB(%%mm0, %%mm3)                           //642211        /16
+                "movq %%mm3, (%0)                       \n\t" // X
+                // mm1=2 mm2=3(211) mm4=1 mm5=5 mm6=0 mm7=9
+                "movq %%mm1, %%mm0                      \n\t" //  1
+                PAVGB(%%mm6, %%mm0)                           //1 1        /2
+                "movq %%mm4, %%mm3                      \n\t" // 1
+                PAVGB((%0,%1,2), %%mm3)                       // 1 1        /2
+                PAVGB((%%REGa,%1,2), %%mm5)                   //     11        /2
+                PAVGB((%%REGa), %%mm5)                        //    211 /4
+                PAVGB(%%mm5, %%mm3)                           // 2 2211 /8
+                PAVGB(%%mm0, %%mm3)                           //4242211 /16
+                "movq %%mm3, (%0,%1)                    \n\t" //  X
+                // mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9
+                PAVGB(%%mm4, %%mm6)                                   //11        /2
+                "movq (%%"REG_c"), %%mm0                \n\t" //       1
+                PAVGB((%%REGa, %1, 2), %%mm0)                 //      11/2
+                "movq %%mm0, %%mm3                      \n\t" //      11/2
+                PAVGB(%%mm1, %%mm0)                           //  2   11/4
+                PAVGB(%%mm6, %%mm0)                           //222   11/8
+                PAVGB(%%mm2, %%mm0)                           //22242211/16
+                "movq (%0, %1, 2), %%mm2                \n\t" //   1
+                "movq %%mm0, (%0, %1, 2)                \n\t" //   X
+                // mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9
+                "movq (%%"REG_a", %1, 4), %%mm0         \n\t" //        1
+                PAVGB((%%REGc), %%mm0)                        //       11        /2
+                PAVGB(%%mm0, %%mm6)                           //11     11        /4
+                PAVGB(%%mm1, %%mm4)                           // 11                /2
+                PAVGB(%%mm2, %%mm1)                           //  11                /2
+                PAVGB(%%mm1, %%mm6)                           //1122   11        /8
+                PAVGB(%%mm5, %%mm6)                           //112242211        /16
+                "movq (%%"REG_a"), %%mm5                \n\t" //    1
+                "movq %%mm6, (%%"REG_a")                \n\t" //    X
+                // mm0=7(11) mm1=2(11) mm2=3 mm3=6(11) mm4=1(11) mm5=4 mm7=9
+                "movq (%%"REG_a", %1, 4), %%mm6         \n\t" //        1
+                PAVGB(%%mm7, %%mm6)                           //        11        /2
+                PAVGB(%%mm4, %%mm6)                           // 11     11        /4
+                PAVGB(%%mm3, %%mm6)                           // 11   2211        /8
+                PAVGB(%%mm5, %%mm2)                           //   11                /2
+                "movq (%0, %1, 4), %%mm4                \n\t" //     1
+                PAVGB(%%mm4, %%mm2)                           //   112                /4
+                PAVGB(%%mm2, %%mm6)                           // 112242211        /16
+                "movq %%mm6, (%0, %1, 4)                \n\t" //     X
+                // mm0=7(11) mm1=2(11) mm2=3(112) mm3=6(11) mm4=5 mm5=4 mm7=9
+                PAVGB(%%mm7, %%mm1)                           //  11     2        /4
+                PAVGB(%%mm4, %%mm5)                           //    11                /2
+                PAVGB(%%mm5, %%mm0)                           //    11 11        /4
+                "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //      1
+                PAVGB(%%mm6, %%mm1)                           //  11  4  2        /8
+                PAVGB(%%mm0, %%mm1)                           //  11224222        /16
+                "movq %%mm1, (%%"REG_a", %1, 2)         \n\t" //      X
+                // mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9
+                PAVGB((%%REGc), %%mm2)                        //   112 4        /8
+                "movq (%%"REG_a", %1, 4), %%mm0         \n\t" //        1
+                PAVGB(%%mm0, %%mm6)                           //      1 1        /2
+                PAVGB(%%mm7, %%mm6)                           //      1 12        /4
+                PAVGB(%%mm2, %%mm6)                           //   1122424        /4
+                "movq %%mm6, (%%"REG_c")                \n\t" //       X
+                // mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9
+                PAVGB(%%mm7, %%mm5)                           //    11   2        /4
+                PAVGB(%%mm7, %%mm5)                           //    11   6        /8
+
+                PAVGB(%%mm3, %%mm0)                           //      112        /4
+                PAVGB(%%mm0, %%mm5)                           //    112246        /16
+                "movq %%mm5, (%%"REG_a", %1, 4)         \n\t" //        X
+                "sub %1, %0                             \n\t"
+
+                :
+                : "r" (src), "r" ((long)stride), "m" (c->pQPb)
+                : "%"REG_a, "%"REG_c
+        );
+#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+        const int l1= stride;
+        const int l2= stride + l1;
+        const int l3= stride + l2;
+        const int l4= stride + l3;
+        const int l5= stride + l4;
+        const int l6= stride + l5;
+        const int l7= stride + l6;
+        const int l8= stride + l7;
+        const int l9= stride + l8;
+        int x;
+        src+= stride*3;
+        for(x=0; x<BLOCK_SIZE; x++)
+        {
+                const int first= ABS(src[0] - src[l1]) < c->QP ? src[0] : src[l1];
+                const int last= ABS(src[l8] - src[l9]) < c->QP ? src[l9] : src[l8];
+
+                int sums[10];
+                sums[0] = 4*first + src[l1] + src[l2] + src[l3] + 4;
+                sums[1] = sums[0] - first  + src[l4];
+                sums[2] = sums[1] - first  + src[l5];
+                sums[3] = sums[2] - first  + src[l6];
+                sums[4] = sums[3] - first  + src[l7];
+                sums[5] = sums[4] - src[l1] + src[l8];
+                sums[6] = sums[5] - src[l2] + last;
+                sums[7] = sums[6] - src[l3] + last;
+                sums[8] = sums[7] - src[l4] + last;
+                sums[9] = sums[8] - src[l5] + last;
+
+                src[l1]= (sums[0] + sums[2] + 2*src[l1])>>4;
+                src[l2]= (sums[1] + sums[3] + 2*src[l2])>>4;
+                src[l3]= (sums[2] + sums[4] + 2*src[l3])>>4;
+                src[l4]= (sums[3] + sums[5] + 2*src[l4])>>4;
+                src[l5]= (sums[4] + sums[6] + 2*src[l5])>>4;
+                src[l6]= (sums[5] + sums[7] + 2*src[l6])>>4;
+                src[l7]= (sums[6] + sums[8] + 2*src[l7])>>4;
+                src[l8]= (sums[7] + sums[9] + 2*src[l8])>>4;
+
+                src++;
+        }
+#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 }
 #endif //HAVE_ALTIVEC
 
@@ -372,105 +372,105 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
  * values are correctly clipped (MMX2)
  * values are wraparound (C)
  * conclusion: its fast, but introduces ugly horizontal patterns if there is a continious gradient
-	0 8 16 24
-	x = 8
-	x/2 = 4
-	x/8 = 1
-	1 12 12 23
+        0 8 16 24
+        x = 8
+        x/2 = 4
+        x/8 = 1
+        1 12 12 23
  */
 static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
 {
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
-	src+= stride*3;
+        src+= stride*3;
 // FIXME rounding
-	asm volatile(
-		"pxor %%mm7, %%mm7				\n\t" // 0
-		"movq "MANGLE(b80)", %%mm6			\n\t" // MIN_SIGNED_BYTE
-		"leal (%0, %1), %%"REG_a"			\n\t"
-		"leal (%%"REG_a", %1, 4), %%"REG_c"		\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	ecx	ecx+%1	ecx+2%1	%0+8%1	ecx+4%1
-		"movq "MANGLE(pQPb)", %%mm0			\n\t" // QP,..., QP
-		"movq %%mm0, %%mm1				\n\t" // QP,..., QP
-		"paddusb "MANGLE(b02)", %%mm0			\n\t"
-		"psrlw $2, %%mm0				\n\t"
-		"pand "MANGLE(b3F)", %%mm0			\n\t" // QP/4,..., QP/4
-		"paddusb %%mm1, %%mm0				\n\t" // QP*1.25 ...
-		"movq (%0, %1, 4), %%mm2			\n\t" // line 4
-		"movq (%%"REG_c"), %%mm3				\n\t" // line 5
-		"movq %%mm2, %%mm4				\n\t" // line 4
-		"pcmpeqb %%mm5, %%mm5				\n\t" // -1
-		"pxor %%mm2, %%mm5				\n\t" // -line 4 - 1
-		PAVGB(%%mm3, %%mm5)
-		"paddb %%mm6, %%mm5				\n\t" // (l5-l4)/2
-		"psubusb %%mm3, %%mm4				\n\t"
-		"psubusb %%mm2, %%mm3				\n\t"
-		"por %%mm3, %%mm4				\n\t" // |l4 - l5|
-		"psubusb %%mm0, %%mm4				\n\t"
-		"pcmpeqb %%mm7, %%mm4				\n\t"
-		"pand %%mm4, %%mm5				\n\t" // d/2
-
-//		"paddb %%mm6, %%mm2				\n\t" // line 4 + 0x80
-		"paddb %%mm5, %%mm2				\n\t"
-//		"psubb %%mm6, %%mm2				\n\t"
-		"movq %%mm2, (%0,%1, 4)				\n\t"
-
-		"movq (%%"REG_c"), %%mm2				\n\t"
-//		"paddb %%mm6, %%mm2				\n\t" // line 5 + 0x80
-		"psubb %%mm5, %%mm2				\n\t"
-//		"psubb %%mm6, %%mm2				\n\t"
-		"movq %%mm2, (%%"REG_c")				\n\t"
-
-		"paddb %%mm6, %%mm5				\n\t"
-		"psrlw $2, %%mm5				\n\t"
-		"pand "MANGLE(b3F)", %%mm5			\n\t"
-		"psubb "MANGLE(b20)", %%mm5			\n\t" // (l5-l4)/8
-
-		"movq (%%"REG_a", %1, 2), %%mm2			\n\t"
-		"paddb %%mm6, %%mm2				\n\t" // line 3 + 0x80
-		"paddsb %%mm5, %%mm2				\n\t"
-		"psubb %%mm6, %%mm2				\n\t"
-		"movq %%mm2, (%%"REG_a", %1, 2)			\n\t"
-
-		"movq (%%"REG_c", %1), %%mm2			\n\t"
-		"paddb %%mm6, %%mm2				\n\t" // line 6 + 0x80
-		"psubsb %%mm5, %%mm2				\n\t"
-		"psubb %%mm6, %%mm2				\n\t"
-		"movq %%mm2, (%%"REG_c", %1)			\n\t"
-
-		:
-		: "r" (src), "r" ((long)stride)
-		: "%"REG_a, "%"REG_c
-	);
-#else
- 	const int l1= stride;
-	const int l2= stride + l1;
-	const int l3= stride + l2;
-	const int l4= stride + l3;
-	const int l5= stride + l4;
-	const int l6= stride + l5;
-//	const int l7= stride + l6;
-//	const int l8= stride + l7;
-//	const int l9= stride + l8;
-	int x;
-	const int QP15= QP + (QP>>2);
-	src+= stride*3;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		const int v = (src[x+l5] - src[x+l4]);
-		if(ABS(v) < QP15)
-		{
-			src[x+l3] +=v>>3;
-			src[x+l4] +=v>>1;
-			src[x+l5] -=v>>1;
-			src[x+l6] -=v>>3;
-
-		}
-	}
+        asm volatile(
+                "pxor %%mm7, %%mm7                      \n\t" // 0
+                "movq "MANGLE(b80)", %%mm6              \n\t" // MIN_SIGNED_BYTE
+                "leal (%0, %1), %%"REG_a"               \n\t"
+                "leal (%%"REG_a", %1, 4), %%"REG_c"     \n\t"
+//      0       1       2       3       4       5       6       7       8       9
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %0+8%1  ecx+4%1
+                "movq "MANGLE(pQPb)", %%mm0             \n\t" // QP,..., QP
+                "movq %%mm0, %%mm1                      \n\t" // QP,..., QP
+                "paddusb "MANGLE(b02)", %%mm0           \n\t"
+                "psrlw $2, %%mm0                        \n\t"
+                "pand "MANGLE(b3F)", %%mm0              \n\t" // QP/4,..., QP/4
+                "paddusb %%mm1, %%mm0                   \n\t" // QP*1.25 ...
+                "movq (%0, %1, 4), %%mm2                \n\t" // line 4
+                "movq (%%"REG_c"), %%mm3                \n\t" // line 5
+                "movq %%mm2, %%mm4                      \n\t" // line 4
+                "pcmpeqb %%mm5, %%mm5                   \n\t" // -1
+                "pxor %%mm2, %%mm5                      \n\t" // -line 4 - 1
+                PAVGB(%%mm3, %%mm5)
+                "paddb %%mm6, %%mm5                     \n\t" // (l5-l4)/2
+                "psubusb %%mm3, %%mm4                   \n\t"
+                "psubusb %%mm2, %%mm3                   \n\t"
+                "por %%mm3, %%mm4                       \n\t" // |l4 - l5|
+                "psubusb %%mm0, %%mm4                   \n\t"
+                "pcmpeqb %%mm7, %%mm4                   \n\t"
+                "pand %%mm4, %%mm5                      \n\t" // d/2
+
+//                "paddb %%mm6, %%mm2                     \n\t" // line 4 + 0x80
+                "paddb %%mm5, %%mm2                     \n\t"
+//                "psubb %%mm6, %%mm2                     \n\t"
+                "movq %%mm2, (%0,%1, 4)                 \n\t"
+
+                "movq (%%"REG_c"), %%mm2                \n\t"
+//                "paddb %%mm6, %%mm2                     \n\t" // line 5 + 0x80
+                "psubb %%mm5, %%mm2                     \n\t"
+//                "psubb %%mm6, %%mm2                     \n\t"
+                "movq %%mm2, (%%"REG_c")                \n\t"
+
+                "paddb %%mm6, %%mm5                     \n\t"
+                "psrlw $2, %%mm5                        \n\t"
+                "pand "MANGLE(b3F)", %%mm5              \n\t"
+                "psubb "MANGLE(b20)", %%mm5             \n\t" // (l5-l4)/8
+
+                "movq (%%"REG_a", %1, 2), %%mm2         \n\t"
+                "paddb %%mm6, %%mm2                     \n\t" // line 3 + 0x80
+                "paddsb %%mm5, %%mm2                    \n\t"
+                "psubb %%mm6, %%mm2                     \n\t"
+                "movq %%mm2, (%%"REG_a", %1, 2)         \n\t"
+
+                "movq (%%"REG_c", %1), %%mm2            \n\t"
+                "paddb %%mm6, %%mm2                     \n\t" // line 6 + 0x80
+                "psubsb %%mm5, %%mm2                    \n\t"
+                "psubb %%mm6, %%mm2                     \n\t"
+                "movq %%mm2, (%%"REG_c", %1)            \n\t"
+
+                :
+                : "r" (src), "r" ((long)stride)
+                : "%"REG_a, "%"REG_c
+        );
+#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+         const int l1= stride;
+        const int l2= stride + l1;
+        const int l3= stride + l2;
+        const int l4= stride + l3;
+        const int l5= stride + l4;
+        const int l6= stride + l5;
+//        const int l7= stride + l6;
+//        const int l8= stride + l7;
+//        const int l9= stride + l8;
+        int x;
+        const int QP15= QP + (QP>>2);
+        src+= stride*3;
+        for(x=0; x<BLOCK_SIZE; x++)
+        {
+                const int v = (src[x+l5] - src[x+l4]);
+                if(ABS(v) < QP15)
+                {
+                        src[x+l3] +=v>>3;
+                        src[x+l4] +=v>>1;
+                        src[x+l5] -=v>>1;
+                        src[x+l6] -=v>>3;
+
+                }
+        }
 
-#endif
+#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 }
-#endif
+#endif //0
 
 /**
  * Experimental Filter 1
@@ -482,129 +482,129 @@ static inline void RENAME(vertRK1Filter)(uint8_t *src, int stride, int QP)
 static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
 {
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
-	src+= stride*3;
-
-	asm volatile(
-		"pxor %%mm7, %%mm7				\n\t" // 0
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_c"		\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	ecx	ecx+%1	ecx+2%1	%0+8%1	ecx+4%1
-		"movq (%%"REG_a", %1, 2), %%mm0			\n\t" // line 3
-		"movq (%0, %1, 4), %%mm1			\n\t" // line 4
-		"movq %%mm1, %%mm2				\n\t" // line 4
-		"psubusb %%mm0, %%mm1				\n\t"
-		"psubusb %%mm2, %%mm0				\n\t"
-		"por %%mm1, %%mm0				\n\t" // |l2 - l3|
-		"movq (%%"REG_c"), %%mm3				\n\t" // line 5
-		"movq (%%"REG_c", %1), %%mm4			\n\t" // line 6
-		"movq %%mm3, %%mm5				\n\t" // line 5
-		"psubusb %%mm4, %%mm3				\n\t"
-		"psubusb %%mm5, %%mm4				\n\t"
-		"por %%mm4, %%mm3				\n\t" // |l5 - l6|
-		PAVGB(%%mm3, %%mm0)				      // (|l2 - l3| + |l5 - l6|)/2
-		"movq %%mm2, %%mm1				\n\t" // line 4
-		"psubusb %%mm5, %%mm2				\n\t"
-		"movq %%mm2, %%mm4				\n\t"
-		"pcmpeqb %%mm7, %%mm2				\n\t" // (l4 - l5) <= 0 ? -1 : 0
-		"psubusb %%mm1, %%mm5				\n\t"
-		"por %%mm5, %%mm4				\n\t" // |l4 - l5|
-		"psubusb %%mm0, %%mm4		\n\t" //d = MAX(0, |l4-l5| - (|l2-l3| + |l5-l6|)/2)
-		"movq %%mm4, %%mm3				\n\t" // d
-		"movq %2, %%mm0			\n\t"
-                "paddusb %%mm0, %%mm0				\n\t"
-		"psubusb %%mm0, %%mm4				\n\t"
-		"pcmpeqb %%mm7, %%mm4				\n\t" // d <= QP ? -1 : 0
-		"psubusb "MANGLE(b01)", %%mm3			\n\t"
-		"pand %%mm4, %%mm3				\n\t" // d <= QP ? d : 0
-
-		PAVGB(%%mm7, %%mm3)				      // d/2
-		"movq %%mm3, %%mm1				\n\t" // d/2
-		PAVGB(%%mm7, %%mm3)				      // d/4
-		PAVGB(%%mm1, %%mm3)				      // 3*d/8
-
-		"movq (%0, %1, 4), %%mm0			\n\t" // line 4
-		"pxor %%mm2, %%mm0				\n\t" //(l4 - l5) <= 0 ? -l4-1 : l4
-		"psubusb %%mm3, %%mm0				\n\t"
-		"pxor %%mm2, %%mm0				\n\t"
-		"movq %%mm0, (%0, %1, 4)			\n\t" // line 4
-
-		"movq (%%"REG_c"), %%mm0			\n\t" // line 5
-		"pxor %%mm2, %%mm0				\n\t" //(l4 - l5) <= 0 ? -l5-1 : l5
-		"paddusb %%mm3, %%mm0				\n\t"
-		"pxor %%mm2, %%mm0				\n\t"
-		"movq %%mm0, (%%"REG_c")			\n\t" // line 5
-
-		PAVGB(%%mm7, %%mm1)				      // d/4
-
-		"movq (%%"REG_a", %1, 2), %%mm0			\n\t" // line 3
-		"pxor %%mm2, %%mm0				\n\t" //(l4 - l5) <= 0 ? -l4-1 : l4
-		"psubusb %%mm1, %%mm0				\n\t"
-		"pxor %%mm2, %%mm0				\n\t"
-		"movq %%mm0, (%%"REG_a", %1, 2)			\n\t" // line 3
-
-		"movq (%%"REG_c", %1), %%mm0			\n\t" // line 6
-		"pxor %%mm2, %%mm0				\n\t" //(l4 - l5) <= 0 ? -l5-1 : l5
-		"paddusb %%mm1, %%mm0				\n\t"
-		"pxor %%mm2, %%mm0				\n\t"
-		"movq %%mm0, (%%"REG_c", %1)			\n\t" // line 6
-
-		PAVGB(%%mm7, %%mm1)				      // d/8
-
-		"movq (%%"REG_a", %1), %%mm0			\n\t" // line 2
-		"pxor %%mm2, %%mm0				\n\t" //(l4 - l5) <= 0 ? -l2-1 : l2
-		"psubusb %%mm1, %%mm0				\n\t"
-		"pxor %%mm2, %%mm0				\n\t"
-		"movq %%mm0, (%%"REG_a", %1)			\n\t" // line 2
-
-		"movq (%%"REG_c", %1, 2), %%mm0			\n\t" // line 7
-		"pxor %%mm2, %%mm0				\n\t" //(l4 - l5) <= 0 ? -l7-1 : l7
-		"paddusb %%mm1, %%mm0				\n\t"
-		"pxor %%mm2, %%mm0				\n\t"
-		"movq %%mm0, (%%"REG_c", %1, 2)			\n\t" // line 7
-
-		:
-		: "r" (src), "r" ((long)stride), "m" (co->pQPb)
-		: "%"REG_a, "%"REG_c
-	);
-#else
-
- 	const int l1= stride;
-	const int l2= stride + l1;
-	const int l3= stride + l2;
-	const int l4= stride + l3;
-	const int l5= stride + l4;
-	const int l6= stride + l5;
-	const int l7= stride + l6;
-//	const int l8= stride + l7;
-//	const int l9= stride + l8;
-	int x;
-
-	src+= stride*3;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		int a= src[l3] - src[l4];
-		int b= src[l4] - src[l5];
-		int c= src[l5] - src[l6];
-
-		int d= ABS(b) - ((ABS(a) + ABS(c))>>1);
-		d= MAX(d, 0);
-
-		if(d < co->QP*2)
-		{
-			int v = d * SIGN(-b);
-
-			src[l2] +=v>>3;
-			src[l3] +=v>>2;
-			src[l4] +=(3*v)>>3;
-			src[l5] -=(3*v)>>3;
-			src[l6] -=v>>2;
-			src[l7] -=v>>3;
-
-		}
-		src++;
-	}
-#endif
+        src+= stride*3;
+
+        asm volatile(
+                "pxor %%mm7, %%mm7                      \n\t" // 0
+                "lea (%0, %1), %%"REG_a"                \n\t"
+                "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
+//      0       1       2       3       4       5       6       7       8       9
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %0+8%1  ecx+4%1
+                "movq (%%"REG_a", %1, 2), %%mm0         \n\t" // line 3
+                "movq (%0, %1, 4), %%mm1                \n\t" // line 4
+                "movq %%mm1, %%mm2                      \n\t" // line 4
+                "psubusb %%mm0, %%mm1                   \n\t"
+                "psubusb %%mm2, %%mm0                   \n\t"
+                "por %%mm1, %%mm0                       \n\t" // |l3 - l4|
+                "movq (%%"REG_c"), %%mm3                \n\t" // line 5
+                "movq (%%"REG_c", %1), %%mm4            \n\t" // line 6
+                "movq %%mm3, %%mm5                      \n\t" // line 5
+                "psubusb %%mm4, %%mm3                   \n\t"
+                "psubusb %%mm5, %%mm4                   \n\t"
+                "por %%mm4, %%mm3                       \n\t" // |l5 - l6|
+                PAVGB(%%mm3, %%mm0)                           // (|l3 - l4| + |l5 - l6|)/2
+                "movq %%mm2, %%mm1                      \n\t" // line 4
+                "psubusb %%mm5, %%mm2                   \n\t"
+                "movq %%mm2, %%mm4                      \n\t"
+                "pcmpeqb %%mm7, %%mm2                   \n\t" // (l4 - l5) <= 0 ? -1 : 0
+                "psubusb %%mm1, %%mm5                   \n\t"
+                "por %%mm5, %%mm4                       \n\t" // |l4 - l5|
+                "psubusb %%mm0, %%mm4                   \n\t" //d = MAX(0, |l4-l5| - (|l3-l4| + |l5-l6|)/2)
+                "movq %%mm4, %%mm3                      \n\t" // d
+                "movq %2, %%mm0                         \n\t"
+                "paddusb %%mm0, %%mm0                   \n\t"
+                "psubusb %%mm0, %%mm4                   \n\t"
+                "pcmpeqb %%mm7, %%mm4                   \n\t" // d <= 2*QP ? -1 : 0
+                "psubusb "MANGLE(b01)", %%mm3           \n\t"
+                "pand %%mm4, %%mm3                      \n\t" // d <= 2*QP ? d : 0
+
+                PAVGB(%%mm7, %%mm3)                           // d/2
+                "movq %%mm3, %%mm1                      \n\t" // d/2
+                PAVGB(%%mm7, %%mm3)                           // d/4
+                PAVGB(%%mm1, %%mm3)                           // 3*d/8
+
+                "movq (%0, %1, 4), %%mm0                \n\t" // line 4
+                "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l4-1 : l4
+                "psubusb %%mm3, %%mm0                   \n\t"
+                "pxor %%mm2, %%mm0                      \n\t"
+                "movq %%mm0, (%0, %1, 4)                \n\t" // line 4
+
+                "movq (%%"REG_c"), %%mm0                \n\t" // line 5
+                "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l5-1 : l5
+                "paddusb %%mm3, %%mm0                   \n\t"
+                "pxor %%mm2, %%mm0                      \n\t"
+                "movq %%mm0, (%%"REG_c")                \n\t" // line 5
+
+                PAVGB(%%mm7, %%mm1)                           // d/4
+
+                "movq (%%"REG_a", %1, 2), %%mm0         \n\t" // line 3
+                "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l3-1 : l3
+                "psubusb %%mm1, %%mm0                   \n\t"
+                "pxor %%mm2, %%mm0                      \n\t"
+                "movq %%mm0, (%%"REG_a", %1, 2)         \n\t" // line 3
+
+                "movq (%%"REG_c", %1), %%mm0            \n\t" // line 6
+                "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l6-1 : l6
+                "paddusb %%mm1, %%mm0                   \n\t"
+                "pxor %%mm2, %%mm0                      \n\t"
+                "movq %%mm0, (%%"REG_c", %1)            \n\t" // line 6
+
+                PAVGB(%%mm7, %%mm1)                           // d/8
+
+                "movq (%%"REG_a", %1), %%mm0            \n\t" // line 2
+                "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l2-1 : l2
+                "psubusb %%mm1, %%mm0                   \n\t"
+                "pxor %%mm2, %%mm0                      \n\t"
+                "movq %%mm0, (%%"REG_a", %1)            \n\t" // line 2
+
+                "movq (%%"REG_c", %1, 2), %%mm0         \n\t" // line 7
+                "pxor %%mm2, %%mm0                      \n\t" //(l4 - l5) <= 0 ? -l7-1 : l7
+                "paddusb %%mm1, %%mm0                   \n\t"
+                "pxor %%mm2, %%mm0                      \n\t"
+                "movq %%mm0, (%%"REG_c", %1, 2)         \n\t" // line 7
+
+                :
+                : "r" (src), "r" ((long)stride), "m" (co->pQPb)
+                : "%"REG_a, "%"REG_c
+        );
+#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+
+        const int l1= stride;
+        const int l2= stride + l1;
+        const int l3= stride + l2;
+        const int l4= stride + l3;
+        const int l5= stride + l4;
+        const int l6= stride + l5;
+        const int l7= stride + l6;
+//        const int l8= stride + l7;
+//        const int l9= stride + l8;
+        int x;
+
+        src+= stride*3;
+        for(x=0; x<BLOCK_SIZE; x++)
+        {
+                int a= src[l3] - src[l4];
+                int b= src[l4] - src[l5];
+                int c= src[l5] - src[l6];
+
+                int d= ABS(b) - ((ABS(a) + ABS(c))>>1);
+                d= MAX(d, 0);
+
+                if(d < co->QP*2)
+                {
+                        int v = d * SIGN(-b);
+
+                        src[l2] +=v>>3;
+                        src[l3] +=v>>2;
+                        src[l4] +=(3*v)>>3;
+                        src[l5] -=(3*v)>>3;
+                        src[l6] -=v>>2;
+                        src[l7] -=v>>3;
+
+                }
+                src++;
+        }
+#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 }
 
 #ifndef HAVE_ALTIVEC
@@ -612,570 +612,570 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
 {
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 /*
-	uint8_t tmp[16];
-	const int l1= stride;
-	const int l2= stride + l1;
-	const int l3= stride + l2;
-	const int l4= (int)tmp - (int)src - stride*3;
-	const int l5= (int)tmp - (int)src - stride*3 + 8;
-	const int l6= stride*3 + l3;
-	const int l7= stride + l6;
-	const int l8= stride + l7;
-
-	memcpy(tmp, src+stride*7, 8);
-	memcpy(tmp+8, src+stride*8, 8);
+        uint8_t tmp[16];
+        const int l1= stride;
+        const int l2= stride + l1;
+        const int l3= stride + l2;
+        const int l4= (int)tmp - (int)src - stride*3;
+        const int l5= (int)tmp - (int)src - stride*3 + 8;
+        const int l6= stride*3 + l3;
+        const int l7= stride + l6;
+        const int l8= stride + l7;
+
+        memcpy(tmp, src+stride*7, 8);
+        memcpy(tmp+8, src+stride*8, 8);
 */
-	src+= stride*4;
-	asm volatile(
+        src+= stride*4;
+        asm volatile(
 
 #if 0 //sligtly more accurate and slightly slower
-		"pxor %%mm7, %%mm7				\n\t" // 0
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_c"		\n\t"
-//	0	1	2	3	4	5	6	7
-//	%0	%0+%1	%0+2%1	eax+2%1	%0+4%1	eax+4%1	ecx+%1	ecx+2%1
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	ecx	ecx+%1	ecx+2%1
-
-
-		"movq (%0, %1, 2), %%mm0			\n\t" // l2
-		"movq (%0), %%mm1				\n\t" // l0
-		"movq %%mm0, %%mm2				\n\t" // l2
-		PAVGB(%%mm7, %%mm0)				      // ~l2/2
-		PAVGB(%%mm1, %%mm0)				      // ~(l2 + 2l0)/4
-		PAVGB(%%mm2, %%mm0)				      // ~(5l2 + 2l0)/8
-
-		"movq (%%"REG_a"), %%mm1			\n\t" // l1
-		"movq (%%"REG_a", %1, 2), %%mm3			\n\t" // l3
-		"movq %%mm1, %%mm4				\n\t" // l1
-		PAVGB(%%mm7, %%mm1)				      // ~l1/2
-		PAVGB(%%mm3, %%mm1)				      // ~(l1 + 2l3)/4
-		PAVGB(%%mm4, %%mm1)				      // ~(5l1 + 2l3)/8
-
-		"movq %%mm0, %%mm4				\n\t" // ~(5l2 + 2l0)/8
-		"psubusb %%mm1, %%mm0				\n\t"
-		"psubusb %%mm4, %%mm1				\n\t"
-		"por %%mm0, %%mm1				\n\t" // ~|2l0 - 5l1 + 5l2 - 2l3|/8
+                "pxor %%mm7, %%mm7                      \n\t" // 0
+                "lea (%0, %1), %%"REG_a"                \n\t"
+                "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
+//      0       1       2       3       4       5       6       7
+//      %0      %0+%1   %0+2%1  eax+2%1 %0+4%1  eax+4%1 ecx+%1  ecx+2%1
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1
+
+
+                "movq (%0, %1, 2), %%mm0                \n\t" // l2
+                "movq (%0), %%mm1                       \n\t" // l0
+                "movq %%mm0, %%mm2                      \n\t" // l2
+                PAVGB(%%mm7, %%mm0)                           // ~l2/2
+                PAVGB(%%mm1, %%mm0)                           // ~(l2 + 2l0)/4
+                PAVGB(%%mm2, %%mm0)                           // ~(5l2 + 2l0)/8
+
+                "movq (%%"REG_a"), %%mm1                \n\t" // l1
+                "movq (%%"REG_a", %1, 2), %%mm3         \n\t" // l3
+                "movq %%mm1, %%mm4                      \n\t" // l1
+                PAVGB(%%mm7, %%mm1)                           // ~l1/2
+                PAVGB(%%mm3, %%mm1)                           // ~(l1 + 2l3)/4
+                PAVGB(%%mm4, %%mm1)                           // ~(5l1 + 2l3)/8
+
+                "movq %%mm0, %%mm4                      \n\t" // ~(5l2 + 2l0)/8
+                "psubusb %%mm1, %%mm0                   \n\t"
+                "psubusb %%mm4, %%mm1                   \n\t"
+                "por %%mm0, %%mm1                       \n\t" // ~|2l0 - 5l1 + 5l2 - 2l3|/8
 // mm1= |lenergy|, mm2= l2, mm3= l3, mm7=0
 
-		"movq (%0, %1, 4), %%mm0			\n\t" // l4
-		"movq %%mm0, %%mm4				\n\t" // l4
-		PAVGB(%%mm7, %%mm0)				      // ~l4/2
-		PAVGB(%%mm2, %%mm0)				      // ~(l4 + 2l2)/4
-		PAVGB(%%mm4, %%mm0)				      // ~(5l4 + 2l2)/8
-
-		"movq (%%"REG_c"), %%mm2			\n\t" // l5
-		"movq %%mm3, %%mm5				\n\t" // l3
-		PAVGB(%%mm7, %%mm3)				      // ~l3/2
-		PAVGB(%%mm2, %%mm3)				      // ~(l3 + 2l5)/4
-		PAVGB(%%mm5, %%mm3)				      // ~(5l3 + 2l5)/8
-
-		"movq %%mm0, %%mm6				\n\t" // ~(5l4 + 2l2)/8
-		"psubusb %%mm3, %%mm0				\n\t"
-		"psubusb %%mm6, %%mm3				\n\t"
-		"por %%mm0, %%mm3				\n\t" // ~|2l2 - 5l3 + 5l4 - 2l5|/8
-		"pcmpeqb %%mm7, %%mm0				\n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5)
+                "movq (%0, %1, 4), %%mm0                \n\t" // l4
+                "movq %%mm0, %%mm4                      \n\t" // l4
+                PAVGB(%%mm7, %%mm0)                           // ~l4/2
+                PAVGB(%%mm2, %%mm0)                           // ~(l4 + 2l2)/4
+                PAVGB(%%mm4, %%mm0)                           // ~(5l4 + 2l2)/8
+
+                "movq (%%"REG_c"), %%mm2                \n\t" // l5
+                "movq %%mm3, %%mm5                      \n\t" // l3
+                PAVGB(%%mm7, %%mm3)                           // ~l3/2
+                PAVGB(%%mm2, %%mm3)                           // ~(l3 + 2l5)/4
+                PAVGB(%%mm5, %%mm3)                           // ~(5l3 + 2l5)/8
+
+                "movq %%mm0, %%mm6                      \n\t" // ~(5l4 + 2l2)/8
+                "psubusb %%mm3, %%mm0                   \n\t"
+                "psubusb %%mm6, %%mm3                   \n\t"
+                "por %%mm0, %%mm3                       \n\t" // ~|2l2 - 5l3 + 5l4 - 2l5|/8
+                "pcmpeqb %%mm7, %%mm0                   \n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5)
 // mm0= SIGN(menergy), mm1= |lenergy|, mm2= l5, mm3= |menergy|, mm4=l4, mm5= l3, mm7=0
 
-		"movq (%%"REG_c", %1), %%mm6			\n\t" // l6
-		"movq %%mm6, %%mm5				\n\t" // l6
-		PAVGB(%%mm7, %%mm6)				      // ~l6/2
-		PAVGB(%%mm4, %%mm6)				      // ~(l6 + 2l4)/4
-		PAVGB(%%mm5, %%mm6)				      // ~(5l6 + 2l4)/8
-
-		"movq (%%"REG_c", %1, 2), %%mm5			\n\t" // l7
-		"movq %%mm2, %%mm4				\n\t" // l5
-		PAVGB(%%mm7, %%mm2)				      // ~l5/2
-		PAVGB(%%mm5, %%mm2)				      // ~(l5 + 2l7)/4
-		PAVGB(%%mm4, %%mm2)				      // ~(5l5 + 2l7)/8
-
-		"movq %%mm6, %%mm4				\n\t" // ~(5l6 + 2l4)/8
-		"psubusb %%mm2, %%mm6				\n\t"
-		"psubusb %%mm4, %%mm2				\n\t"
-		"por %%mm6, %%mm2				\n\t" // ~|2l4 - 5l5 + 5l6 - 2l7|/8
+                "movq (%%"REG_c", %1), %%mm6            \n\t" // l6
+                "movq %%mm6, %%mm5                      \n\t" // l6
+                PAVGB(%%mm7, %%mm6)                           // ~l6/2
+                PAVGB(%%mm4, %%mm6)                           // ~(l6 + 2l4)/4
+                PAVGB(%%mm5, %%mm6)                           // ~(5l6 + 2l4)/8
+
+                "movq (%%"REG_c", %1, 2), %%mm5         \n\t" // l7
+                "movq %%mm2, %%mm4                      \n\t" // l5
+                PAVGB(%%mm7, %%mm2)                           // ~l5/2
+                PAVGB(%%mm5, %%mm2)                           // ~(l5 + 2l7)/4
+                PAVGB(%%mm4, %%mm2)                           // ~(5l5 + 2l7)/8
+
+                "movq %%mm6, %%mm4                      \n\t" // ~(5l6 + 2l4)/8
+                "psubusb %%mm2, %%mm6                   \n\t"
+                "psubusb %%mm4, %%mm2                   \n\t"
+                "por %%mm6, %%mm2                       \n\t" // ~|2l4 - 5l5 + 5l6 - 2l7|/8
 // mm0= SIGN(menergy), mm1= |lenergy|/8, mm2= |renergy|/8, mm3= |menergy|/8, mm7=0
 
 
-		PMINUB(%%mm2, %%mm1, %%mm4)			      // MIN(|lenergy|,|renergy|)/8
-		"movq %2, %%mm4					\n\t" // QP //FIXME QP+1 ?
-		"paddusb "MANGLE(b01)", %%mm4			\n\t"
-		"pcmpgtb %%mm3, %%mm4				\n\t" // |menergy|/8 < QP
-		"psubusb %%mm1, %%mm3				\n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
-		"pand %%mm4, %%mm3				\n\t"
-
-		"movq %%mm3, %%mm1				\n\t"
-//		"psubusb "MANGLE(b01)", %%mm3			\n\t"
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm7, %%mm3)
-		"paddusb %%mm1, %%mm3				\n\t"
-//		"paddusb "MANGLE(b01)", %%mm3			\n\t"
-
-		"movq (%%"REG_a", %1, 2), %%mm6			\n\t" //l3
-		"movq (%0, %1, 4), %%mm5			\n\t" //l4
-		"movq (%0, %1, 4), %%mm4			\n\t" //l4
-		"psubusb %%mm6, %%mm5				\n\t"
-		"psubusb %%mm4, %%mm6				\n\t"
-		"por %%mm6, %%mm5				\n\t" // |l3-l4|
-		"pcmpeqb %%mm7, %%mm6				\n\t" // SIGN(l3-l4)
-		"pxor %%mm6, %%mm0				\n\t"
-		"pand %%mm0, %%mm3				\n\t"
-		PMINUB(%%mm5, %%mm3, %%mm0)
-
-		"psubusb "MANGLE(b01)", %%mm3			\n\t"
-		PAVGB(%%mm7, %%mm3)
-
-		"movq (%%"REG_a", %1, 2), %%mm0			\n\t"
-		"movq (%0, %1, 4), %%mm2			\n\t"
-		"pxor %%mm6, %%mm0				\n\t"
-		"pxor %%mm6, %%mm2				\n\t"
-		"psubb %%mm3, %%mm0				\n\t"
-		"paddb %%mm3, %%mm2				\n\t"
-		"pxor %%mm6, %%mm0				\n\t"
-		"pxor %%mm6, %%mm2				\n\t"
-		"movq %%mm0, (%%"REG_a", %1, 2)			\n\t"
-		"movq %%mm2, (%0, %1, 4)			\n\t"
-#endif
-
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"pcmpeqb %%mm6, %%mm6				\n\t" // -1
-//	0	1	2	3	4	5	6	7
-//	%0	%0+%1	%0+2%1	eax+2%1	%0+4%1	eax+4%1	ecx+%1	ecx+2%1
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	ecx	ecx+%1	ecx+2%1
-
-
-		"movq (%%"REG_a", %1, 2), %%mm1			\n\t" // l3
-		"movq (%0, %1, 4), %%mm0			\n\t" // l4
-		"pxor %%mm6, %%mm1				\n\t" // -l3-1
-		PAVGB(%%mm1, %%mm0)				      // -q+128 = (l4-l3+256)/2
+                PMINUB(%%mm2, %%mm1, %%mm4)                   // MIN(|lenergy|,|renergy|)/8
+                "movq %2, %%mm4                         \n\t" // QP //FIXME QP+1 ?
+                "paddusb "MANGLE(b01)", %%mm4           \n\t"
+                "pcmpgtb %%mm3, %%mm4                   \n\t" // |menergy|/8 < QP
+                "psubusb %%mm1, %%mm3                   \n\t" // d=|menergy|/8-MIN(|lenergy|,|renergy|)/8
+                "pand %%mm4, %%mm3                      \n\t"
+
+                "movq %%mm3, %%mm1                      \n\t"
+//                "psubusb "MANGLE(b01)", %%mm3           \n\t"
+                PAVGB(%%mm7, %%mm3)
+                PAVGB(%%mm7, %%mm3)
+                "paddusb %%mm1, %%mm3                   \n\t"
+//                "paddusb "MANGLE(b01)", %%mm3           \n\t"
+
+                "movq (%%"REG_a", %1, 2), %%mm6         \n\t" //l3
+                "movq (%0, %1, 4), %%mm5                \n\t" //l4
+                "movq (%0, %1, 4), %%mm4                \n\t" //l4
+                "psubusb %%mm6, %%mm5                   \n\t"
+                "psubusb %%mm4, %%mm6                   \n\t"
+                "por %%mm6, %%mm5                       \n\t" // |l3-l4|
+                "pcmpeqb %%mm7, %%mm6                   \n\t" // SIGN(l3-l4)
+                "pxor %%mm6, %%mm0                      \n\t"
+                "pand %%mm0, %%mm3                      \n\t"
+                PMINUB(%%mm5, %%mm3, %%mm0)
+
+                "psubusb "MANGLE(b01)", %%mm3           \n\t"
+                PAVGB(%%mm7, %%mm3)
+
+                "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
+                "movq (%0, %1, 4), %%mm2                \n\t"
+                "pxor %%mm6, %%mm0                      \n\t"
+                "pxor %%mm6, %%mm2                      \n\t"
+                "psubb %%mm3, %%mm0                     \n\t"
+                "paddb %%mm3, %%mm2                     \n\t"
+                "pxor %%mm6, %%mm0                      \n\t"
+                "pxor %%mm6, %%mm2                      \n\t"
+                "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
+                "movq %%mm2, (%0, %1, 4)                \n\t"
+#endif //0
+
+                "lea (%0, %1), %%"REG_a"                \n\t"
+                "pcmpeqb %%mm6, %%mm6                   \n\t" // -1
+//      0       1       2       3       4       5       6       7
+//      %0      %0+%1   %0+2%1  eax+2%1 %0+4%1  eax+4%1 ecx+%1  ecx+2%1
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1
+
+
+                "movq (%%"REG_a", %1, 2), %%mm1         \n\t" // l3
+                "movq (%0, %1, 4), %%mm0                \n\t" // l4
+                "pxor %%mm6, %%mm1                      \n\t" // -l3-1
+                PAVGB(%%mm1, %%mm0)                           // -q+128 = (l4-l3+256)/2
 // mm1=-l3-1, mm0=128-q
 
-		"movq (%%"REG_a", %1, 4), %%mm2			\n\t" // l5
-		"movq (%%"REG_a", %1), %%mm3			\n\t" // l2
-		"pxor %%mm6, %%mm2				\n\t" // -l5-1
-		"movq %%mm2, %%mm5				\n\t" // -l5-1
-		"movq "MANGLE(b80)", %%mm4			\n\t" // 128
-		"lea (%%"REG_a", %1, 4), %%"REG_c"		\n\t"
-		PAVGB(%%mm3, %%mm2)				      // (l2-l5+256)/2
-		PAVGB(%%mm0, %%mm4)				      // ~(l4-l3)/4 + 128
-		PAVGB(%%mm2, %%mm4)				      // ~(l2-l5)/4 +(l4-l3)/8 + 128
-		PAVGB(%%mm0, %%mm4)				      // ~(l2-l5)/8 +5(l4-l3)/16 + 128
+                "movq (%%"REG_a", %1, 4), %%mm2         \n\t" // l5
+                "movq (%%"REG_a", %1), %%mm3            \n\t" // l2
+                "pxor %%mm6, %%mm2                      \n\t" // -l5-1
+                "movq %%mm2, %%mm5                      \n\t" // -l5-1
+                "movq "MANGLE(b80)", %%mm4              \n\t" // 128
+                "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
+                PAVGB(%%mm3, %%mm2)                           // (l2-l5+256)/2
+                PAVGB(%%mm0, %%mm4)                           // ~(l4-l3)/4 + 128
+                PAVGB(%%mm2, %%mm4)                           // ~(l2-l5)/4 +(l4-l3)/8 + 128
+                PAVGB(%%mm0, %%mm4)                           // ~(l2-l5)/8 +5(l4-l3)/16 + 128
 // mm1=-l3-1, mm0=128-q, mm3=l2, mm4=menergy/16 + 128, mm5= -l5-1
 
-		"movq (%%"REG_a"), %%mm2			\n\t" // l1
-		"pxor %%mm6, %%mm2				\n\t" // -l1-1
-		PAVGB(%%mm3, %%mm2)				      // (l2-l1+256)/2
-		PAVGB((%0), %%mm1)				      // (l0-l3+256)/2
-		"movq "MANGLE(b80)", %%mm3			\n\t" // 128
-		PAVGB(%%mm2, %%mm3)				      // ~(l2-l1)/4 + 128
-		PAVGB(%%mm1, %%mm3)				      // ~(l0-l3)/4 +(l2-l1)/8 + 128
-		PAVGB(%%mm2, %%mm3)				      // ~(l0-l3)/8 +5(l2-l1)/16 + 128
+                "movq (%%"REG_a"), %%mm2                \n\t" // l1
+                "pxor %%mm6, %%mm2                      \n\t" // -l1-1
+                PAVGB(%%mm3, %%mm2)                           // (l2-l1+256)/2
+                PAVGB((%0), %%mm1)                            // (l0-l3+256)/2
+                "movq "MANGLE(b80)", %%mm3              \n\t" // 128
+                PAVGB(%%mm2, %%mm3)                           // ~(l2-l1)/4 + 128
+                PAVGB(%%mm1, %%mm3)                           // ~(l0-l3)/4 +(l2-l1)/8 + 128
+                PAVGB(%%mm2, %%mm3)                           // ~(l0-l3)/8 +5(l2-l1)/16 + 128
 // mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1
 
-		PAVGB((%%REGc, %1), %%mm5)			      // (l6-l5+256)/2
-		"movq (%%"REG_c", %1, 2), %%mm1			\n\t" // l7
-		"pxor %%mm6, %%mm1				\n\t" // -l7-1
-		PAVGB((%0, %1, 4), %%mm1)			      // (l4-l7+256)/2
-		"movq "MANGLE(b80)", %%mm2			\n\t" // 128
-		PAVGB(%%mm5, %%mm2)				      // ~(l6-l5)/4 + 128
-		PAVGB(%%mm1, %%mm2)				      // ~(l4-l7)/4 +(l6-l5)/8 + 128
-		PAVGB(%%mm5, %%mm2)				      // ~(l4-l7)/8 +5(l6-l5)/16 + 128
+                PAVGB((%%REGc, %1), %%mm5)                    // (l6-l5+256)/2
+                "movq (%%"REG_c", %1, 2), %%mm1         \n\t" // l7
+                "pxor %%mm6, %%mm1                      \n\t" // -l7-1
+                PAVGB((%0, %1, 4), %%mm1)                     // (l4-l7+256)/2
+                "movq "MANGLE(b80)", %%mm2              \n\t" // 128
+                PAVGB(%%mm5, %%mm2)                           // ~(l6-l5)/4 + 128
+                PAVGB(%%mm1, %%mm2)                           // ~(l4-l7)/4 +(l6-l5)/8 + 128
+                PAVGB(%%mm5, %%mm2)                           // ~(l4-l7)/8 +5(l6-l5)/16 + 128
 // mm0=128-q, mm2=renergy/16 + 128, mm3=lenergy/16 + 128, mm4= menergy/16 + 128
 
-		"movq "MANGLE(b00)", %%mm1			\n\t" // 0
-		"movq "MANGLE(b00)", %%mm5			\n\t" // 0
-		"psubb %%mm2, %%mm1				\n\t" // 128 - renergy/16
-		"psubb %%mm3, %%mm5				\n\t" // 128 - lenergy/16
-		PMAXUB(%%mm1, %%mm2)				      // 128 + |renergy/16|
- 		PMAXUB(%%mm5, %%mm3)				      // 128 + |lenergy/16|
-		PMINUB(%%mm2, %%mm3, %%mm1)			      // 128 + MIN(|lenergy|,|renergy|)/16
+                "movq "MANGLE(b00)", %%mm1              \n\t" // 0
+                "movq "MANGLE(b00)", %%mm5              \n\t" // 0
+                "psubb %%mm2, %%mm1                     \n\t" // 128 - renergy/16
+                "psubb %%mm3, %%mm5                     \n\t" // 128 - lenergy/16
+                PMAXUB(%%mm1, %%mm2)                          // 128 + |renergy/16|
+                 PMAXUB(%%mm5, %%mm3)                         // 128 + |lenergy/16|
+                PMINUB(%%mm2, %%mm3, %%mm1)                   // 128 + MIN(|lenergy|,|renergy|)/16
 
 // mm0=128-q, mm3=128 + MIN(|lenergy|,|renergy|)/16, mm4= menergy/16 + 128
 
-		"movq "MANGLE(b00)", %%mm7			\n\t" // 0
-		"movq %2, %%mm2					\n\t" // QP
-		PAVGB(%%mm6, %%mm2)				      // 128 + QP/2
-		"psubb %%mm6, %%mm2				\n\t"
-
-		"movq %%mm4, %%mm1				\n\t"
-		"pcmpgtb %%mm7, %%mm1				\n\t" // SIGN(menergy)
-		"pxor %%mm1, %%mm4				\n\t"
-		"psubb %%mm1, %%mm4				\n\t" // 128 + |menergy|/16
-		"pcmpgtb %%mm4, %%mm2				\n\t" // |menergy|/16 < QP/2
-		"psubusb %%mm3, %%mm4				\n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16
+                "movq "MANGLE(b00)", %%mm7              \n\t" // 0
+                "movq %2, %%mm2                         \n\t" // QP
+                PAVGB(%%mm6, %%mm2)                           // 128 + QP/2
+                "psubb %%mm6, %%mm2                     \n\t"
+
+                "movq %%mm4, %%mm1                      \n\t"
+                "pcmpgtb %%mm7, %%mm1                   \n\t" // SIGN(menergy)
+                "pxor %%mm1, %%mm4                      \n\t"
+                "psubb %%mm1, %%mm4                     \n\t" // 128 + |menergy|/16
+                "pcmpgtb %%mm4, %%mm2                   \n\t" // |menergy|/16 < QP/2
+                "psubusb %%mm3, %%mm4                   \n\t" //d=|menergy|/16 - MIN(|lenergy|,|renergy|)/16
 // mm0=128-q, mm1= SIGN(menergy), mm2= |menergy|/16 < QP/2, mm4= d/16
 
-		"movq %%mm4, %%mm3				\n\t" // d
-		"psubusb "MANGLE(b01)", %%mm4			\n\t"
-		PAVGB(%%mm7, %%mm4)				      // d/32
-		PAVGB(%%mm7, %%mm4)				      // (d + 32)/64
-		"paddb %%mm3, %%mm4				\n\t" // 5d/64
-		"pand %%mm2, %%mm4				\n\t"
-
-		"movq "MANGLE(b80)", %%mm5			\n\t" // 128
-		"psubb %%mm0, %%mm5				\n\t" // q
-		"paddsb %%mm6, %%mm5				\n\t" // fix bad rounding
-		"pcmpgtb %%mm5, %%mm7				\n\t" // SIGN(q)
-		"pxor %%mm7, %%mm5				\n\t"
-
-		PMINUB(%%mm5, %%mm4, %%mm3)			      // MIN(|q|, 5d/64)
-		"pxor %%mm1, %%mm7				\n\t" // SIGN(d*q)
-
-		"pand %%mm7, %%mm4				\n\t"
-		"movq (%%"REG_a", %1, 2), %%mm0			\n\t"
-		"movq (%0, %1, 4), %%mm2			\n\t"
-		"pxor %%mm1, %%mm0				\n\t"
-		"pxor %%mm1, %%mm2				\n\t"
-		"paddb %%mm4, %%mm0				\n\t"
-		"psubb %%mm4, %%mm2				\n\t"
-		"pxor %%mm1, %%mm0				\n\t"
-		"pxor %%mm1, %%mm2				\n\t"
-		"movq %%mm0, (%%"REG_a", %1, 2)			\n\t"
-		"movq %%mm2, (%0, %1, 4)			\n\t"
-
-		:
-		: "r" (src), "r" ((long)stride), "m" (c->pQPb)
-		: "%"REG_a, "%"REG_c
-	);
+                "movq %%mm4, %%mm3                      \n\t" // d
+                "psubusb "MANGLE(b01)", %%mm4           \n\t"
+                PAVGB(%%mm7, %%mm4)                           // d/32
+                PAVGB(%%mm7, %%mm4)                           // (d + 32)/64
+                "paddb %%mm3, %%mm4                     \n\t" // 5d/64
+                "pand %%mm2, %%mm4                      \n\t"
+
+                "movq "MANGLE(b80)", %%mm5              \n\t" // 128
+                "psubb %%mm0, %%mm5                     \n\t" // q
+                "paddsb %%mm6, %%mm5                    \n\t" // fix bad rounding
+                "pcmpgtb %%mm5, %%mm7                   \n\t" // SIGN(q)
+                "pxor %%mm7, %%mm5                      \n\t"
+
+                PMINUB(%%mm5, %%mm4, %%mm3)                   // MIN(|q|, 5d/64)
+                "pxor %%mm1, %%mm7                      \n\t" // SIGN(d*q)
+
+                "pand %%mm7, %%mm4                      \n\t"
+                "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
+                "movq (%0, %1, 4), %%mm2                \n\t"
+                "pxor %%mm1, %%mm0                      \n\t"
+                "pxor %%mm1, %%mm2                      \n\t"
+                "paddb %%mm4, %%mm0                     \n\t"
+                "psubb %%mm4, %%mm2                     \n\t"
+                "pxor %%mm1, %%mm0                      \n\t"
+                "pxor %%mm1, %%mm2                      \n\t"
+                "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
+                "movq %%mm2, (%0, %1, 4)                \n\t"
+
+                :
+                : "r" (src), "r" ((long)stride), "m" (c->pQPb)
+                : "%"REG_a, "%"REG_c
+        );
 
 /*
-	{
-	int x;
-	src-= stride;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
-		if(ABS(middleEnergy)< 8*QP)
-		{
-			const int q=(src[l4] - src[l5])/2;
-			const int leftEnergy=  5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
-			const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
-
-			int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
-			d= MAX(d, 0);
-
-			d= (5*d + 32) >> 6;
-			d*= SIGN(-middleEnergy);
-
-			if(q>0)
-			{
-				d= d<0 ? 0 : d;
-				d= d>q ? q : d;
-			}
-			else
-			{
-				d= d>0 ? 0 : d;
-				d= d<q ? q : d;
-			}
-
-        		src[l4]-= d;
-	        	src[l5]+= d;
-		}
-		src++;
-	}
+        {
+        int x;
+        src-= stride;
+        for(x=0; x<BLOCK_SIZE; x++)
+        {
+                const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
+                if(ABS(middleEnergy)< 8*QP)
+                {
+                        const int q=(src[l4] - src[l5])/2;
+                        const int leftEnergy=  5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
+                        const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
+
+                        int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
+                        d= MAX(d, 0);
+
+                        d= (5*d + 32) >> 6;
+                        d*= SIGN(-middleEnergy);
+
+                        if(q>0)
+                        {
+                                d= d<0 ? 0 : d;
+                                d= d>q ? q : d;
+                        }
+                        else
+                        {
+                                d= d>0 ? 0 : d;
+                                d= d<q ? q : d;
+                        }
+
+                        src[l4]-= d;
+                        src[l5]+= d;
+                }
+                src++;
+        }
 src-=8;
-	for(x=0; x<8; x++)
-	{
-		int y;
-		for(y=4; y<6; y++)
-		{
-			int d= src[x+y*stride] - tmp[x+(y-4)*8];
-			int ad= ABS(d);
-			static int max=0;
-			static int sum=0;
-			static int num=0;
-			static int bias=0;
-
-			if(max<ad) max=ad;
-			sum+= ad>3 ? 1 : 0;
-			if(ad>3)
-			{
-				src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255;
-			}
-			if(y==4) bias+=d;
-			num++;
-			if(num%1000000 == 0)
-			{
-				printf(" %d %d %d %d\n", num, sum, max, bias);
-			}
-		}
-	}
+        for(x=0; x<8; x++)
+        {
+                int y;
+                for(y=4; y<6; y++)
+                {
+                        int d= src[x+y*stride] - tmp[x+(y-4)*8];
+                        int ad= ABS(d);
+                        static int max=0;
+                        static int sum=0;
+                        static int num=0;
+                        static int bias=0;
+
+                        if(max<ad) max=ad;
+                        sum+= ad>3 ? 1 : 0;
+                        if(ad>3)
+                        {
+                                src[0] = src[7] = src[stride*7] = src[(stride+1)*7]=255;
+                        }
+                        if(y==4) bias+=d;
+                        num++;
+                        if(num%1000000 == 0)
+                        {
+                                printf(" %d %d %d %d\n", num, sum, max, bias);
+                        }
+                }
+        }
 }
 */
 #elif defined (HAVE_MMX)
-	src+= stride*4;
-	asm volatile(
-		"pxor %%mm7, %%mm7				\n\t"
-		"lea -40(%%"REG_SP"), %%"REG_c"			\n\t" // make space for 4 8-byte vars
-		"and "ALIGN_MASK", %%"REG_c"			\n\t" // align
-//	0	1	2	3	4	5	6	7
-//	%0	%0+%1	%0+2%1	eax+2%1	%0+4%1	eax+4%1	edx+%1	edx+2%1
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1
-
-		"movq (%0), %%mm0				\n\t"
-		"movq %%mm0, %%mm1				\n\t"
-		"punpcklbw %%mm7, %%mm0				\n\t" // low part of line 0
-		"punpckhbw %%mm7, %%mm1				\n\t" // high part of line 0
-
-		"movq (%0, %1), %%mm2				\n\t"
-		"lea (%0, %1, 2), %%"REG_a"			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // low part of line 1
-		"punpckhbw %%mm7, %%mm3				\n\t" // high part of line 1
-
-		"movq (%%"REG_a"), %%mm4			\n\t"
-		"movq %%mm4, %%mm5				\n\t"
-		"punpcklbw %%mm7, %%mm4				\n\t" // low part of line 2
-		"punpckhbw %%mm7, %%mm5				\n\t" // high part of line 2
-
-		"paddw %%mm0, %%mm0				\n\t" // 2L0
-		"paddw %%mm1, %%mm1				\n\t" // 2H0
-		"psubw %%mm4, %%mm2				\n\t" // L1 - L2
-		"psubw %%mm5, %%mm3				\n\t" // H1 - H2
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - L1 + L2
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - H1 + H2
-
-		"psllw $2, %%mm2				\n\t" // 4L1 - 4L2
-		"psllw $2, %%mm3				\n\t" // 4H1 - 4H2
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - 5L1 + 5L2
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - 5H1 + 5H2
-
-		"movq (%%"REG_a", %1), %%mm2			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // L3
-		"punpckhbw %%mm7, %%mm3				\n\t" // H3
-
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - 5L1 + 5L2 - L3
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - 5H1 + 5H2 - H3
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - 5L1 + 5L2 - 2L3
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - 5H1 + 5H2 - 2H3
-		"movq %%mm0, (%%"REG_c")			\n\t" // 2L0 - 5L1 + 5L2 - 2L3
-		"movq %%mm1, 8(%%"REG_c")			\n\t" // 2H0 - 5H1 + 5H2 - 2H3
-
-		"movq (%%"REG_a", %1, 2), %%mm0			\n\t"
-		"movq %%mm0, %%mm1				\n\t"
-		"punpcklbw %%mm7, %%mm0				\n\t" // L4
-		"punpckhbw %%mm7, %%mm1				\n\t" // H4
-
-		"psubw %%mm0, %%mm2				\n\t" // L3 - L4
-		"psubw %%mm1, %%mm3				\n\t" // H3 - H4
-		"movq %%mm2, 16(%%"REG_c")			\n\t" // L3 - L4
-		"movq %%mm3, 24(%%"REG_c")			\n\t" // H3 - H4
-		"paddw %%mm4, %%mm4				\n\t" // 2L2
-		"paddw %%mm5, %%mm5				\n\t" // 2H2
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - L3 + L4
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - H3 + H4
-
-		"lea (%%"REG_a", %1), %0			\n\t"
-		"psllw $2, %%mm2				\n\t" // 4L3 - 4L4
-		"psllw $2, %%mm3				\n\t" // 4H3 - 4H4
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - 5L3 + 5L4
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - 5H3 + 5H4
+        src+= stride*4;
+        asm volatile(
+                "pxor %%mm7, %%mm7                      \n\t"
+                "lea -40(%%"REG_SP"), %%"REG_c"         \n\t" // make space for 4 8-byte vars
+                "and "ALIGN_MASK", %%"REG_c"            \n\t" // align
+//      0       1       2       3       4       5       6       7
+//      %0      %0+%1   %0+2%1  eax+2%1 %0+4%1  eax+4%1 edx+%1  edx+2%1
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1
+
+                "movq (%0), %%mm0                       \n\t"
+                "movq %%mm0, %%mm1                      \n\t"
+                "punpcklbw %%mm7, %%mm0                 \n\t" // low part of line 0
+                "punpckhbw %%mm7, %%mm1                 \n\t" // high part of line 0
+
+                "movq (%0, %1), %%mm2                   \n\t"
+                "lea (%0, %1, 2), %%"REG_a"             \n\t"
+                "movq %%mm2, %%mm3                      \n\t"
+                "punpcklbw %%mm7, %%mm2                 \n\t" // low part of line 1
+                "punpckhbw %%mm7, %%mm3                 \n\t" // high part of line 1
+
+                "movq (%%"REG_a"), %%mm4                \n\t"
+                "movq %%mm4, %%mm5                      \n\t"
+                "punpcklbw %%mm7, %%mm4                 \n\t" // low part of line 2
+                "punpckhbw %%mm7, %%mm5                 \n\t" // high part of line 2
+
+                "paddw %%mm0, %%mm0                     \n\t" // 2L0
+                "paddw %%mm1, %%mm1                     \n\t" // 2H0
+                "psubw %%mm4, %%mm2                     \n\t" // L1 - L2
+                "psubw %%mm5, %%mm3                     \n\t" // H1 - H2
+                "psubw %%mm2, %%mm0                     \n\t" // 2L0 - L1 + L2
+                "psubw %%mm3, %%mm1                     \n\t" // 2H0 - H1 + H2
+
+                "psllw $2, %%mm2                        \n\t" // 4L1 - 4L2
+                "psllw $2, %%mm3                        \n\t" // 4H1 - 4H2
+                "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2
+                "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2
+
+                "movq (%%"REG_a", %1), %%mm2            \n\t"
+                "movq %%mm2, %%mm3                      \n\t"
+                "punpcklbw %%mm7, %%mm2                 \n\t" // L3
+                "punpckhbw %%mm7, %%mm3                 \n\t" // H3
+
+                "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2 - L3
+                "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - H3
+                "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+                "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+                "movq %%mm0, (%%"REG_c")                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+                "movq %%mm1, 8(%%"REG_c")               \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+
+                "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
+                "movq %%mm0, %%mm1                      \n\t"
+                "punpcklbw %%mm7, %%mm0                 \n\t" // L4
+                "punpckhbw %%mm7, %%mm1                 \n\t" // H4
+
+                "psubw %%mm0, %%mm2                     \n\t" // L3 - L4
+                "psubw %%mm1, %%mm3                     \n\t" // H3 - H4
+                "movq %%mm2, 16(%%"REG_c")              \n\t" // L3 - L4
+                "movq %%mm3, 24(%%"REG_c")              \n\t" // H3 - H4
+                "paddw %%mm4, %%mm4                     \n\t" // 2L2
+                "paddw %%mm5, %%mm5                     \n\t" // 2H2
+                "psubw %%mm2, %%mm4                     \n\t" // 2L2 - L3 + L4
+                "psubw %%mm3, %%mm5                     \n\t" // 2H2 - H3 + H4
+
+                "lea (%%"REG_a", %1), %0                \n\t"
+                "psllw $2, %%mm2                        \n\t" // 4L3 - 4L4
+                "psllw $2, %%mm3                        \n\t" // 4H3 - 4H4
+                "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4
+                "psubw %%mm3, %%mm5                     \n\t" // 2H2 - 5H3 + 5H4
 //50 opcodes so far
-		"movq (%0, %1, 2), %%mm2			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // L5
-		"punpckhbw %%mm7, %%mm3				\n\t" // H5
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - 5L3 + 5L4 - L5
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - 5H3 + 5H4 - H5
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - 5L3 + 5L4 - 2L5
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - 5H3 + 5H4 - 2H5
-
-		"movq (%%"REG_a", %1, 4), %%mm6			\n\t"
-		"punpcklbw %%mm7, %%mm6				\n\t" // L6
-		"psubw %%mm6, %%mm2				\n\t" // L5 - L6
-		"movq (%%"REG_a", %1, 4), %%mm6			\n\t"
-		"punpckhbw %%mm7, %%mm6				\n\t" // H6
-		"psubw %%mm6, %%mm3				\n\t" // H5 - H6
-
-		"paddw %%mm0, %%mm0				\n\t" // 2L4
-		"paddw %%mm1, %%mm1				\n\t" // 2H4
-		"psubw %%mm2, %%mm0				\n\t" // 2L4 - L5 + L6
-		"psubw %%mm3, %%mm1				\n\t" // 2H4 - H5 + H6
-
-		"psllw $2, %%mm2				\n\t" // 4L5 - 4L6
-		"psllw $2, %%mm3				\n\t" // 4H5 - 4H6
-		"psubw %%mm2, %%mm0				\n\t" // 2L4 - 5L5 + 5L6
-		"psubw %%mm3, %%mm1				\n\t" // 2H4 - 5H5 + 5H6
-
-		"movq (%0, %1, 4), %%mm2			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // L7
-		"punpckhbw %%mm7, %%mm3				\n\t" // H7
-
-		"paddw %%mm2, %%mm2				\n\t" // 2L7
-		"paddw %%mm3, %%mm3				\n\t" // 2H7
-		"psubw %%mm2, %%mm0				\n\t" // 2L4 - 5L5 + 5L6 - 2L7
-		"psubw %%mm3, %%mm1				\n\t" // 2H4 - 5H5 + 5H6 - 2H7
-
-		"movq (%%"REG_c"), %%mm2			\n\t" // 2L0 - 5L1 + 5L2 - 2L3
-		"movq 8(%%"REG_c"), %%mm3			\n\t" // 2H0 - 5H1 + 5H2 - 2H3
+                "movq (%0, %1, 2), %%mm2                \n\t"
+                "movq %%mm2, %%mm3                      \n\t"
+                "punpcklbw %%mm7, %%mm2                 \n\t" // L5
+                "punpckhbw %%mm7, %%mm3                 \n\t" // H5
+                "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4 - L5
+                "psubw %%mm3, %%mm5                     \n\t" // 2H2 - 5H3 + 5H4 - H5
+                "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4 - 2L5
+                "psubw %%mm3, %%mm5                     \n\t" // 2H2 - 5H3 + 5H4 - 2H5
+
+                "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
+                "punpcklbw %%mm7, %%mm6                 \n\t" // L6
+                "psubw %%mm6, %%mm2                     \n\t" // L5 - L6
+                "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
+                "punpckhbw %%mm7, %%mm6                 \n\t" // H6
+                "psubw %%mm6, %%mm3                     \n\t" // H5 - H6
+
+                "paddw %%mm0, %%mm0                     \n\t" // 2L4
+                "paddw %%mm1, %%mm1                     \n\t" // 2H4
+                "psubw %%mm2, %%mm0                     \n\t" // 2L4 - L5 + L6
+                "psubw %%mm3, %%mm1                     \n\t" // 2H4 - H5 + H6
+
+                "psllw $2, %%mm2                        \n\t" // 4L5 - 4L6
+                "psllw $2, %%mm3                        \n\t" // 4H5 - 4H6
+                "psubw %%mm2, %%mm0                     \n\t" // 2L4 - 5L5 + 5L6
+                "psubw %%mm3, %%mm1                     \n\t" // 2H4 - 5H5 + 5H6
+
+                "movq (%0, %1, 4), %%mm2                \n\t"
+                "movq %%mm2, %%mm3                      \n\t"
+                "punpcklbw %%mm7, %%mm2                 \n\t" // L7
+                "punpckhbw %%mm7, %%mm3                 \n\t" // H7
+
+                "paddw %%mm2, %%mm2                     \n\t" // 2L7
+                "paddw %%mm3, %%mm3                     \n\t" // 2H7
+                "psubw %%mm2, %%mm0                     \n\t" // 2L4 - 5L5 + 5L6 - 2L7
+                "psubw %%mm3, %%mm1                     \n\t" // 2H4 - 5H5 + 5H6 - 2H7
+
+                "movq (%%"REG_c"), %%mm2                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+                "movq 8(%%"REG_c"), %%mm3               \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 
 #ifdef HAVE_MMX2
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm0, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm0				\n\t" // |2L4 - 5L5 + 5L6 - 2L7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm1, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm1				\n\t" // |2H4 - 5H5 + 5H6 - 2H7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm2, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm2				\n\t" // |2L0 - 5L1 + 5L2 - 2L3|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm3, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm3				\n\t" // |2H0 - 5H1 + 5H2 - 2H3|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "psubw %%mm0, %%mm6                     \n\t"
+                "pmaxsw %%mm6, %%mm0                    \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "psubw %%mm1, %%mm6                     \n\t"
+                "pmaxsw %%mm6, %%mm1                    \n\t" // |2H4 - 5H5 + 5H6 - 2H7|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "psubw %%mm2, %%mm6                     \n\t"
+                "pmaxsw %%mm6, %%mm2                    \n\t" // |2L0 - 5L1 + 5L2 - 2L3|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "psubw %%mm3, %%mm6                     \n\t"
+                "pmaxsw %%mm6, %%mm3                    \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
 #else
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm0, %%mm6				\n\t"
-		"pxor %%mm6, %%mm0				\n\t"
-		"psubw %%mm6, %%mm0				\n\t" // |2L4 - 5L5 + 5L6 - 2L7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm1, %%mm6				\n\t"
-		"pxor %%mm6, %%mm1				\n\t"
-		"psubw %%mm6, %%mm1				\n\t" // |2H4 - 5H5 + 5H6 - 2H7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm2, %%mm6				\n\t"
-		"pxor %%mm6, %%mm2				\n\t"
-		"psubw %%mm6, %%mm2				\n\t" // |2L0 - 5L1 + 5L2 - 2L3|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm3, %%mm6				\n\t"
-		"pxor %%mm6, %%mm3				\n\t"
-		"psubw %%mm6, %%mm3				\n\t" // |2H0 - 5H1 + 5H2 - 2H3|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "pcmpgtw %%mm0, %%mm6                   \n\t"
+                "pxor %%mm6, %%mm0                      \n\t"
+                "psubw %%mm6, %%mm0                     \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "pcmpgtw %%mm1, %%mm6                   \n\t"
+                "pxor %%mm6, %%mm1                      \n\t"
+                "psubw %%mm6, %%mm1                     \n\t" // |2H4 - 5H5 + 5H6 - 2H7|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "pcmpgtw %%mm2, %%mm6                   \n\t"
+                "pxor %%mm6, %%mm2                      \n\t"
+                "psubw %%mm6, %%mm2                     \n\t" // |2L0 - 5L1 + 5L2 - 2L3|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "pcmpgtw %%mm3, %%mm6                   \n\t"
+                "pxor %%mm6, %%mm3                      \n\t"
+                "psubw %%mm6, %%mm3                     \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
 #endif
 
 #ifdef HAVE_MMX2
-		"pminsw %%mm2, %%mm0				\n\t"
-		"pminsw %%mm3, %%mm1				\n\t"
+                "pminsw %%mm2, %%mm0                    \n\t"
+                "pminsw %%mm3, %%mm1                    \n\t"
 #else
-		"movq %%mm0, %%mm6				\n\t"
-		"psubusw %%mm2, %%mm6				\n\t"
-		"psubw %%mm6, %%mm0				\n\t"
-		"movq %%mm1, %%mm6				\n\t"
-		"psubusw %%mm3, %%mm6				\n\t"
-		"psubw %%mm6, %%mm1				\n\t"
+                "movq %%mm0, %%mm6                      \n\t"
+                "psubusw %%mm2, %%mm6                   \n\t"
+                "psubw %%mm6, %%mm0                     \n\t"
+                "movq %%mm1, %%mm6                      \n\t"
+                "psubusw %%mm3, %%mm6                   \n\t"
+                "psubw %%mm6, %%mm1                     \n\t"
 #endif
 
-		"movd %2, %%mm2					\n\t" // QP
-		"punpcklbw %%mm7, %%mm2				\n\t"
+                "movd %2, %%mm2                         \n\t" // QP
+                "punpcklbw %%mm7, %%mm2                 \n\t"
 
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm4, %%mm6				\n\t" // sign(2L2 - 5L3 + 5L4 - 2L5)
-		"pxor %%mm6, %%mm4				\n\t"
-		"psubw %%mm6, %%mm4				\n\t" // |2L2 - 5L3 + 5L4 - 2L5|
-		"pcmpgtw %%mm5, %%mm7				\n\t" // sign(2H2 - 5H3 + 5H4 - 2H5)
-		"pxor %%mm7, %%mm5				\n\t"
-		"psubw %%mm7, %%mm5				\n\t" // |2H2 - 5H3 + 5H4 - 2H5|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "pcmpgtw %%mm4, %%mm6                   \n\t" // sign(2L2 - 5L3 + 5L4 - 2L5)
+                "pxor %%mm6, %%mm4                      \n\t"
+                "psubw %%mm6, %%mm4                     \n\t" // |2L2 - 5L3 + 5L4 - 2L5|
+                "pcmpgtw %%mm5, %%mm7                   \n\t" // sign(2H2 - 5H3 + 5H4 - 2H5)
+                "pxor %%mm7, %%mm5                      \n\t"
+                "psubw %%mm7, %%mm5                     \n\t" // |2H2 - 5H3 + 5H4 - 2H5|
 // 100 opcodes
-		"psllw $3, %%mm2				\n\t" // 8QP
-		"movq %%mm2, %%mm3				\n\t" // 8QP
-		"pcmpgtw %%mm4, %%mm2				\n\t"
-		"pcmpgtw %%mm5, %%mm3				\n\t"
-		"pand %%mm2, %%mm4				\n\t"
-		"pand %%mm3, %%mm5				\n\t"
-
-
-		"psubusw %%mm0, %%mm4				\n\t" // hd
-		"psubusw %%mm1, %%mm5				\n\t" // ld
-
-
-		"movq "MANGLE(w05)", %%mm2			\n\t" // 5
-		"pmullw %%mm2, %%mm4				\n\t"
-		"pmullw %%mm2, %%mm5				\n\t"
-		"movq "MANGLE(w20)", %%mm2			\n\t" // 32
-		"paddw %%mm2, %%mm4				\n\t"
-		"paddw %%mm2, %%mm5				\n\t"
-		"psrlw $6, %%mm4				\n\t"
-		"psrlw $6, %%mm5				\n\t"
-
-		"movq 16(%%"REG_c"), %%mm0			\n\t" // L3 - L4
-		"movq 24(%%"REG_c"), %%mm1			\n\t" // H3 - H4
-
-		"pxor %%mm2, %%mm2				\n\t"
-		"pxor %%mm3, %%mm3				\n\t"
-
-		"pcmpgtw %%mm0, %%mm2				\n\t" // sign (L3-L4)
-		"pcmpgtw %%mm1, %%mm3				\n\t" // sign (H3-H4)
-		"pxor %%mm2, %%mm0				\n\t"
-		"pxor %%mm3, %%mm1				\n\t"
-		"psubw %%mm2, %%mm0				\n\t" // |L3-L4|
-		"psubw %%mm3, %%mm1				\n\t" // |H3-H4|
-		"psrlw $1, %%mm0				\n\t" // |L3 - L4|/2
-		"psrlw $1, %%mm1				\n\t" // |H3 - H4|/2
-
-		"pxor %%mm6, %%mm2				\n\t"
-		"pxor %%mm7, %%mm3				\n\t"
-		"pand %%mm2, %%mm4				\n\t"
-		"pand %%mm3, %%mm5				\n\t"
+                "psllw $3, %%mm2                        \n\t" // 8QP
+                "movq %%mm2, %%mm3                      \n\t" // 8QP
+                "pcmpgtw %%mm4, %%mm2                   \n\t"
+                "pcmpgtw %%mm5, %%mm3                   \n\t"
+                "pand %%mm2, %%mm4                      \n\t"
+                "pand %%mm3, %%mm5                      \n\t"
+
+
+                "psubusw %%mm0, %%mm4                   \n\t" // hd
+                "psubusw %%mm1, %%mm5                   \n\t" // ld
+
+
+                "movq "MANGLE(w05)", %%mm2              \n\t" // 5
+                "pmullw %%mm2, %%mm4                    \n\t"
+                "pmullw %%mm2, %%mm5                    \n\t"
+                "movq "MANGLE(w20)", %%mm2              \n\t" // 32
+                "paddw %%mm2, %%mm4                     \n\t"
+                "paddw %%mm2, %%mm5                     \n\t"
+                "psrlw $6, %%mm4                        \n\t"
+                "psrlw $6, %%mm5                        \n\t"
+
+                "movq 16(%%"REG_c"), %%mm0              \n\t" // L3 - L4
+                "movq 24(%%"REG_c"), %%mm1              \n\t" // H3 - H4
+
+                "pxor %%mm2, %%mm2                      \n\t"
+                "pxor %%mm3, %%mm3                      \n\t"
+
+                "pcmpgtw %%mm0, %%mm2                   \n\t" // sign (L3-L4)
+                "pcmpgtw %%mm1, %%mm3                   \n\t" // sign (H3-H4)
+                "pxor %%mm2, %%mm0                      \n\t"
+                "pxor %%mm3, %%mm1                      \n\t"
+                "psubw %%mm2, %%mm0                     \n\t" // |L3-L4|
+                "psubw %%mm3, %%mm1                     \n\t" // |H3-H4|
+                "psrlw $1, %%mm0                        \n\t" // |L3 - L4|/2
+                "psrlw $1, %%mm1                        \n\t" // |H3 - H4|/2
+
+                "pxor %%mm6, %%mm2                      \n\t"
+                "pxor %%mm7, %%mm3                      \n\t"
+                "pand %%mm2, %%mm4                      \n\t"
+                "pand %%mm3, %%mm5                      \n\t"
 
 #ifdef HAVE_MMX2
-		"pminsw %%mm0, %%mm4				\n\t"
-		"pminsw %%mm1, %%mm5				\n\t"
+                "pminsw %%mm0, %%mm4                    \n\t"
+                "pminsw %%mm1, %%mm5                    \n\t"
 #else
-		"movq %%mm4, %%mm2				\n\t"
-		"psubusw %%mm0, %%mm2				\n\t"
-		"psubw %%mm2, %%mm4				\n\t"
-		"movq %%mm5, %%mm2				\n\t"
-		"psubusw %%mm1, %%mm2				\n\t"
-		"psubw %%mm2, %%mm5				\n\t"
-#endif
-		"pxor %%mm6, %%mm4				\n\t"
-		"pxor %%mm7, %%mm5				\n\t"
-		"psubw %%mm6, %%mm4				\n\t"
-		"psubw %%mm7, %%mm5				\n\t"
-		"packsswb %%mm5, %%mm4				\n\t"
-		"movq (%0), %%mm0				\n\t"
-		"paddb   %%mm4, %%mm0				\n\t"
-		"movq %%mm0, (%0)				\n\t"
-		"movq (%0, %1), %%mm0				\n\t"
-		"psubb %%mm4, %%mm0				\n\t"
-		"movq %%mm0, (%0, %1)				\n\t"
-
-		: "+r" (src)
-		: "r" ((long)stride), "m" (c->pQPb)
-		: "%"REG_a, "%"REG_c
-	);
-#else
-	const int l1= stride;
-	const int l2= stride + l1;
-	const int l3= stride + l2;
-	const int l4= stride + l3;
-	const int l5= stride + l4;
-	const int l6= stride + l5;
-	const int l7= stride + l6;
-	const int l8= stride + l7;
-//	const int l9= stride + l8;
-	int x;
-	src+= stride*3;
-	for(x=0; x<BLOCK_SIZE; x++)
-	{
-		const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
-		if(ABS(middleEnergy) < 8*c->QP)
-		{
-			const int q=(src[l4] - src[l5])/2;
-			const int leftEnergy=  5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
-			const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
-
-			int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
-			d= MAX(d, 0);
-
-			d= (5*d + 32) >> 6;
-			d*= SIGN(-middleEnergy);
-
-			if(q>0)
-			{
-				d= d<0 ? 0 : d;
-				d= d>q ? q : d;
-			}
-			else
-			{
-				d= d>0 ? 0 : d;
-				d= d<q ? q : d;
-			}
-
-        		src[l4]-= d;
-	        	src[l5]+= d;
-		}
-		src++;
-	}
+                "movq %%mm4, %%mm2                      \n\t"
+                "psubusw %%mm0, %%mm2                   \n\t"
+                "psubw %%mm2, %%mm4                     \n\t"
+                "movq %%mm5, %%mm2                      \n\t"
+                "psubusw %%mm1, %%mm2                   \n\t"
+                "psubw %%mm2, %%mm5                     \n\t"
 #endif
+                "pxor %%mm6, %%mm4                      \n\t"
+                "pxor %%mm7, %%mm5                      \n\t"
+                "psubw %%mm6, %%mm4                     \n\t"
+                "psubw %%mm7, %%mm5                     \n\t"
+                "packsswb %%mm5, %%mm4                  \n\t"
+                "movq (%0), %%mm0                       \n\t"
+                "paddb   %%mm4, %%mm0                   \n\t"
+                "movq %%mm0, (%0)                       \n\t"
+                "movq (%0, %1), %%mm0                   \n\t"
+                "psubb %%mm4, %%mm0                     \n\t"
+                "movq %%mm0, (%0, %1)                   \n\t"
+
+                : "+r" (src)
+                : "r" ((long)stride), "m" (c->pQPb)
+                : "%"REG_a, "%"REG_c
+        );
+#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+        const int l1= stride;
+        const int l2= stride + l1;
+        const int l3= stride + l2;
+        const int l4= stride + l3;
+        const int l5= stride + l4;
+        const int l6= stride + l5;
+        const int l7= stride + l6;
+        const int l8= stride + l7;
+//        const int l9= stride + l8;
+        int x;
+        src+= stride*3;
+        for(x=0; x<BLOCK_SIZE; x++)
+        {
+                const int middleEnergy= 5*(src[l5] - src[l4]) + 2*(src[l3] - src[l6]);
+                if(ABS(middleEnergy) < 8*c->QP)
+                {
+                        const int q=(src[l4] - src[l5])/2;
+                        const int leftEnergy=  5*(src[l3] - src[l2]) + 2*(src[l1] - src[l4]);
+                        const int rightEnergy= 5*(src[l7] - src[l6]) + 2*(src[l5] - src[l8]);
+
+                        int d= ABS(middleEnergy) - MIN( ABS(leftEnergy), ABS(rightEnergy) );
+                        d= MAX(d, 0);
+
+                        d= (5*d + 32) >> 6;
+                        d*= SIGN(-middleEnergy);
+
+                        if(q>0)
+                        {
+                                d= d<0 ? 0 : d;
+                                d= d>q ? q : d;
+                        }
+                        else
+                        {
+                                d= d>0 ? 0 : d;
+                                d= d<q ? q : d;
+                        }
+
+                        src[l4]-= d;
+                        src[l5]+= d;
+                }
+                src++;
+        }
+#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 }
 #endif //HAVE_ALTIVEC
 
@@ -1183,36 +1183,36 @@ src-=8;
 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
 {
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
-	asm volatile(
-		"pxor %%mm6, %%mm6				\n\t"
-		"pcmpeqb %%mm7, %%mm7				\n\t"
-		"movq %2, %%mm0					\n\t"
-		"punpcklbw %%mm6, %%mm0				\n\t"
-		"psrlw $1, %%mm0				\n\t"
-		"psubw %%mm7, %%mm0				\n\t"
-		"packuswb %%mm0, %%mm0				\n\t"
-		"movq %%mm0, %3					\n\t"
-
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-		
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
+        asm volatile(
+                "pxor %%mm6, %%mm6                      \n\t"
+                "pcmpeqb %%mm7, %%mm7                   \n\t"
+                "movq %2, %%mm0                         \n\t"
+                "punpcklbw %%mm6, %%mm0                 \n\t"
+                "psrlw $1, %%mm0                        \n\t"
+                "psubw %%mm7, %%mm0                     \n\t"
+                "packuswb %%mm0, %%mm0                  \n\t"
+                "movq %%mm0, %3                         \n\t"
+
+                "lea (%0, %1), %%"REG_a"                \n\t"
+                "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+
+//        0        1        2        3        4        5        6        7        8        9
+//        %0        eax        eax+%1        eax+2%1        %0+4%1        edx        edx+%1        edx+2%1        %0+8%1        edx+4%1
 
 #undef FIND_MIN_MAX
 #ifdef HAVE_MMX2
 #define REAL_FIND_MIN_MAX(addr)\
-		"movq " #addr ", %%mm0				\n\t"\
-		"pminub %%mm0, %%mm7				\n\t"\
-		"pmaxub %%mm0, %%mm6				\n\t"
+                "movq " #addr ", %%mm0                  \n\t"\
+                "pminub %%mm0, %%mm7                    \n\t"\
+                "pmaxub %%mm0, %%mm6                    \n\t"
 #else
 #define REAL_FIND_MIN_MAX(addr)\
-		"movq " #addr ", %%mm0				\n\t"\
-		"movq %%mm7, %%mm1				\n\t"\
-		"psubusb %%mm0, %%mm6				\n\t"\
-		"paddb %%mm0, %%mm6				\n\t"\
-		"psubusb %%mm0, %%mm1				\n\t"\
-		"psubb %%mm1, %%mm7				\n\t"
+                "movq " #addr ", %%mm0                  \n\t"\
+                "movq %%mm7, %%mm1                      \n\t"\
+                "psubusb %%mm0, %%mm6                   \n\t"\
+                "paddb %%mm0, %%mm6                     \n\t"\
+                "psubusb %%mm0, %%mm1                   \n\t"\
+                "psubb %%mm1, %%mm7                     \n\t"
 #endif
 #define FIND_MIN_MAX(addr)  REAL_FIND_MIN_MAX(addr)
 
@@ -1225,155 +1225,155 @@ FIND_MIN_MAX((%%REGd, %1))
 FIND_MIN_MAX((%%REGd, %1, 2))
 FIND_MIN_MAX((%0, %1, 8))
 
-		"movq %%mm7, %%mm4				\n\t"
-		"psrlq $8, %%mm7				\n\t"
+                "movq %%mm7, %%mm4                      \n\t"
+                "psrlq $8, %%mm7                        \n\t"
 #ifdef HAVE_MMX2
-		"pminub %%mm4, %%mm7				\n\t" // min of pixels
-		"pshufw $0xF9, %%mm7, %%mm4			\n\t"
-		"pminub %%mm4, %%mm7				\n\t" // min of pixels
-		"pshufw $0xFE, %%mm7, %%mm4			\n\t"
-		"pminub %%mm4, %%mm7				\n\t"
+                "pminub %%mm4, %%mm7                    \n\t" // min of pixels
+                "pshufw $0xF9, %%mm7, %%mm4             \n\t"
+                "pminub %%mm4, %%mm7                    \n\t" // min of pixels
+                "pshufw $0xFE, %%mm7, %%mm4             \n\t"
+                "pminub %%mm4, %%mm7                    \n\t"
 #else
-		"movq %%mm7, %%mm1				\n\t"
-		"psubusb %%mm4, %%mm1				\n\t"
-		"psubb %%mm1, %%mm7				\n\t"
-		"movq %%mm7, %%mm4				\n\t"
-		"psrlq $16, %%mm7				\n\t"
-		"movq %%mm7, %%mm1				\n\t"
-		"psubusb %%mm4, %%mm1				\n\t"
-		"psubb %%mm1, %%mm7				\n\t"
-		"movq %%mm7, %%mm4				\n\t"
-		"psrlq $32, %%mm7				\n\t"
-		"movq %%mm7, %%mm1				\n\t"
-		"psubusb %%mm4, %%mm1				\n\t"
-		"psubb %%mm1, %%mm7				\n\t"
+                "movq %%mm7, %%mm1                      \n\t"
+                "psubusb %%mm4, %%mm1                   \n\t"
+                "psubb %%mm1, %%mm7                     \n\t"
+                "movq %%mm7, %%mm4                      \n\t"
+                "psrlq $16, %%mm7                       \n\t"
+                "movq %%mm7, %%mm1                      \n\t"
+                "psubusb %%mm4, %%mm1                   \n\t"
+                "psubb %%mm1, %%mm7                     \n\t"
+                "movq %%mm7, %%mm4                      \n\t"
+                "psrlq $32, %%mm7                       \n\t"
+                "movq %%mm7, %%mm1                      \n\t"
+                "psubusb %%mm4, %%mm1                   \n\t"
+                "psubb %%mm1, %%mm7                     \n\t"
 #endif
 
 
-		"movq %%mm6, %%mm4				\n\t"
-		"psrlq $8, %%mm6				\n\t"
+                "movq %%mm6, %%mm4                      \n\t"
+                "psrlq $8, %%mm6                        \n\t"
 #ifdef HAVE_MMX2
-		"pmaxub %%mm4, %%mm6				\n\t" // max of pixels
-		"pshufw $0xF9, %%mm6, %%mm4			\n\t"
-		"pmaxub %%mm4, %%mm6				\n\t"
-		"pshufw $0xFE, %%mm6, %%mm4			\n\t"
-		"pmaxub %%mm4, %%mm6				\n\t"
+                "pmaxub %%mm4, %%mm6                    \n\t" // max of pixels
+                "pshufw $0xF9, %%mm6, %%mm4             \n\t"
+                "pmaxub %%mm4, %%mm6                    \n\t"
+                "pshufw $0xFE, %%mm6, %%mm4             \n\t"
+                "pmaxub %%mm4, %%mm6                    \n\t"
 #else
-		"psubusb %%mm4, %%mm6				\n\t"
-		"paddb %%mm4, %%mm6				\n\t"
-		"movq %%mm6, %%mm4				\n\t"
-		"psrlq $16, %%mm6				\n\t"
-		"psubusb %%mm4, %%mm6				\n\t"
-		"paddb %%mm4, %%mm6				\n\t"
-		"movq %%mm6, %%mm4				\n\t"
-		"psrlq $32, %%mm6				\n\t"
-		"psubusb %%mm4, %%mm6				\n\t"
-		"paddb %%mm4, %%mm6				\n\t"
+                "psubusb %%mm4, %%mm6                   \n\t"
+                "paddb %%mm4, %%mm6                     \n\t"
+                "movq %%mm6, %%mm4                      \n\t"
+                "psrlq $16, %%mm6                       \n\t"
+                "psubusb %%mm4, %%mm6                   \n\t"
+                "paddb %%mm4, %%mm6                     \n\t"
+                "movq %%mm6, %%mm4                      \n\t"
+                "psrlq $32, %%mm6                       \n\t"
+                "psubusb %%mm4, %%mm6                   \n\t"
+                "paddb %%mm4, %%mm6                     \n\t"
 #endif
-		"movq %%mm6, %%mm0				\n\t" // max
-		"psubb %%mm7, %%mm6				\n\t" // max - min
-		"movd %%mm6, %%ecx				\n\t"
-		"cmpb "MANGLE(deringThreshold)", %%cl		\n\t"
-		" jb 1f						\n\t"
-		"lea -24(%%"REG_SP"), %%"REG_c"			\n\t"
-		"and "ALIGN_MASK", %%"REG_c"			\n\t" 
-		PAVGB(%%mm0, %%mm7)				      // a=(max + min)/2
-		"punpcklbw %%mm7, %%mm7				\n\t"
-		"punpcklbw %%mm7, %%mm7				\n\t"
-		"punpcklbw %%mm7, %%mm7				\n\t"
-		"movq %%mm7, (%%"REG_c")			\n\t"
-
-		"movq (%0), %%mm0				\n\t" // L10
-		"movq %%mm0, %%mm1				\n\t" // L10
-		"movq %%mm0, %%mm2				\n\t" // L10
-		"psllq $8, %%mm1				\n\t"
-		"psrlq $8, %%mm2				\n\t"
-		"movd -4(%0), %%mm3				\n\t"
-		"movd 8(%0), %%mm4				\n\t"
-		"psrlq $24, %%mm3				\n\t"
-		"psllq $56, %%mm4				\n\t"
-		"por %%mm3, %%mm1				\n\t" // L00
-		"por %%mm4, %%mm2				\n\t" // L20
-		"movq %%mm1, %%mm3				\n\t" // L00
-		PAVGB(%%mm2, %%mm1)				      // (L20 + L00)/2
-		PAVGB(%%mm0, %%mm1)				      // (L20 + L00 + 2L10)/4
-		"psubusb %%mm7, %%mm0				\n\t"
-		"psubusb %%mm7, %%mm2				\n\t"
-		"psubusb %%mm7, %%mm3				\n\t"
-		"pcmpeqb "MANGLE(b00)", %%mm0			\n\t" // L10 > a ? 0 : -1
-		"pcmpeqb "MANGLE(b00)", %%mm2			\n\t" // L20 > a ? 0 : -1
-		"pcmpeqb "MANGLE(b00)", %%mm3			\n\t" // L00 > a ? 0 : -1
-		"paddb %%mm2, %%mm0				\n\t"
-		"paddb %%mm3, %%mm0				\n\t"
-
-		"movq (%%"REG_a"), %%mm2			\n\t" // L11
-		"movq %%mm2, %%mm3				\n\t" // L11
-		"movq %%mm2, %%mm4				\n\t" // L11
-		"psllq $8, %%mm3				\n\t"
-		"psrlq $8, %%mm4				\n\t"
-		"movd -4(%%"REG_a"), %%mm5			\n\t"
-		"movd 8(%%"REG_a"), %%mm6			\n\t"
-		"psrlq $24, %%mm5				\n\t"
-		"psllq $56, %%mm6				\n\t"
-		"por %%mm5, %%mm3				\n\t" // L01
-		"por %%mm6, %%mm4				\n\t" // L21
-		"movq %%mm3, %%mm5				\n\t" // L01
-		PAVGB(%%mm4, %%mm3)				      // (L21 + L01)/2
-		PAVGB(%%mm2, %%mm3)				      // (L21 + L01 + 2L11)/4
-		"psubusb %%mm7, %%mm2				\n\t"
-		"psubusb %%mm7, %%mm4				\n\t"
-		"psubusb %%mm7, %%mm5				\n\t"
-		"pcmpeqb "MANGLE(b00)", %%mm2			\n\t" // L11 > a ? 0 : -1
-		"pcmpeqb "MANGLE(b00)", %%mm4			\n\t" // L21 > a ? 0 : -1
-		"pcmpeqb "MANGLE(b00)", %%mm5			\n\t" // L01 > a ? 0 : -1
-		"paddb %%mm4, %%mm2				\n\t"
-		"paddb %%mm5, %%mm2				\n\t"
+                "movq %%mm6, %%mm0                      \n\t" // max
+                "psubb %%mm7, %%mm6                     \n\t" // max - min
+                "movd %%mm6, %%ecx                      \n\t"
+                "cmpb "MANGLE(deringThreshold)", %%cl   \n\t"
+                " jb 1f                                 \n\t"
+                "lea -24(%%"REG_SP"), %%"REG_c"         \n\t"
+                "and "ALIGN_MASK", %%"REG_c"            \n\t"
+                PAVGB(%%mm0, %%mm7)                           // a=(max + min)/2
+                "punpcklbw %%mm7, %%mm7                 \n\t"
+                "punpcklbw %%mm7, %%mm7                 \n\t"
+                "punpcklbw %%mm7, %%mm7                 \n\t"
+                "movq %%mm7, (%%"REG_c")                \n\t"
+
+                "movq (%0), %%mm0                       \n\t" // L10
+                "movq %%mm0, %%mm1                      \n\t" // L10
+                "movq %%mm0, %%mm2                      \n\t" // L10
+                "psllq $8, %%mm1                        \n\t"
+                "psrlq $8, %%mm2                        \n\t"
+                "movd -4(%0), %%mm3                     \n\t"
+                "movd 8(%0), %%mm4                      \n\t"
+                "psrlq $24, %%mm3                       \n\t"
+                "psllq $56, %%mm4                       \n\t"
+                "por %%mm3, %%mm1                       \n\t" // L00
+                "por %%mm4, %%mm2                       \n\t" // L20
+                "movq %%mm1, %%mm3                      \n\t" // L00
+                PAVGB(%%mm2, %%mm1)                           // (L20 + L00)/2
+                PAVGB(%%mm0, %%mm1)                           // (L20 + L00 + 2L10)/4
+                "psubusb %%mm7, %%mm0                   \n\t"
+                "psubusb %%mm7, %%mm2                   \n\t"
+                "psubusb %%mm7, %%mm3                   \n\t"
+                "pcmpeqb "MANGLE(b00)", %%mm0           \n\t" // L10 > a ? 0 : -1
+                "pcmpeqb "MANGLE(b00)", %%mm2           \n\t" // L20 > a ? 0 : -1
+                "pcmpeqb "MANGLE(b00)", %%mm3           \n\t" // L00 > a ? 0 : -1
+                "paddb %%mm2, %%mm0                     \n\t"
+                "paddb %%mm3, %%mm0                     \n\t"
+
+                "movq (%%"REG_a"), %%mm2                \n\t" // L11
+                "movq %%mm2, %%mm3                      \n\t" // L11
+                "movq %%mm2, %%mm4                      \n\t" // L11
+                "psllq $8, %%mm3                        \n\t"
+                "psrlq $8, %%mm4                        \n\t"
+                "movd -4(%%"REG_a"), %%mm5              \n\t"
+                "movd 8(%%"REG_a"), %%mm6               \n\t"
+                "psrlq $24, %%mm5                       \n\t"
+                "psllq $56, %%mm6                       \n\t"
+                "por %%mm5, %%mm3                       \n\t" // L01
+                "por %%mm6, %%mm4                       \n\t" // L21
+                "movq %%mm3, %%mm5                      \n\t" // L01
+                PAVGB(%%mm4, %%mm3)                           // (L21 + L01)/2
+                PAVGB(%%mm2, %%mm3)                           // (L21 + L01 + 2L11)/4
+                "psubusb %%mm7, %%mm2                   \n\t"
+                "psubusb %%mm7, %%mm4                   \n\t"
+                "psubusb %%mm7, %%mm5                   \n\t"
+                "pcmpeqb "MANGLE(b00)", %%mm2           \n\t" // L11 > a ? 0 : -1
+                "pcmpeqb "MANGLE(b00)", %%mm4           \n\t" // L21 > a ? 0 : -1
+                "pcmpeqb "MANGLE(b00)", %%mm5           \n\t" // L01 > a ? 0 : -1
+                "paddb %%mm4, %%mm2                     \n\t"
+                "paddb %%mm5, %%mm2                     \n\t"
 // 0, 2, 3, 1
 #define REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
-		"movq " #src ", " #sx "				\n\t" /* src[0] */\
-		"movq " #sx ", " #lx "				\n\t" /* src[0] */\
-		"movq " #sx ", " #t0 "				\n\t" /* src[0] */\
-		"psllq $8, " #lx "				\n\t"\
-		"psrlq $8, " #t0 "				\n\t"\
-		"movd -4" #src ", " #t1 "			\n\t"\
-		"psrlq $24, " #t1 "				\n\t"\
-		"por " #t1 ", " #lx "				\n\t" /* src[-1] */\
-		"movd 8" #src ", " #t1 "			\n\t"\
-		"psllq $56, " #t1 "				\n\t"\
-		"por " #t1 ", " #t0 "				\n\t" /* src[+1] */\
-		"movq " #lx ", " #t1 "				\n\t" /* src[-1] */\
-		PAVGB(t0, lx)				              /* (src[-1] + src[+1])/2 */\
-		PAVGB(sx, lx)				      /* (src[-1] + 2src[0] + src[+1])/4 */\
-		PAVGB(lx, pplx)					     \
-		"movq " #lx ", 8(%%"REG_c")			\n\t"\
-		"movq (%%"REG_c"), " #lx "			\n\t"\
-		"psubusb " #lx ", " #t1 "			\n\t"\
-		"psubusb " #lx ", " #t0 "			\n\t"\
-		"psubusb " #lx ", " #sx "			\n\t"\
-		"movq "MANGLE(b00)", " #lx "			\n\t"\
-		"pcmpeqb " #lx ", " #t1 "			\n\t" /* src[-1] > a ? 0 : -1*/\
-		"pcmpeqb " #lx ", " #t0 "			\n\t" /* src[+1] > a ? 0 : -1*/\
-		"pcmpeqb " #lx ", " #sx "			\n\t" /* src[0]  > a ? 0 : -1*/\
-		"paddb " #t1 ", " #t0 "				\n\t"\
-		"paddb " #t0 ", " #sx "				\n\t"\
+                "movq " #src ", " #sx "                 \n\t" /* src[0] */\
+                "movq " #sx ", " #lx "                  \n\t" /* src[0] */\
+                "movq " #sx ", " #t0 "                  \n\t" /* src[0] */\
+                "psllq $8, " #lx "                      \n\t"\
+                "psrlq $8, " #t0 "                      \n\t"\
+                "movd -4" #src ", " #t1 "               \n\t"\
+                "psrlq $24, " #t1 "                     \n\t"\
+                "por " #t1 ", " #lx "                   \n\t" /* src[-1] */\
+                "movd 8" #src ", " #t1 "                \n\t"\
+                "psllq $56, " #t1 "                     \n\t"\
+                "por " #t1 ", " #t0 "                   \n\t" /* src[+1] */\
+                "movq " #lx ", " #t1 "                  \n\t" /* src[-1] */\
+                PAVGB(t0, lx)                                 /* (src[-1] + src[+1])/2 */\
+                PAVGB(sx, lx)                                 /* (src[-1] + 2src[0] + src[+1])/4 */\
+                PAVGB(lx, pplx)                                     \
+                "movq " #lx ", 8(%%"REG_c")             \n\t"\
+                "movq (%%"REG_c"), " #lx "              \n\t"\
+                "psubusb " #lx ", " #t1 "               \n\t"\
+                "psubusb " #lx ", " #t0 "               \n\t"\
+                "psubusb " #lx ", " #sx "               \n\t"\
+                "movq "MANGLE(b00)", " #lx "            \n\t"\
+                "pcmpeqb " #lx ", " #t1 "               \n\t" /* src[-1] > a ? 0 : -1*/\
+                "pcmpeqb " #lx ", " #t0 "               \n\t" /* src[+1] > a ? 0 : -1*/\
+                "pcmpeqb " #lx ", " #sx "               \n\t" /* src[0]  > a ? 0 : -1*/\
+                "paddb " #t1 ", " #t0 "                 \n\t"\
+                "paddb " #t0 ", " #sx "                 \n\t"\
 \
-		PAVGB(plx, pplx)				      /* filtered */\
-		"movq " #dst ", " #t0 "				\n\t" /* dst */\
-		"movq " #t0 ", " #t1 "				\n\t" /* dst */\
-		"psubusb %3, " #t0 "				\n\t"\
-		"paddusb %3, " #t1 "				\n\t"\
-		PMAXUB(t0, pplx)\
-		PMINUB(t1, pplx, t0)\
-		"paddb " #sx ", " #ppsx "			\n\t"\
-		"paddb " #psx ", " #ppsx "			\n\t"\
-		"#paddb "MANGLE(b02)", " #ppsx "		\n\t"\
-		"pand "MANGLE(b08)", " #ppsx "			\n\t"\
-		"pcmpeqb " #lx ", " #ppsx "			\n\t"\
-		"pand " #ppsx ", " #pplx "			\n\t"\
-		"pandn " #dst ", " #ppsx "			\n\t"\
-		"por " #pplx ", " #ppsx "			\n\t"\
-		"movq " #ppsx ", " #dst "			\n\t"\
-		"movq 8(%%"REG_c"), " #lx "			\n\t"
+                PAVGB(plx, pplx)                              /* filtered */\
+                "movq " #dst ", " #t0 "                 \n\t" /* dst */\
+                "movq " #t0 ", " #t1 "                  \n\t" /* dst */\
+                "psubusb %3, " #t0 "                    \n\t"\
+                "paddusb %3, " #t1 "                    \n\t"\
+                PMAXUB(t0, pplx)\
+                PMINUB(t1, pplx, t0)\
+                "paddb " #sx ", " #ppsx "               \n\t"\
+                "paddb " #psx ", " #ppsx "              \n\t"\
+                "#paddb "MANGLE(b02)", " #ppsx "        \n\t"\
+                "pand "MANGLE(b08)", " #ppsx "          \n\t"\
+                "pcmpeqb " #lx ", " #ppsx "             \n\t"\
+                "pand " #ppsx ", " #pplx "              \n\t"\
+                "pandn " #dst ", " #ppsx "              \n\t"\
+                "por " #pplx ", " #ppsx "               \n\t"\
+                "movq " #ppsx ", " #dst "               \n\t"\
+                "movq 8(%%"REG_c"), " #lx "             \n\t"
 
 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
    REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)
@@ -1392,151 +1392,151 @@ FIND_MIN_MAX((%0, %1, 8))
 1110111
 
 */
-//DERING_CORE(dst,src                  ,ppsx ,psx  ,sx   ,pplx ,plx  ,lx   ,t0   ,t1)
-DERING_CORE((%%REGa),(%%REGa, %1)        ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%%REGa, %1),(%%REGa, %1, 2) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
-DERING_CORE((%%REGa, %1, 2),(%0, %1, 4) ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
-DERING_CORE((%0, %1, 4),(%%REGd)        ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%%REGd),(%%REGd, %1)        ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
-DERING_CORE((%%REGd, %1), (%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
-DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
-DERING_CORE((%0, %1, 8),(%%REGd, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
-
-		"1:			\n\t"
-		: : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2)
-		: "%"REG_a, "%"REG_d, "%"REG_c
-	);
-#else
-	int y;
-	int min=255;
-	int max=0;
-	int avg;
-	uint8_t *p;
-	int s[10];
-	const int QP2= c->QP/2 + 1;
-
-	for(y=1; y<9; y++)
-	{
-		int x;
-		p= src + stride*y;
-		for(x=1; x<9; x++)
-		{
-			p++;
-			if(*p > max) max= *p;
-			if(*p < min) min= *p;
-		}
-	}
-	avg= (min + max + 1)>>1;
-
-	if(max - min <deringThreshold) return;
-
-	for(y=0; y<10; y++)
-	{
-		int t = 0;
-
-		if(src[stride*y + 0] > avg) t+= 1;
-		if(src[stride*y + 1] > avg) t+= 2;
-		if(src[stride*y + 2] > avg) t+= 4;
-		if(src[stride*y + 3] > avg) t+= 8;
-		if(src[stride*y + 4] > avg) t+= 16;
-		if(src[stride*y + 5] > avg) t+= 32;
-		if(src[stride*y + 6] > avg) t+= 64;
-		if(src[stride*y + 7] > avg) t+= 128;
-		if(src[stride*y + 8] > avg) t+= 256;
-		if(src[stride*y + 9] > avg) t+= 512;
-		
-		t |= (~t)<<16;
-		t &= (t<<1) & (t>>1);
-		s[y] = t;
-	}
-	
-	for(y=1; y<9; y++)
-	{
-		int t = s[y-1] & s[y] & s[y+1];
-		t|= t>>16;
-		s[y-1]= t;
-	}
-
-	for(y=1; y<9; y++)
-	{
-		int x;
-		int t = s[y-1];
-
-		p= src + stride*y;
-		for(x=1; x<9; x++)
-		{
-			p++;
-			if(t & (1<<x))
-			{
-				int f= (*(p-stride-1)) + 2*(*(p-stride)) + (*(p-stride+1))
-				      +2*(*(p     -1)) + 4*(*p         ) + 2*(*(p     +1))
-				      +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1));
-				f= (f + 8)>>4;
+//DERING_CORE(dst          ,src            ,ppsx ,psx  ,sx   ,pplx ,plx  ,lx   ,t0   ,t1)
+DERING_CORE((%%REGa)       ,(%%REGa, %1)   ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
+DERING_CORE((%%REGa, %1)   ,(%%REGa, %1, 2),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
+DERING_CORE((%%REGa, %1, 2),(%0, %1, 4)    ,%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
+DERING_CORE((%0, %1, 4)    ,(%%REGd)       ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
+DERING_CORE((%%REGd)       ,(%%REGd, %1)   ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
+DERING_CORE((%%REGd, %1)   ,(%%REGd, %1, 2),%%mm4,%%mm0,%%mm2,%%mm5,%%mm1,%%mm3,%%mm6,%%mm7)
+DERING_CORE((%%REGd, %1, 2),(%0, %1, 8)    ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,%%mm6,%%mm7)
+DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
+
+                "1:                        \n\t"
+                : : "r" (src), "r" ((long)stride), "m" (c->pQPb), "m"(c->pQPb2)
+                : "%"REG_a, "%"REG_d, "%"REG_c
+        );
+#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+        int y;
+        int min=255;
+        int max=0;
+        int avg;
+        uint8_t *p;
+        int s[10];
+        const int QP2= c->QP/2 + 1;
+
+        for(y=1; y<9; y++)
+        {
+                int x;
+                p= src + stride*y;
+                for(x=1; x<9; x++)
+                {
+                        p++;
+                        if(*p > max) max= *p;
+                        if(*p < min) min= *p;
+                }
+        }
+        avg= (min + max + 1)>>1;
+
+        if(max - min <deringThreshold) return;
+
+        for(y=0; y<10; y++)
+        {
+                int t = 0;
+
+                if(src[stride*y + 0] > avg) t+= 1;
+                if(src[stride*y + 1] > avg) t+= 2;
+                if(src[stride*y + 2] > avg) t+= 4;
+                if(src[stride*y + 3] > avg) t+= 8;
+                if(src[stride*y + 4] > avg) t+= 16;
+                if(src[stride*y + 5] > avg) t+= 32;
+                if(src[stride*y + 6] > avg) t+= 64;
+                if(src[stride*y + 7] > avg) t+= 128;
+                if(src[stride*y + 8] > avg) t+= 256;
+                if(src[stride*y + 9] > avg) t+= 512;
+
+                t |= (~t)<<16;
+                t &= (t<<1) & (t>>1);
+                s[y] = t;
+        }
+
+        for(y=1; y<9; y++)
+        {
+                int t = s[y-1] & s[y] & s[y+1];
+                t|= t>>16;
+                s[y-1]= t;
+        }
+
+        for(y=1; y<9; y++)
+        {
+                int x;
+                int t = s[y-1];
+
+                p= src + stride*y;
+                for(x=1; x<9; x++)
+                {
+                        p++;
+                        if(t & (1<<x))
+                        {
+                                int f= (*(p-stride-1)) + 2*(*(p-stride)) + (*(p-stride+1))
+                                      +2*(*(p     -1)) + 4*(*p         ) + 2*(*(p     +1))
+                                      +(*(p+stride-1)) + 2*(*(p+stride)) + (*(p+stride+1));
+                                f= (f + 8)>>4;
 
 #ifdef DEBUG_DERING_THRESHOLD
-				asm volatile("emms\n\t":);
-				{
-				static long long numPixels=0;
-				if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
-//				if((max-min)<20 || (max-min)*QP<200)
-//				if((max-min)*QP < 500)
-//				if(max-min<QP/2)
-				if(max-min < 20)
-				{
-					static int numSkiped=0;
-					static int errorSum=0;
-					static int worstQP=0;
-					static int worstRange=0;
-					static int worstDiff=0;
-					int diff= (f - *p);
-					int absDiff= ABS(diff);
-					int error= diff*diff;
-
-					if(x==1 || x==8 || y==1 || y==8) continue;
-
-					numSkiped++;
-					if(absDiff > worstDiff)
-					{
-						worstDiff= absDiff;
-						worstQP= QP;
-						worstRange= max-min;
-					}
-					errorSum+= error;
-
-					if(1024LL*1024LL*1024LL % numSkiped == 0)
-					{
-						printf( "sum:%1.3f, skip:%d, wQP:%d, "
-							"wRange:%d, wDiff:%d, relSkip:%1.3f\n",
-							(float)errorSum/numSkiped, numSkiped, worstQP, worstRange,
-							worstDiff, (float)numSkiped/numPixels);
-					}
-				}
-				}
+                                asm volatile("emms\n\t":);
+                                {
+                                static long long numPixels=0;
+                                if(x!=1 && x!=8 && y!=1 && y!=8) numPixels++;
+//                                if((max-min)<20 || (max-min)*QP<200)
+//                                if((max-min)*QP < 500)
+//                                if(max-min<QP/2)
+                                if(max-min < 20)
+                                {
+                                        static int numSkiped=0;
+                                        static int errorSum=0;
+                                        static int worstQP=0;
+                                        static int worstRange=0;
+                                        static int worstDiff=0;
+                                        int diff= (f - *p);
+                                        int absDiff= ABS(diff);
+                                        int error= diff*diff;
+
+                                        if(x==1 || x==8 || y==1 || y==8) continue;
+
+                                        numSkiped++;
+                                        if(absDiff > worstDiff)
+                                        {
+                                                worstDiff= absDiff;
+                                                worstQP= QP;
+                                                worstRange= max-min;
+                                        }
+                                        errorSum+= error;
+
+                                        if(1024LL*1024LL*1024LL % numSkiped == 0)
+                                        {
+                                                printf( "sum:%1.3f, skip:%d, wQP:%d, "
+                                                        "wRange:%d, wDiff:%d, relSkip:%1.3f\n",
+                                                        (float)errorSum/numSkiped, numSkiped, worstQP, worstRange,
+                                                        worstDiff, (float)numSkiped/numPixels);
+                                        }
+                                }
+                                }
 #endif
-				if     (*p + QP2 < f) *p= *p + QP2;
-				else if(*p - QP2 > f) *p= *p - QP2;
-				else *p=f;
-			}
-		}
-	}
+                                if     (*p + QP2 < f) *p= *p + QP2;
+                                else if(*p - QP2 > f) *p= *p - QP2;
+                                else *p=f;
+                        }
+                }
+        }
 #ifdef DEBUG_DERING_THRESHOLD
-	if(max-min < 20)
-	{
-		for(y=1; y<9; y++)
-		{
-			int x;
-			int t = 0;
-			p= src + stride*y;
-			for(x=1; x<9; x++)
-			{
-				p++;
-				*p = MIN(*p + 20, 255);
-			}
-		}
-//		src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
-	}
-#endif
+        if(max-min < 20)
+        {
+                for(y=1; y<9; y++)
+                {
+                        int x;
+                        int t = 0;
+                        p= src + stride*y;
+                        for(x=1; x<9; x++)
+                        {
+                                p++;
+                                *p = MIN(*p + 20, 255);
+                        }
+                }
+//                src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255;
+        }
 #endif
+#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 }
 #endif //HAVE_ALTIVEC
 
@@ -1549,46 +1549,46 @@ DERING_CORE((%0, %1, 8),(%%REGd, %1, 4) ,%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%m
 static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride)
 {
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
-	src+= 4*stride;
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_c"		\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	ecx	ecx+%1	ecx+2%1	%0+8%1	ecx+4%1
-
-		"movq (%0), %%mm0				\n\t"
-		"movq (%%"REG_a", %1), %%mm1			\n\t"
-		PAVGB(%%mm1, %%mm0)
-		"movq %%mm0, (%%"REG_a")			\n\t"
-		"movq (%0, %1, 4), %%mm0			\n\t"
-		PAVGB(%%mm0, %%mm1)
-		"movq %%mm1, (%%"REG_a", %1, 2)			\n\t"
-		"movq (%%"REG_c", %1), %%mm1			\n\t"
-		PAVGB(%%mm1, %%mm0)
-		"movq %%mm0, (%%"REG_c")			\n\t"
-		"movq (%0, %1, 8), %%mm0			\n\t"
-		PAVGB(%%mm0, %%mm1)
-		"movq %%mm1, (%%"REG_c", %1, 2)			\n\t"
-
-		: : "r" (src), "r" ((long)stride)
-		: "%"REG_a, "%"REG_c
-	);
+        src+= 4*stride;
+        asm volatile(
+                "lea (%0, %1), %%"REG_a"                \n\t"
+                "lea (%%"REG_a", %1, 4), %%"REG_c"      \n\t"
+//      0       1       2       3       4       5       6       7       8       9
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %0+8%1  ecx+4%1
+
+                "movq (%0), %%mm0                       \n\t"
+                "movq (%%"REG_a", %1), %%mm1            \n\t"
+                PAVGB(%%mm1, %%mm0)
+                "movq %%mm0, (%%"REG_a")                \n\t"
+                "movq (%0, %1, 4), %%mm0                \n\t"
+                PAVGB(%%mm0, %%mm1)
+                "movq %%mm1, (%%"REG_a", %1, 2)         \n\t"
+                "movq (%%"REG_c", %1), %%mm1            \n\t"
+                PAVGB(%%mm1, %%mm0)
+                "movq %%mm0, (%%"REG_c")                \n\t"
+                "movq (%0, %1, 8), %%mm0                \n\t"
+                PAVGB(%%mm0, %%mm1)
+                "movq %%mm1, (%%"REG_c", %1, 2)         \n\t"
+
+                : : "r" (src), "r" ((long)stride)
+                : "%"REG_a, "%"REG_c
+        );
 #else
-	int a, b, x;
-	src+= 4*stride;
-
-	for(x=0; x<2; x++){
-		a= *(uint32_t*)&src[stride*0];
-		b= *(uint32_t*)&src[stride*2];
-		*(uint32_t*)&src[stride*1]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-		a= *(uint32_t*)&src[stride*4];
-		*(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-		b= *(uint32_t*)&src[stride*6];
-		*(uint32_t*)&src[stride*5]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-		a= *(uint32_t*)&src[stride*8];
-		*(uint32_t*)&src[stride*7]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-		src += 4;
-	}
+        int a, b, x;
+        src+= 4*stride;
+
+        for(x=0; x<2; x++){
+                a= *(uint32_t*)&src[stride*0];
+                b= *(uint32_t*)&src[stride*2];
+                *(uint32_t*)&src[stride*1]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+                a= *(uint32_t*)&src[stride*4];
+                *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+                b= *(uint32_t*)&src[stride*6];
+                *(uint32_t*)&src[stride*5]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+                a= *(uint32_t*)&src[stride*8];
+                *(uint32_t*)&src[stride*7]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+                src += 4;
+        }
 #endif
 }
 
@@ -1602,59 +1602,59 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
 static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride)
 {
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
-	src+= stride*3;
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-		"lea (%%"REG_d", %1, 4), %%"REG_c"		\n\t"
-		"add %1, %%"REG_c"				\n\t"
-		"pxor %%mm7, %%mm7				\n\t"
-//	0	1	2	3	4	5	6	7	8	9	10
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1 ecx
+        src+= stride*3;
+        asm volatile(
+                "lea (%0, %1), %%"REG_a"                \n\t"
+                "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+                "lea (%%"REG_d", %1, 4), %%"REG_c"      \n\t"
+                "add %1, %%"REG_c"                      \n\t"
+                "pxor %%mm7, %%mm7                      \n\t"
+//      0       1       2       3       4       5       6       7       8       9       10
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1 %0+8%1  edx+4%1 ecx
 
 #define REAL_DEINT_CUBIC(a,b,c,d,e)\
-		"movq " #a ", %%mm0				\n\t"\
-		"movq " #b ", %%mm1				\n\t"\
-		"movq " #d ", %%mm2				\n\t"\
-		"movq " #e ", %%mm3				\n\t"\
-		PAVGB(%%mm2, %%mm1)					/* (b+d) /2 */\
-		PAVGB(%%mm3, %%mm0)					/* a(a+e) /2 */\
-		"movq %%mm0, %%mm2				\n\t"\
-		"punpcklbw %%mm7, %%mm0				\n\t"\
-		"punpckhbw %%mm7, %%mm2				\n\t"\
-		"movq %%mm1, %%mm3				\n\t"\
-		"punpcklbw %%mm7, %%mm1				\n\t"\
-		"punpckhbw %%mm7, %%mm3				\n\t"\
-		"psubw %%mm1, %%mm0				\n\t"	/* L(a+e - (b+d))/2 */\
-		"psubw %%mm3, %%mm2				\n\t"	/* H(a+e - (b+d))/2 */\
-		"psraw $3, %%mm0				\n\t"	/* L(a+e - (b+d))/16 */\
-		"psraw $3, %%mm2				\n\t"	/* H(a+e - (b+d))/16 */\
-		"psubw %%mm0, %%mm1				\n\t"	/* L(9b + 9d - a - e)/16 */\
-		"psubw %%mm2, %%mm3				\n\t"	/* H(9b + 9d - a - e)/16 */\
-		"packuswb %%mm3, %%mm1				\n\t"\
-		"movq %%mm1, " #c "				\n\t"
+                "movq " #a ", %%mm0                     \n\t"\
+                "movq " #b ", %%mm1                     \n\t"\
+                "movq " #d ", %%mm2                     \n\t"\
+                "movq " #e ", %%mm3                     \n\t"\
+                PAVGB(%%mm2, %%mm1)                             /* (b+d) /2 */\
+                PAVGB(%%mm3, %%mm0)                             /* (a+e) /2 */\
+                "movq %%mm0, %%mm2                      \n\t"\
+                "punpcklbw %%mm7, %%mm0                 \n\t"\
+                "punpckhbw %%mm7, %%mm2                 \n\t"\
+                "movq %%mm1, %%mm3                      \n\t"\
+                "punpcklbw %%mm7, %%mm1                 \n\t"\
+                "punpckhbw %%mm7, %%mm3                 \n\t"\
+                "psubw %%mm1, %%mm0                     \n\t"   /* L(a+e - (b+d))/2 */\
+                "psubw %%mm3, %%mm2                     \n\t"   /* H(a+e - (b+d))/2 */\
+                "psraw $3, %%mm0                        \n\t"   /* L(a+e - (b+d))/16 */\
+                "psraw $3, %%mm2                        \n\t"   /* H(a+e - (b+d))/16 */\
+                "psubw %%mm0, %%mm1                     \n\t"   /* L(9b + 9d - a - e)/16 */\
+                "psubw %%mm2, %%mm3                     \n\t"   /* H(9b + 9d - a - e)/16 */\
+                "packuswb %%mm3, %%mm1                  \n\t"\
+                "movq %%mm1, " #c "                     \n\t"
 #define DEINT_CUBIC(a,b,c,d,e)  REAL_DEINT_CUBIC(a,b,c,d,e)
 
-DEINT_CUBIC((%0), (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4), (%%REGd, %1))
-DEINT_CUBIC((%%REGa, %1), (%0, %1, 4), (%%REGd), (%%REGd, %1), (%0, %1, 8))
-DEINT_CUBIC((%0, %1, 4), (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8), (%%REGc))
-DEINT_CUBIC((%%REGd, %1), (%0, %1, 8), (%%REGd, %1, 4), (%%REGc), (%%REGc, %1, 2))
-
-		: : "r" (src), "r" ((long)stride)
-		: "%"REG_a, "%"REG_d, "%"REG_c
-	);
-#else
-	int x;
-	src+= stride*3;
-	for(x=0; x<8; x++)
-	{
-		src[stride*3] = CLIP((-src[0]        + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
-		src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
-		src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
-		src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
-		src++;
-	}
-#endif
+DEINT_CUBIC((%0)        , (%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd, %1))
+DEINT_CUBIC((%%REGa, %1), (%0, %1, 4) , (%%REGd)       , (%%REGd, %1), (%0, %1, 8))
+DEINT_CUBIC((%0, %1, 4) , (%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGc))
+DEINT_CUBIC((%%REGd, %1), (%0, %1, 8) , (%%REGd, %1, 4), (%%REGc)    , (%%REGc, %1, 2))
+
+                : : "r" (src), "r" ((long)stride)
+                : "%"REG_a, "%"REG_d, "%"REG_c
+        );
+#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+        int x;
+        src+= stride*3;
+        for(x=0; x<8; x++)
+        {
+                src[stride*3] = CLIP((-src[0]        + 9*src[stride*2] + 9*src[stride*4] - src[stride*6])>>4);
+                src[stride*5] = CLIP((-src[stride*2] + 9*src[stride*4] + 9*src[stride*6] - src[stride*8])>>4);
+                src[stride*7] = CLIP((-src[stride*4] + 9*src[stride*6] + 9*src[stride*8] - src[stride*10])>>4);
+                src[stride*9] = CLIP((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4);
+                src++;
+        }
+#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 }
 
 /**
@@ -1667,74 +1667,74 @@ DEINT_CUBIC((%%REGd, %1), (%0, %1, 8), (%%REGd, %1, 4), (%%REGc), (%%REGc, %1, 2
 static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp)
 {
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
-	src+= stride*4;
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-		"pxor %%mm7, %%mm7				\n\t"
-		"movq (%2), %%mm0				\n\t"
-//	0	1	2	3	4	5	6	7	8	9	10
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1 ecx
+        src+= stride*4;
+        asm volatile(
+                "lea (%0, %1), %%"REG_a"                \n\t"
+                "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+                "pxor %%mm7, %%mm7                      \n\t"
+                "movq (%2), %%mm0                       \n\t"
+//      0       1       2       3       4       5       6       7       8       9       10
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1 %0+8%1  edx+4%1 ecx
 
 #define REAL_DEINT_FF(a,b,c,d)\
-		"movq " #a ", %%mm1				\n\t"\
-		"movq " #b ", %%mm2				\n\t"\
-		"movq " #c ", %%mm3				\n\t"\
-		"movq " #d ", %%mm4				\n\t"\
-		PAVGB(%%mm3, %%mm1)					\
-		PAVGB(%%mm4, %%mm0)					\
-		"movq %%mm0, %%mm3				\n\t"\
-		"punpcklbw %%mm7, %%mm0				\n\t"\
-		"punpckhbw %%mm7, %%mm3				\n\t"\
-		"movq %%mm1, %%mm4				\n\t"\
-		"punpcklbw %%mm7, %%mm1				\n\t"\
-		"punpckhbw %%mm7, %%mm4				\n\t"\
-		"psllw $2, %%mm1				\n\t"\
-		"psllw $2, %%mm4				\n\t"\
-		"psubw %%mm0, %%mm1				\n\t"\
-		"psubw %%mm3, %%mm4				\n\t"\
-		"movq %%mm2, %%mm5				\n\t"\
-		"movq %%mm2, %%mm0				\n\t"\
-		"punpcklbw %%mm7, %%mm2				\n\t"\
-		"punpckhbw %%mm7, %%mm5				\n\t"\
-		"paddw %%mm2, %%mm1				\n\t"\
-		"paddw %%mm5, %%mm4				\n\t"\
-		"psraw $2, %%mm1				\n\t"\
-		"psraw $2, %%mm4				\n\t"\
-		"packuswb %%mm4, %%mm1				\n\t"\
-		"movq %%mm1, " #b "				\n\t"\
+                "movq " #a ", %%mm1                     \n\t"\
+                "movq " #b ", %%mm2                     \n\t"\
+                "movq " #c ", %%mm3                     \n\t"\
+                "movq " #d ", %%mm4                     \n\t"\
+                PAVGB(%%mm3, %%mm1)                          \
+                PAVGB(%%mm4, %%mm0)                          \
+                "movq %%mm0, %%mm3                      \n\t"\
+                "punpcklbw %%mm7, %%mm0                 \n\t"\
+                "punpckhbw %%mm7, %%mm3                 \n\t"\
+                "movq %%mm1, %%mm4                      \n\t"\
+                "punpcklbw %%mm7, %%mm1                 \n\t"\
+                "punpckhbw %%mm7, %%mm4                 \n\t"\
+                "psllw $2, %%mm1                        \n\t"\
+                "psllw $2, %%mm4                        \n\t"\
+                "psubw %%mm0, %%mm1                     \n\t"\
+                "psubw %%mm3, %%mm4                     \n\t"\
+                "movq %%mm2, %%mm5                      \n\t"\
+                "movq %%mm2, %%mm0                      \n\t"\
+                "punpcklbw %%mm7, %%mm2                 \n\t"\
+                "punpckhbw %%mm7, %%mm5                 \n\t"\
+                "paddw %%mm2, %%mm1                     \n\t"\
+                "paddw %%mm5, %%mm4                     \n\t"\
+                "psraw $2, %%mm1                        \n\t"\
+                "psraw $2, %%mm4                        \n\t"\
+                "packuswb %%mm4, %%mm1                  \n\t"\
+                "movq %%mm1, " #b "                     \n\t"\
 
 #define DEINT_FF(a,b,c,d)  REAL_DEINT_FF(a,b,c,d)
 
-DEINT_FF((%0)       ,  (%%REGa)       , (%%REGa, %1), (%%REGa, %1, 2))
-DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4),  (%%REGd)       )
-DEINT_FF((%0, %1, 4),  (%%REGd)       , (%%REGd, %1), (%%REGd, %1, 2))
-DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8),  (%%REGd, %1, 4))
-
-		"movq %%mm0, (%2)				\n\t"
-		: : "r" (src), "r" ((long)stride), "r"(tmp)
-		: "%"REG_a, "%"REG_d
-	);
-#else
-	int x;
-	src+= stride*4;
-	for(x=0; x<8; x++)
-	{
-		int t1= tmp[x];
-		int t2= src[stride*1];
-
-		src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3);
-		t1= src[stride*4];
-		src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3);
-		t2= src[stride*6];
-		src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3);
-		t1= src[stride*8];
-		src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
-		tmp[x]= t1;
-
-		src++;
-	}
-#endif
+DEINT_FF((%0)        , (%%REGa)       , (%%REGa, %1), (%%REGa, %1, 2))
+DEINT_FF((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4) , (%%REGd)       )
+DEINT_FF((%0, %1, 4) , (%%REGd)       , (%%REGd, %1), (%%REGd, %1, 2))
+DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8) , (%%REGd, %1, 4))
+
+                "movq %%mm0, (%2)                       \n\t"
+                : : "r" (src), "r" ((long)stride), "r"(tmp)
+                : "%"REG_a, "%"REG_d
+        );
+#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+        int x;
+        src+= stride*4;
+        for(x=0; x<8; x++)
+        {
+                int t1= tmp[x];
+                int t2= src[stride*1];
+
+                src[stride*1]= CLIP((-t1 + 4*src[stride*0] + 2*t2 + 4*src[stride*2] - src[stride*3] + 4)>>3);
+                t1= src[stride*4];
+                src[stride*3]= CLIP((-t2 + 4*src[stride*2] + 2*t1 + 4*src[stride*4] - src[stride*5] + 4)>>3);
+                t2= src[stride*6];
+                src[stride*5]= CLIP((-t1 + 4*src[stride*4] + 2*t2 + 4*src[stride*6] - src[stride*7] + 4)>>3);
+                t1= src[stride*8];
+                src[stride*7]= CLIP((-t2 + 4*src[stride*6] + 2*t1 + 4*src[stride*8] - src[stride*9] + 4)>>3);
+                tmp[x]= t1;
+
+                src++;
+        }
+#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 }
 
 /**
@@ -1747,48 +1747,48 @@ DEINT_FF((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8),  (%%REGd, %1, 4))
 static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2)
 {
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
-	src+= stride*4;
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-		"pxor %%mm7, %%mm7				\n\t"
-		"movq (%2), %%mm0				\n\t"
-		"movq (%3), %%mm1				\n\t"
-//	0	1	2	3	4	5	6	7	8	9	10
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1 ecx
+        src+= stride*4;
+        asm volatile(
+                "lea (%0, %1), %%"REG_a"                \n\t"
+                "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+                "pxor %%mm7, %%mm7                      \n\t"
+                "movq (%2), %%mm0                       \n\t"
+                "movq (%3), %%mm1                       \n\t"
+//      0       1       2       3       4       5       6       7       8       9       10
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1 %0+8%1  edx+4%1 ecx
 
 #define REAL_DEINT_L5(t1,t2,a,b,c)\
-		"movq " #a ", %%mm2				\n\t"\
-		"movq " #b ", %%mm3				\n\t"\
-		"movq " #c ", %%mm4				\n\t"\
-		PAVGB(t2, %%mm3)					\
-		PAVGB(t1, %%mm4)					\
-		"movq %%mm2, %%mm5				\n\t"\
-		"movq %%mm2, " #t1 "				\n\t"\
-		"punpcklbw %%mm7, %%mm2				\n\t"\
-		"punpckhbw %%mm7, %%mm5				\n\t"\
-		"movq %%mm2, %%mm6				\n\t"\
-		"paddw %%mm2, %%mm2				\n\t"\
-		"paddw %%mm6, %%mm2				\n\t"\
-		"movq %%mm5, %%mm6				\n\t"\
-		"paddw %%mm5, %%mm5				\n\t"\
-		"paddw %%mm6, %%mm5				\n\t"\
-		"movq %%mm3, %%mm6				\n\t"\
-		"punpcklbw %%mm7, %%mm3				\n\t"\
-		"punpckhbw %%mm7, %%mm6				\n\t"\
-		"paddw %%mm3, %%mm3				\n\t"\
-		"paddw %%mm6, %%mm6				\n\t"\
-		"paddw %%mm3, %%mm2				\n\t"\
-		"paddw %%mm6, %%mm5				\n\t"\
-		"movq %%mm4, %%mm6				\n\t"\
-		"punpcklbw %%mm7, %%mm4				\n\t"\
-		"punpckhbw %%mm7, %%mm6				\n\t"\
-		"psubw %%mm4, %%mm2				\n\t"\
-		"psubw %%mm6, %%mm5				\n\t"\
-		"psraw $2, %%mm2				\n\t"\
-		"psraw $2, %%mm5				\n\t"\
-		"packuswb %%mm5, %%mm2				\n\t"\
-		"movq %%mm2, " #a "				\n\t"\
+                "movq " #a ", %%mm2                     \n\t"\
+                "movq " #b ", %%mm3                     \n\t"\
+                "movq " #c ", %%mm4                     \n\t"\
+                PAVGB(t2, %%mm3)                             \
+                PAVGB(t1, %%mm4)                             \
+                "movq %%mm2, %%mm5                      \n\t"\
+                "movq %%mm2, " #t1 "                    \n\t"\
+                "punpcklbw %%mm7, %%mm2                 \n\t"\
+                "punpckhbw %%mm7, %%mm5                 \n\t"\
+                "movq %%mm2, %%mm6                      \n\t"\
+                "paddw %%mm2, %%mm2                     \n\t"\
+                "paddw %%mm6, %%mm2                     \n\t"\
+                "movq %%mm5, %%mm6                      \n\t"\
+                "paddw %%mm5, %%mm5                     \n\t"\
+                "paddw %%mm6, %%mm5                     \n\t"\
+                "movq %%mm3, %%mm6                      \n\t"\
+                "punpcklbw %%mm7, %%mm3                 \n\t"\
+                "punpckhbw %%mm7, %%mm6                 \n\t"\
+                "paddw %%mm3, %%mm3                     \n\t"\
+                "paddw %%mm6, %%mm6                     \n\t"\
+                "paddw %%mm3, %%mm2                     \n\t"\
+                "paddw %%mm6, %%mm5                     \n\t"\
+                "movq %%mm4, %%mm6                      \n\t"\
+                "punpcklbw %%mm7, %%mm4                 \n\t"\
+                "punpckhbw %%mm7, %%mm6                 \n\t"\
+                "psubw %%mm4, %%mm2                     \n\t"\
+                "psubw %%mm6, %%mm5                     \n\t"\
+                "psraw $2, %%mm2                        \n\t"\
+                "psraw $2, %%mm5                        \n\t"\
+                "packuswb %%mm5, %%mm2                  \n\t"\
+                "movq %%mm2, " #a "                     \n\t"\
 
 #define DEINT_L5(t1,t2,a,b,c)  REAL_DEINT_L5(t1,t2,a,b,c)
 
@@ -1796,47 +1796,47 @@ DEINT_L5(%%mm0, %%mm1, (%0)           , (%%REGa)       , (%%REGa, %1)   )
 DEINT_L5(%%mm1, %%mm0, (%%REGa)       , (%%REGa, %1)   , (%%REGa, %1, 2))
 DEINT_L5(%%mm0, %%mm1, (%%REGa, %1)   , (%%REGa, %1, 2), (%0, %1, 4)   )
 DEINT_L5(%%mm1, %%mm0, (%%REGa, %1, 2), (%0, %1, 4)    , (%%REGd)       )
-DEINT_L5(%%mm0, %%mm1, (%0, %1, 4)    , (%%REGd)       , (%%REGd, %1)   )  
+DEINT_L5(%%mm0, %%mm1, (%0, %1, 4)    , (%%REGd)       , (%%REGd, %1)   )
 DEINT_L5(%%mm1, %%mm0, (%%REGd)       , (%%REGd, %1)   , (%%REGd, %1, 2))
 DEINT_L5(%%mm0, %%mm1, (%%REGd, %1)   , (%%REGd, %1, 2), (%0, %1, 8)   )
 DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))
 
-		"movq %%mm0, (%2)				\n\t"
-		"movq %%mm1, (%3)				\n\t"
-		: : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2)
-		: "%"REG_a, "%"REG_d
-	);
-#else
-	int x;
-	src+= stride*4;
-	for(x=0; x<8; x++)
-	{
-		int t1= tmp[x];
-		int t2= tmp2[x];
-		int t3= src[0];
-
-		src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);
-		t1= src[stride*1];
-		src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
-		t2= src[stride*2];
-		src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
-		t3= src[stride*3];
-		src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
-		t1= src[stride*4];
-		src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
-		t2= src[stride*5];
-		src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
-		t3= src[stride*6];
-		src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
-		t1= src[stride*7];
-		src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);
-
-		tmp[x]= t3;
-		tmp2[x]= t1;
-
-		src++;
-	}
-#endif
+                "movq %%mm0, (%2)                       \n\t"
+                "movq %%mm1, (%3)                       \n\t"
+                : : "r" (src), "r" ((long)stride), "r"(tmp), "r"(tmp2)
+                : "%"REG_a, "%"REG_d
+        );
+#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+        int x;
+        src+= stride*4;
+        for(x=0; x<8; x++)
+        {
+                int t1= tmp[x];
+                int t2= tmp2[x];
+                int t3= src[0];
+
+                src[stride*0]= CLIP((-(t1 + src[stride*2]) + 2*(t2 + src[stride*1]) + 6*t3 + 4)>>3);
+                t1= src[stride*1];
+                src[stride*1]= CLIP((-(t2 + src[stride*3]) + 2*(t3 + src[stride*2]) + 6*t1 + 4)>>3);
+                t2= src[stride*2];
+                src[stride*2]= CLIP((-(t3 + src[stride*4]) + 2*(t1 + src[stride*3]) + 6*t2 + 4)>>3);
+                t3= src[stride*3];
+                src[stride*3]= CLIP((-(t1 + src[stride*5]) + 2*(t2 + src[stride*4]) + 6*t3 + 4)>>3);
+                t1= src[stride*4];
+                src[stride*4]= CLIP((-(t2 + src[stride*6]) + 2*(t3 + src[stride*5]) + 6*t1 + 4)>>3);
+                t2= src[stride*5];
+                src[stride*5]= CLIP((-(t3 + src[stride*7]) + 2*(t1 + src[stride*6]) + 6*t2 + 4)>>3);
+                t3= src[stride*6];
+                src[stride*6]= CLIP((-(t1 + src[stride*8]) + 2*(t2 + src[stride*7]) + 6*t3 + 4)>>3);
+                t1= src[stride*7];
+                src[stride*7]= CLIP((-(t2 + src[stride*9]) + 2*(t3 + src[stride*8]) + 6*t1 + 4)>>3);
+
+                tmp[x]= t3;
+                tmp2[x]= t1;
+
+                src++;
+        }
+#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 }
 
 /**
@@ -1849,96 +1849,96 @@ DEINT_L5(%%mm1, %%mm0, (%%REGd, %1, 2), (%0, %1, 8)    , (%%REGd, %1, 4))
 static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp)
 {
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
-	src+= 4*stride;
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
-
-		"movq (%2), %%mm0				\n\t" // L0
-		"movq (%%"REG_a"), %%mm1			\n\t" // L2
-		PAVGB(%%mm1, %%mm0)				      // L0+L2
-		"movq (%0), %%mm2				\n\t" // L1
-		PAVGB(%%mm2, %%mm0)
-		"movq %%mm0, (%0)				\n\t"
-		"movq (%%"REG_a", %1), %%mm0			\n\t" // L3
-		PAVGB(%%mm0, %%mm2)				      // L1+L3
-		PAVGB(%%mm1, %%mm2)				      // 2L2 + L1 + L3
-		"movq %%mm2, (%%"REG_a")			\n\t"
-		"movq (%%"REG_a", %1, 2), %%mm2			\n\t" // L4
-		PAVGB(%%mm2, %%mm1)				      // L2+L4
-		PAVGB(%%mm0, %%mm1)				      // 2L3 + L2 + L4
-		"movq %%mm1, (%%"REG_a", %1)			\n\t"
-		"movq (%0, %1, 4), %%mm1			\n\t" // L5
-		PAVGB(%%mm1, %%mm0)				      // L3+L5
-		PAVGB(%%mm2, %%mm0)				      // 2L4 + L3 + L5
-		"movq %%mm0, (%%"REG_a", %1, 2)			\n\t"
-		"movq (%%"REG_d"), %%mm0			\n\t" // L6
-		PAVGB(%%mm0, %%mm2)				      // L4+L6
-		PAVGB(%%mm1, %%mm2)				      // 2L5 + L4 + L6
-		"movq %%mm2, (%0, %1, 4)			\n\t"
-		"movq (%%"REG_d", %1), %%mm2			\n\t" // L7
-		PAVGB(%%mm2, %%mm1)				      // L5+L7
-		PAVGB(%%mm0, %%mm1)				      // 2L6 + L5 + L7
-		"movq %%mm1, (%%"REG_d")			\n\t"
-		"movq (%%"REG_d", %1, 2), %%mm1			\n\t" // L8
-		PAVGB(%%mm1, %%mm0)				      // L6+L8
-		PAVGB(%%mm2, %%mm0)				      // 2L7 + L6 + L8
-		"movq %%mm0, (%%"REG_d", %1)			\n\t"
-		"movq (%0, %1, 8), %%mm0			\n\t" // L9
-		PAVGB(%%mm0, %%mm2)				      // L7+L9
-		PAVGB(%%mm1, %%mm2)				      // 2L8 + L7 + L9
-		"movq %%mm2, (%%"REG_d", %1, 2)			\n\t"
-		"movq %%mm1, (%2)				\n\t"
-
-		: : "r" (src), "r" ((long)stride), "r" (tmp)
-		: "%"REG_a, "%"REG_d
-	);
-#else
-	int a, b, c, x;
-	src+= 4*stride;
-
-	for(x=0; x<2; x++){
-		a= *(uint32_t*)&tmp[stride*0];
-		b= *(uint32_t*)&src[stride*0];
-		c= *(uint32_t*)&src[stride*1];
-		a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-
-		a= *(uint32_t*)&src[stride*2];
-		b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
-
-		b= *(uint32_t*)&src[stride*3];
-		c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
-
-		c= *(uint32_t*)&src[stride*4];
-		a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-
-		a= *(uint32_t*)&src[stride*5];
-		b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
-
-		b= *(uint32_t*)&src[stride*6];
-		c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
-
-		c= *(uint32_t*)&src[stride*7];
-		a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
-
-		a= *(uint32_t*)&src[stride*8];
-		b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
-		*(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
-
-		*(uint32_t*)&tmp[stride*0]= c;
-		src += 4;
-		tmp += 4;
-	}
-#endif
+        src+= 4*stride;
+        asm volatile(
+                "lea (%0, %1), %%"REG_a"                \n\t"
+                "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+//      0       1       2       3       4       5       6       7       8       9
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1 %0+8%1  edx+4%1
+
+                "movq (%2), %%mm0                       \n\t" // L0
+                "movq (%%"REG_a"), %%mm1                \n\t" // L2
+                PAVGB(%%mm1, %%mm0)                           // L0+L2
+                "movq (%0), %%mm2                       \n\t" // L1
+                PAVGB(%%mm2, %%mm0)
+                "movq %%mm0, (%0)                       \n\t"
+                "movq (%%"REG_a", %1), %%mm0            \n\t" // L3
+                PAVGB(%%mm0, %%mm2)                           // L1+L3
+                PAVGB(%%mm1, %%mm2)                           // 2L2 + L1 + L3
+                "movq %%mm2, (%%"REG_a")                \n\t"
+                "movq (%%"REG_a", %1, 2), %%mm2         \n\t" // L4
+                PAVGB(%%mm2, %%mm1)                           // L2+L4
+                PAVGB(%%mm0, %%mm1)                           // 2L3 + L2 + L4
+                "movq %%mm1, (%%"REG_a", %1)            \n\t"
+                "movq (%0, %1, 4), %%mm1                \n\t" // L5
+                PAVGB(%%mm1, %%mm0)                           // L3+L5
+                PAVGB(%%mm2, %%mm0)                           // 2L4 + L3 + L5
+                "movq %%mm0, (%%"REG_a", %1, 2)         \n\t"
+                "movq (%%"REG_d"), %%mm0                \n\t" // L6
+                PAVGB(%%mm0, %%mm2)                           // L4+L6
+                PAVGB(%%mm1, %%mm2)                           // 2L5 + L4 + L6
+                "movq %%mm2, (%0, %1, 4)                \n\t"
+                "movq (%%"REG_d", %1), %%mm2            \n\t" // L7
+                PAVGB(%%mm2, %%mm1)                           // L5+L7
+                PAVGB(%%mm0, %%mm1)                           // 2L6 + L5 + L7
+                "movq %%mm1, (%%"REG_d")                \n\t"
+                "movq (%%"REG_d", %1, 2), %%mm1         \n\t" // L8
+                PAVGB(%%mm1, %%mm0)                           // L6+L8
+                PAVGB(%%mm2, %%mm0)                           // 2L7 + L6 + L8
+                "movq %%mm0, (%%"REG_d", %1)            \n\t"
+                "movq (%0, %1, 8), %%mm0                \n\t" // L9
+                PAVGB(%%mm0, %%mm2)                           // L7+L9
+                PAVGB(%%mm1, %%mm2)                           // 2L8 + L7 + L9
+                "movq %%mm2, (%%"REG_d", %1, 2)         \n\t"
+                "movq %%mm1, (%2)                       \n\t"
+
+                : : "r" (src), "r" ((long)stride), "r" (tmp)
+                : "%"REG_a, "%"REG_d
+        );
+#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+        int a, b, c, x;
+        src+= 4*stride;
+
+        for(x=0; x<2; x++){
+                a= *(uint32_t*)&tmp[stride*0];
+                b= *(uint32_t*)&src[stride*0];
+                c= *(uint32_t*)&src[stride*1];
+                a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
+                *(uint32_t*)&src[stride*0]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+
+                a= *(uint32_t*)&src[stride*2];
+                b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
+                *(uint32_t*)&src[stride*1]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
+
+                b= *(uint32_t*)&src[stride*3];
+                c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
+                *(uint32_t*)&src[stride*2]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
+
+                c= *(uint32_t*)&src[stride*4];
+                a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
+                *(uint32_t*)&src[stride*3]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+
+                a= *(uint32_t*)&src[stride*5];
+                b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
+                *(uint32_t*)&src[stride*4]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
+
+                b= *(uint32_t*)&src[stride*6];
+                c= (b&c) + (((b^c)&0xFEFEFEFEUL)>>1);
+                *(uint32_t*)&src[stride*5]= (c|a) - (((c^a)&0xFEFEFEFEUL)>>1);
+
+                c= *(uint32_t*)&src[stride*7];
+                a= (a&c) + (((a^c)&0xFEFEFEFEUL)>>1);
+                *(uint32_t*)&src[stride*6]= (a|b) - (((a^b)&0xFEFEFEFEUL)>>1);
+
+                a= *(uint32_t*)&src[stride*8];
+                b= (a&b) + (((a^b)&0xFEFEFEFEUL)>>1);
+                *(uint32_t*)&src[stride*7]= (c|b) - (((c^b)&0xFEFEFEFEUL)>>1);
+
+                *(uint32_t*)&tmp[stride*0]= c;
+                src += 4;
+                tmp += 4;
+        }
+#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 }
 
 /**
@@ -1950,120 +1950,120 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
 static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
 {
 #ifdef HAVE_MMX
-	src+= 4*stride;
+        src+= 4*stride;
 #ifdef HAVE_MMX2
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
-
-		"movq (%0), %%mm0				\n\t" //
-		"movq (%%"REG_a", %1), %%mm2			\n\t" //
-		"movq (%%"REG_a"), %%mm1			\n\t" //
-		"movq %%mm0, %%mm3				\n\t"
-		"pmaxub %%mm1, %%mm0				\n\t" //
-		"pminub %%mm3, %%mm1				\n\t" //
-		"pmaxub %%mm2, %%mm1				\n\t" //
-		"pminub %%mm1, %%mm0				\n\t"
-		"movq %%mm0, (%%"REG_a")			\n\t"
-
-		"movq (%0, %1, 4), %%mm0			\n\t" //
-		"movq (%%"REG_a", %1, 2), %%mm1			\n\t" //
-		"movq %%mm2, %%mm3				\n\t"
-		"pmaxub %%mm1, %%mm2				\n\t" //
-		"pminub %%mm3, %%mm1				\n\t" //
-		"pmaxub %%mm0, %%mm1				\n\t" //
-		"pminub %%mm1, %%mm2				\n\t"
-		"movq %%mm2, (%%"REG_a", %1, 2)			\n\t"
-
-		"movq (%%"REG_d"), %%mm2			\n\t" //
-		"movq (%%"REG_d", %1), %%mm1			\n\t" //
-		"movq %%mm2, %%mm3				\n\t"
-		"pmaxub %%mm0, %%mm2				\n\t" //
-		"pminub %%mm3, %%mm0				\n\t" //
-		"pmaxub %%mm1, %%mm0				\n\t" //
-		"pminub %%mm0, %%mm2				\n\t"
-		"movq %%mm2, (%%"REG_d")			\n\t"
-
-		"movq (%%"REG_d", %1, 2), %%mm2			\n\t" //
-		"movq (%0, %1, 8), %%mm0			\n\t" //
-		"movq %%mm2, %%mm3				\n\t"
-		"pmaxub %%mm0, %%mm2				\n\t" //
-		"pminub %%mm3, %%mm0				\n\t" //
-		"pmaxub %%mm1, %%mm0				\n\t" //
-		"pminub %%mm0, %%mm2				\n\t"
-		"movq %%mm2, (%%"REG_d", %1, 2)			\n\t"
-
-
-		: : "r" (src), "r" ((long)stride)
-		: "%"REG_a, "%"REG_d
-	);
+        asm volatile(
+                "lea (%0, %1), %%"REG_a"                \n\t"
+                "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+//      0       1       2       3       4       5       6       7       8       9
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1 %0+8%1  edx+4%1
+
+                "movq (%0), %%mm0                       \n\t" //
+                "movq (%%"REG_a", %1), %%mm2            \n\t" //
+                "movq (%%"REG_a"), %%mm1                \n\t" //
+                "movq %%mm0, %%mm3                      \n\t"
+                "pmaxub %%mm1, %%mm0                    \n\t" //
+                "pminub %%mm3, %%mm1                    \n\t" //
+                "pmaxub %%mm2, %%mm1                    \n\t" //
+                "pminub %%mm1, %%mm0                    \n\t"
+                "movq %%mm0, (%%"REG_a")                \n\t"
+
+                "movq (%0, %1, 4), %%mm0                \n\t" //
+                "movq (%%"REG_a", %1, 2), %%mm1         \n\t" //
+                "movq %%mm2, %%mm3                      \n\t"
+                "pmaxub %%mm1, %%mm2                    \n\t" //
+                "pminub %%mm3, %%mm1                    \n\t" //
+                "pmaxub %%mm0, %%mm1                    \n\t" //
+                "pminub %%mm1, %%mm2                    \n\t"
+                "movq %%mm2, (%%"REG_a", %1, 2)         \n\t"
+
+                "movq (%%"REG_d"), %%mm2                \n\t" //
+                "movq (%%"REG_d", %1), %%mm1            \n\t" //
+                "movq %%mm2, %%mm3                      \n\t"
+                "pmaxub %%mm0, %%mm2                    \n\t" //
+                "pminub %%mm3, %%mm0                    \n\t" //
+                "pmaxub %%mm1, %%mm0                    \n\t" //
+                "pminub %%mm0, %%mm2                    \n\t"
+                "movq %%mm2, (%%"REG_d")                \n\t"
+
+                "movq (%%"REG_d", %1, 2), %%mm2         \n\t" //
+                "movq (%0, %1, 8), %%mm0                \n\t" //
+                "movq %%mm2, %%mm3                      \n\t"
+                "pmaxub %%mm0, %%mm2                    \n\t" //
+                "pminub %%mm3, %%mm0                    \n\t" //
+                "pmaxub %%mm1, %%mm0                    \n\t" //
+                "pminub %%mm0, %%mm2                    \n\t"
+                "movq %%mm2, (%%"REG_d", %1, 2)         \n\t"
+
+
+                : : "r" (src), "r" ((long)stride)
+                : "%"REG_a, "%"REG_d
+        );
 
 #else // MMX without MMX2
-	asm volatile(
-		"lea (%0, %1), %%"REG_a"			\n\t"
-		"lea (%%"REG_a", %1, 4), %%"REG_d"		\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
-		"pxor %%mm7, %%mm7				\n\t"
+        asm volatile(
+                "lea (%0, %1), %%"REG_a"                \n\t"
+                "lea (%%"REG_a", %1, 4), %%"REG_d"      \n\t"
+//      0       1       2       3       4       5       6       7       8       9
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1 %0+8%1  edx+4%1
+                "pxor %%mm7, %%mm7                      \n\t"
 
 #define REAL_MEDIAN(a,b,c)\
-		"movq " #a ", %%mm0				\n\t"\
-		"movq " #b ", %%mm2				\n\t"\
-		"movq " #c ", %%mm1				\n\t"\
-		"movq %%mm0, %%mm3				\n\t"\
-		"movq %%mm1, %%mm4				\n\t"\
-		"movq %%mm2, %%mm5				\n\t"\
-		"psubusb %%mm1, %%mm3				\n\t"\
-		"psubusb %%mm2, %%mm4				\n\t"\
-		"psubusb %%mm0, %%mm5				\n\t"\
-		"pcmpeqb %%mm7, %%mm3				\n\t"\
-		"pcmpeqb %%mm7, %%mm4				\n\t"\
-		"pcmpeqb %%mm7, %%mm5				\n\t"\
-		"movq %%mm3, %%mm6				\n\t"\
-		"pxor %%mm4, %%mm3				\n\t"\
-		"pxor %%mm5, %%mm4				\n\t"\
-		"pxor %%mm6, %%mm5				\n\t"\
-		"por %%mm3, %%mm1				\n\t"\
-		"por %%mm4, %%mm2				\n\t"\
-		"por %%mm5, %%mm0				\n\t"\
-		"pand %%mm2, %%mm0				\n\t"\
-		"pand %%mm1, %%mm0				\n\t"\
-		"movq %%mm0, " #b "				\n\t"
+                "movq " #a ", %%mm0                     \n\t"\
+                "movq " #b ", %%mm2                     \n\t"\
+                "movq " #c ", %%mm1                     \n\t"\
+                "movq %%mm0, %%mm3                      \n\t"\
+                "movq %%mm1, %%mm4                      \n\t"\
+                "movq %%mm2, %%mm5                      \n\t"\
+                "psubusb %%mm1, %%mm3                   \n\t"\
+                "psubusb %%mm2, %%mm4                   \n\t"\
+                "psubusb %%mm0, %%mm5                   \n\t"\
+                "pcmpeqb %%mm7, %%mm3                   \n\t"\
+                "pcmpeqb %%mm7, %%mm4                   \n\t"\
+                "pcmpeqb %%mm7, %%mm5                   \n\t"\
+                "movq %%mm3, %%mm6                      \n\t"\
+                "pxor %%mm4, %%mm3                      \n\t"\
+                "pxor %%mm5, %%mm4                      \n\t"\
+                "pxor %%mm6, %%mm5                      \n\t"\
+                "por %%mm3, %%mm1                       \n\t"\
+                "por %%mm4, %%mm2                       \n\t"\
+                "por %%mm5, %%mm0                       \n\t"\
+                "pand %%mm2, %%mm0                      \n\t"\
+                "pand %%mm1, %%mm0                      \n\t"\
+                "movq %%mm0, " #b "                     \n\t"
 #define MEDIAN(a,b,c)  REAL_MEDIAN(a,b,c)
 
-MEDIAN((%0), (%%REGa), (%%REGa, %1))
+MEDIAN((%0)        , (%%REGa)       , (%%REGa, %1))
 MEDIAN((%%REGa, %1), (%%REGa, %1, 2), (%0, %1, 4))
-MEDIAN((%0, %1, 4), (%%REGd), (%%REGd, %1))
+MEDIAN((%0, %1, 4) , (%%REGd)       , (%%REGd, %1))
 MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
 
-		: : "r" (src), "r" ((long)stride)
-		: "%"REG_a, "%"REG_d
-	);
-#endif // MMX
-#else
-	int x, y;
-	src+= 4*stride;
-	// FIXME - there should be a way to do a few columns in parallel like w/mmx
-	for(x=0; x<8; x++)
-	{
-		uint8_t *colsrc = src;
-		for (y=0; y<4; y++)
-		{
-			int a, b, c, d, e, f;
-			a = colsrc[0       ];
-			b = colsrc[stride  ];
-			c = colsrc[stride*2];
-			d = (a-b)>>31;
-			e = (b-c)>>31;
-			f = (c-a)>>31;
-			colsrc[stride  ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
-			colsrc += stride*2;
-		}
-		src++;
-	}
-#endif
+                : : "r" (src), "r" ((long)stride)
+                : "%"REG_a, "%"REG_d
+        );
+#endif //HAVE_MMX2
+#else //HAVE_MMX
+        int x, y;
+        src+= 4*stride;
+        // FIXME - there should be a way to do a few columns in parallel like w/mmx
+        for(x=0; x<8; x++)
+        {
+                uint8_t *colsrc = src;
+                for (y=0; y<4; y++)
+                {
+                        int a, b, c, d, e, f;
+                        a = colsrc[0       ];
+                        b = colsrc[stride  ];
+                        c = colsrc[stride*2];
+                        d = (a-b)>>31;
+                        e = (b-c)>>31;
+                        f = (c-a)>>31;
+                        colsrc[stride  ] = (a|(d^f)) & (b|(d^e)) & (c|(e^f));
+                        colsrc += stride*2;
+                }
+                src++;
+        }
+#endif //HAVE_MMX
 }
 
 #ifdef HAVE_MMX
@@ -2072,84 +2072,84 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
  */
 static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src, int srcStride)
 {
-	asm(
-		"lea (%0, %1), %%"REG_a"	\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
-		"movq (%0), %%mm0		\n\t" // 12345678
-		"movq (%%"REG_a"), %%mm1	\n\t" // abcdefgh
-		"movq %%mm0, %%mm2		\n\t" // 12345678
-		"punpcklbw %%mm1, %%mm0		\n\t" // 1a2b3c4d
-		"punpckhbw %%mm1, %%mm2		\n\t" // 5e6f7g8h
-
-		"movq (%%"REG_a", %1), %%mm1	\n\t"
-		"movq (%%"REG_a", %1, 2), %%mm3	\n\t"
-		"movq %%mm1, %%mm4		\n\t"
-		"punpcklbw %%mm3, %%mm1		\n\t"
-		"punpckhbw %%mm3, %%mm4		\n\t"
-
-		"movq %%mm0, %%mm3		\n\t"
-		"punpcklwd %%mm1, %%mm0		\n\t"
-		"punpckhwd %%mm1, %%mm3		\n\t"
-		"movq %%mm2, %%mm1		\n\t"
-		"punpcklwd %%mm4, %%mm2		\n\t"
-		"punpckhwd %%mm4, %%mm1		\n\t"
-
-		"movd %%mm0, 128(%2)		\n\t"
-		"psrlq $32, %%mm0		\n\t"
-		"movd %%mm0, 144(%2)		\n\t"
-		"movd %%mm3, 160(%2)		\n\t"
-		"psrlq $32, %%mm3		\n\t"
-		"movd %%mm3, 176(%2)		\n\t"
-		"movd %%mm3, 48(%3)		\n\t"
-		"movd %%mm2, 192(%2)		\n\t"
-		"movd %%mm2, 64(%3)		\n\t"
-		"psrlq $32, %%mm2		\n\t"
-		"movd %%mm2, 80(%3)		\n\t"
-		"movd %%mm1, 96(%3)		\n\t"
-		"psrlq $32, %%mm1		\n\t"
-		"movd %%mm1, 112(%3)		\n\t"
-
-		"lea (%%"REG_a", %1, 4), %%"REG_a"	\n\t"
-		
-		"movq (%0, %1, 4), %%mm0	\n\t" // 12345678
-		"movq (%%"REG_a"), %%mm1	\n\t" // abcdefgh
-		"movq %%mm0, %%mm2		\n\t" // 12345678
-		"punpcklbw %%mm1, %%mm0		\n\t" // 1a2b3c4d
-		"punpckhbw %%mm1, %%mm2		\n\t" // 5e6f7g8h
-
-		"movq (%%"REG_a", %1), %%mm1	\n\t"
-		"movq (%%"REG_a", %1, 2), %%mm3	\n\t"
-		"movq %%mm1, %%mm4		\n\t"
-		"punpcklbw %%mm3, %%mm1		\n\t"
-		"punpckhbw %%mm3, %%mm4		\n\t"
-
-		"movq %%mm0, %%mm3		\n\t"
-		"punpcklwd %%mm1, %%mm0		\n\t"
-		"punpckhwd %%mm1, %%mm3		\n\t"
-		"movq %%mm2, %%mm1		\n\t"
-		"punpcklwd %%mm4, %%mm2		\n\t"
-		"punpckhwd %%mm4, %%mm1		\n\t"
-
-		"movd %%mm0, 132(%2)		\n\t"
-		"psrlq $32, %%mm0		\n\t"
-		"movd %%mm0, 148(%2)		\n\t"
-		"movd %%mm3, 164(%2)		\n\t"
-		"psrlq $32, %%mm3		\n\t"
-		"movd %%mm3, 180(%2)		\n\t"
-		"movd %%mm3, 52(%3)		\n\t"
-		"movd %%mm2, 196(%2)		\n\t"
-		"movd %%mm2, 68(%3)		\n\t"
-		"psrlq $32, %%mm2		\n\t"
-		"movd %%mm2, 84(%3)		\n\t"
-		"movd %%mm1, 100(%3)		\n\t"
-		"psrlq $32, %%mm1		\n\t"
-		"movd %%mm1, 116(%3)		\n\t"
-
-
-	:: "r" (src), "r" ((long)srcStride), "r" (dst1), "r" (dst2)
-	: "%"REG_a
-	);
+        asm( // 8x8 byte transpose: reads 8 src rows (pitch %1), stores each column as 2x4 bytes into dst1 (%2) / dst2 (%3)
+                "lea (%0, %1), %%"REG_a"                \n\t" // REG_a = src + 1*srcStride
+//      src row 0       1       2         3          | 4        5       6         7
+//      address %0      REG_a   REG_a+%1  REG_a+2*%1 | %0+4*%1  REG_a   REG_a+%1  REG_a+2*%1  (REG_a advanced by 4*%1 for rows 5-7)
+                "movq (%0), %%mm0                       \n\t" // row 0: 12345678
+                "movq (%%"REG_a"), %%mm1                \n\t" // row 1: abcdefgh
+                "movq %%mm0, %%mm2                      \n\t" // copy of row 0: 12345678
+                "punpcklbw %%mm1, %%mm0                 \n\t" // 1a2b3c4d (rows 0/1, columns 0-3)
+                "punpckhbw %%mm1, %%mm2                 \n\t" // 5e6f7g8h (rows 0/1, columns 4-7)
+
+                "movq (%%"REG_a", %1), %%mm1            \n\t" // row 2
+                "movq (%%"REG_a", %1, 2), %%mm3         \n\t" // row 3
+                "movq %%mm1, %%mm4                      \n\t" // copy of row 2
+                "punpcklbw %%mm3, %%mm1                 \n\t" // rows 2/3 interleaved, columns 0-3
+                "punpckhbw %%mm3, %%mm4                 \n\t" // rows 2/3 interleaved, columns 4-7
+
+                "movq %%mm0, %%mm3                      \n\t"
+                "punpcklwd %%mm1, %%mm0                 \n\t" // mm0 = columns 0,1 of rows 0-3
+                "punpckhwd %%mm1, %%mm3                 \n\t" // mm3 = columns 2,3 of rows 0-3
+                "movq %%mm2, %%mm1                      \n\t"
+                "punpcklwd %%mm4, %%mm2                 \n\t" // mm2 = columns 4,5 of rows 0-3
+                "punpckhwd %%mm4, %%mm1                 \n\t" // mm1 = columns 6,7 of rows 0-3
+
+                "movd %%mm0, 128(%2)                    \n\t" // column 0 -> dst1+128
+                "psrlq $32, %%mm0                       \n\t"
+                "movd %%mm0, 144(%2)                    \n\t" // column 1 -> dst1+144
+                "movd %%mm3, 160(%2)                    \n\t" // column 2 -> dst1+160
+                "psrlq $32, %%mm3                       \n\t"
+                "movd %%mm3, 176(%2)                    \n\t" // column 3 -> dst1+176
+                "movd %%mm3, 48(%3)                     \n\t" // column 3 -> dst2+48 (stored to both buffers)
+                "movd %%mm2, 192(%2)                    \n\t" // column 4 -> dst1+192
+                "movd %%mm2, 64(%3)                     \n\t" // column 4 -> dst2+64 (stored to both buffers)
+                "psrlq $32, %%mm2                       \n\t"
+                "movd %%mm2, 80(%3)                     \n\t" // column 5 -> dst2+80
+                "movd %%mm1, 96(%3)                     \n\t" // column 6 -> dst2+96
+                "psrlq $32, %%mm1                       \n\t"
+                "movd %%mm1, 112(%3)                    \n\t" // column 7 -> dst2+112
+
+                "lea (%%"REG_a", %1, 4), %%"REG_a"      \n\t" // REG_a = src + 5*srcStride
+
+                "movq (%0, %1, 4), %%mm0                \n\t" // row 4: 12345678
+                "movq (%%"REG_a"), %%mm1                \n\t" // row 5: abcdefgh
+                "movq %%mm0, %%mm2                      \n\t" // copy of row 4: 12345678
+                "punpcklbw %%mm1, %%mm0                 \n\t" // 1a2b3c4d (rows 4/5, columns 0-3)
+                "punpckhbw %%mm1, %%mm2                 \n\t" // 5e6f7g8h (rows 4/5, columns 4-7)
+
+                "movq (%%"REG_a", %1), %%mm1            \n\t" // row 6
+                "movq (%%"REG_a", %1, 2), %%mm3         \n\t" // row 7
+                "movq %%mm1, %%mm4                      \n\t" // copy of row 6
+                "punpcklbw %%mm3, %%mm1                 \n\t" // rows 6/7 interleaved, columns 0-3
+                "punpckhbw %%mm3, %%mm4                 \n\t" // rows 6/7 interleaved, columns 4-7
+
+                "movq %%mm0, %%mm3                      \n\t"
+                "punpcklwd %%mm1, %%mm0                 \n\t" // mm0 = columns 0,1 of rows 4-7
+                "punpckhwd %%mm1, %%mm3                 \n\t" // mm3 = columns 2,3 of rows 4-7
+                "movq %%mm2, %%mm1                      \n\t"
+                "punpcklwd %%mm4, %%mm2                 \n\t" // mm2 = columns 4,5 of rows 4-7
+                "punpckhwd %%mm4, %%mm1                 \n\t" // mm1 = columns 6,7 of rows 4-7
+
+                "movd %%mm0, 132(%2)                    \n\t" // column 0 -> dst1+132 (second 4 bytes of each output row)
+                "psrlq $32, %%mm0                       \n\t"
+                "movd %%mm0, 148(%2)                    \n\t" // column 1 -> dst1+148
+                "movd %%mm3, 164(%2)                    \n\t" // column 2 -> dst1+164
+                "psrlq $32, %%mm3                       \n\t"
+                "movd %%mm3, 180(%2)                    \n\t" // column 3 -> dst1+180
+                "movd %%mm3, 52(%3)                     \n\t" // column 3 -> dst2+52
+                "movd %%mm2, 196(%2)                    \n\t" // column 4 -> dst1+196
+                "movd %%mm2, 68(%3)                     \n\t" // column 4 -> dst2+68
+                "psrlq $32, %%mm2                       \n\t"
+                "movd %%mm2, 84(%3)                     \n\t" // column 5 -> dst2+84
+                "movd %%mm1, 100(%3)                    \n\t" // column 6 -> dst2+100
+                "psrlq $32, %%mm1                       \n\t"
+                "movd %%mm1, 116(%3)                    \n\t" // column 7 -> dst2+116
+
+
+        :: "r" (src), "r" ((long)srcStride), "r" (dst1), "r" (dst2)
+        : "%"REG_a, "memory" // "memory": the asm loads via src and stores via dst1/dst2, which are only passed as pointer values
+        );
 }
 
 /**
@@ -2157,414 +2157,414 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, uint8_t *src
  */
 static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, uint8_t *src)
 {
-	asm(
-		"lea (%0, %1), %%"REG_a"	\n\t"
-		"lea (%%"REG_a",%1,4), %%"REG_d"\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	edx	edx+%1	edx+2%1	%0+8%1	edx+4%1
-		"movq (%2), %%mm0		\n\t" // 12345678
-		"movq 16(%2), %%mm1		\n\t" // abcdefgh
-		"movq %%mm0, %%mm2		\n\t" // 12345678
-		"punpcklbw %%mm1, %%mm0		\n\t" // 1a2b3c4d
-		"punpckhbw %%mm1, %%mm2		\n\t" // 5e6f7g8h
-
-		"movq 32(%2), %%mm1		\n\t"
-		"movq 48(%2), %%mm3		\n\t"
-		"movq %%mm1, %%mm4		\n\t"
-		"punpcklbw %%mm3, %%mm1		\n\t"
-		"punpckhbw %%mm3, %%mm4		\n\t"
-
-		"movq %%mm0, %%mm3		\n\t"
-		"punpcklwd %%mm1, %%mm0		\n\t"
-		"punpckhwd %%mm1, %%mm3		\n\t"
-		"movq %%mm2, %%mm1		\n\t"
-		"punpcklwd %%mm4, %%mm2		\n\t"
-		"punpckhwd %%mm4, %%mm1		\n\t"
-
-		"movd %%mm0, (%0)		\n\t"
-		"psrlq $32, %%mm0		\n\t"
-		"movd %%mm0, (%%"REG_a")	\n\t"
-		"movd %%mm3, (%%"REG_a", %1)	\n\t"
-		"psrlq $32, %%mm3		\n\t"
-		"movd %%mm3, (%%"REG_a", %1, 2)	\n\t"
-		"movd %%mm2, (%0, %1, 4)	\n\t"
-		"psrlq $32, %%mm2		\n\t"
-		"movd %%mm2, (%%"REG_d")	\n\t"
-		"movd %%mm1, (%%"REG_d", %1)	\n\t"
-		"psrlq $32, %%mm1		\n\t"
-		"movd %%mm1, (%%"REG_d", %1, 2)	\n\t"
-
-
-		"movq 64(%2), %%mm0		\n\t" // 12345678
-		"movq 80(%2), %%mm1		\n\t" // abcdefgh
-		"movq %%mm0, %%mm2		\n\t" // 12345678
-		"punpcklbw %%mm1, %%mm0		\n\t" // 1a2b3c4d
-		"punpckhbw %%mm1, %%mm2		\n\t" // 5e6f7g8h
-
-		"movq 96(%2), %%mm1		\n\t"
-		"movq 112(%2), %%mm3		\n\t"
-		"movq %%mm1, %%mm4		\n\t"
-		"punpcklbw %%mm3, %%mm1		\n\t"
-		"punpckhbw %%mm3, %%mm4		\n\t"
-
-		"movq %%mm0, %%mm3		\n\t"
-		"punpcklwd %%mm1, %%mm0		\n\t"
-		"punpckhwd %%mm1, %%mm3		\n\t"
-		"movq %%mm2, %%mm1		\n\t"
-		"punpcklwd %%mm4, %%mm2		\n\t"
-		"punpckhwd %%mm4, %%mm1		\n\t"
-
-		"movd %%mm0, 4(%0)		\n\t"
-		"psrlq $32, %%mm0		\n\t"
-		"movd %%mm0, 4(%%"REG_a")		\n\t"
-		"movd %%mm3, 4(%%"REG_a", %1)	\n\t"
-		"psrlq $32, %%mm3		\n\t"
-		"movd %%mm3, 4(%%"REG_a", %1, 2)	\n\t"
-		"movd %%mm2, 4(%0, %1, 4)	\n\t"
-		"psrlq $32, %%mm2		\n\t"
-		"movd %%mm2, 4(%%"REG_d")		\n\t"
-		"movd %%mm1, 4(%%"REG_d", %1)	\n\t"
-		"psrlq $32, %%mm1		\n\t"
-		"movd %%mm1, 4(%%"REG_d", %1, 2)	\n\t"
-
-	:: "r" (dst), "r" ((long)dstStride), "r" (src)
-	: "%"REG_a, "%"REG_d
-	);
+        asm( // 8x8 byte transpose: reads 8 rows of 8 bytes from src (%2, 16 bytes apart), stores the columns as rows of dst (%0, pitch %1)
+                "lea (%0, %1), %%"REG_a"                \n\t" // REG_a = dst + 1*dstStride
+                "lea (%%"REG_a",%1,4), %%"REG_d"        \n\t" // REG_d = dst + 5*dstStride
+//      dst row 0       1       2         3           4        5       6         7
+//      address %0      REG_a   REG_a+%1  REG_a+2*%1  %0+4*%1  REG_d   REG_d+%1  REG_d+2*%1
+                "movq (%2), %%mm0                       \n\t" // src row 0: 12345678
+                "movq 16(%2), %%mm1                     \n\t" // src row 1: abcdefgh
+                "movq %%mm0, %%mm2                      \n\t" // copy of src row 0: 12345678
+                "punpcklbw %%mm1, %%mm0                 \n\t" // 1a2b3c4d (src rows 0/1, columns 0-3)
+                "punpckhbw %%mm1, %%mm2                 \n\t" // 5e6f7g8h (src rows 0/1, columns 4-7)
+
+                "movq 32(%2), %%mm1                     \n\t" // src row 2
+                "movq 48(%2), %%mm3                     \n\t" // src row 3
+                "movq %%mm1, %%mm4                      \n\t" // copy of src row 2
+                "punpcklbw %%mm3, %%mm1                 \n\t" // src rows 2/3 interleaved, columns 0-3
+                "punpckhbw %%mm3, %%mm4                 \n\t" // src rows 2/3 interleaved, columns 4-7
+
+                "movq %%mm0, %%mm3                      \n\t"
+                "punpcklwd %%mm1, %%mm0                 \n\t" // mm0 = columns 0,1 of src rows 0-3
+                "punpckhwd %%mm1, %%mm3                 \n\t" // mm3 = columns 2,3 of src rows 0-3
+                "movq %%mm2, %%mm1                      \n\t"
+                "punpcklwd %%mm4, %%mm2                 \n\t" // mm2 = columns 4,5 of src rows 0-3
+                "punpckhwd %%mm4, %%mm1                 \n\t" // mm1 = columns 6,7 of src rows 0-3
+
+                "movd %%mm0, (%0)                       \n\t" // column 0 -> dst row 0, bytes 0-3
+                "psrlq $32, %%mm0                       \n\t"
+                "movd %%mm0, (%%"REG_a")                \n\t" // column 1 -> dst row 1, bytes 0-3
+                "movd %%mm3, (%%"REG_a", %1)            \n\t" // column 2 -> dst row 2, bytes 0-3
+                "psrlq $32, %%mm3                       \n\t"
+                "movd %%mm3, (%%"REG_a", %1, 2)         \n\t" // column 3 -> dst row 3, bytes 0-3
+                "movd %%mm2, (%0, %1, 4)                \n\t" // column 4 -> dst row 4, bytes 0-3
+                "psrlq $32, %%mm2                       \n\t"
+                "movd %%mm2, (%%"REG_d")                \n\t" // column 5 -> dst row 5, bytes 0-3
+                "movd %%mm1, (%%"REG_d", %1)            \n\t" // column 6 -> dst row 6, bytes 0-3
+                "psrlq $32, %%mm1                       \n\t"
+                "movd %%mm1, (%%"REG_d", %1, 2)         \n\t" // column 7 -> dst row 7, bytes 0-3
+
+
+                "movq 64(%2), %%mm0                     \n\t" // src row 4: 12345678
+                "movq 80(%2), %%mm1                     \n\t" // src row 5: abcdefgh
+                "movq %%mm0, %%mm2                      \n\t" // copy of src row 4: 12345678
+                "punpcklbw %%mm1, %%mm0                 \n\t" // 1a2b3c4d (src rows 4/5, columns 0-3)
+                "punpckhbw %%mm1, %%mm2                 \n\t" // 5e6f7g8h (src rows 4/5, columns 4-7)
+
+                "movq 96(%2), %%mm1                     \n\t" // src row 6
+                "movq 112(%2), %%mm3                    \n\t" // src row 7
+                "movq %%mm1, %%mm4                      \n\t" // copy of src row 6
+                "punpcklbw %%mm3, %%mm1                 \n\t" // src rows 6/7 interleaved, columns 0-3
+                "punpckhbw %%mm3, %%mm4                 \n\t" // src rows 6/7 interleaved, columns 4-7
+
+                "movq %%mm0, %%mm3                      \n\t"
+                "punpcklwd %%mm1, %%mm0                 \n\t" // mm0 = columns 0,1 of src rows 4-7
+                "punpckhwd %%mm1, %%mm3                 \n\t" // mm3 = columns 2,3 of src rows 4-7
+                "movq %%mm2, %%mm1                      \n\t"
+                "punpcklwd %%mm4, %%mm2                 \n\t" // mm2 = columns 4,5 of src rows 4-7
+                "punpckhwd %%mm4, %%mm1                 \n\t" // mm1 = columns 6,7 of src rows 4-7
+
+                "movd %%mm0, 4(%0)                      \n\t" // column 0 -> dst row 0, bytes 4-7
+                "psrlq $32, %%mm0                       \n\t"
+                "movd %%mm0, 4(%%"REG_a")               \n\t" // column 1 -> dst row 1, bytes 4-7
+                "movd %%mm3, 4(%%"REG_a", %1)           \n\t" // column 2 -> dst row 2, bytes 4-7
+                "psrlq $32, %%mm3                       \n\t"
+                "movd %%mm3, 4(%%"REG_a", %1, 2)        \n\t" // column 3 -> dst row 3, bytes 4-7
+                "movd %%mm2, 4(%0, %1, 4)               \n\t" // column 4 -> dst row 4, bytes 4-7
+                "psrlq $32, %%mm2                       \n\t"
+                "movd %%mm2, 4(%%"REG_d")               \n\t" // column 5 -> dst row 5, bytes 4-7
+                "movd %%mm1, 4(%%"REG_d", %1)           \n\t" // column 6 -> dst row 6, bytes 4-7
+                "psrlq $32, %%mm1                       \n\t"
+                "movd %%mm1, 4(%%"REG_d", %1, 2)        \n\t" // column 7 -> dst row 7, bytes 4-7
+
+        :: "r" (dst), "r" ((long)dstStride), "r" (src)
+        : "%"REG_a, "%"REG_d, "memory" // "memory": the asm loads via src and stores via dst, which are only passed as pointer values
+        );
 }
-#endif
+#endif //HAVE_MMX
 //static long test=0;
 
 #ifndef HAVE_ALTIVEC
 static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
-				    uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
+                                    uint8_t *tempBlured, uint32_t *tempBluredPast, int *maxNoise)
 {
-	// to save a register (FIXME do this outside of the loops)
-	tempBluredPast[127]= maxNoise[0];
-	tempBluredPast[128]= maxNoise[1];
-	tempBluredPast[129]= maxNoise[2];
-        
+        // to save a register (FIXME do this outside of the loops)
+        tempBluredPast[127]= maxNoise[0];
+        tempBluredPast[128]= maxNoise[1];
+        tempBluredPast[129]= maxNoise[2];
+
 #define FAST_L2_DIFF
 //#define L1_DIFF //u should change the thresholds too if u try that one
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
-	asm volatile(
-		"lea (%2, %2, 2), %%"REG_a"			\n\t" // 3*stride
-		"lea (%2, %2, 4), %%"REG_d"			\n\t" // 5*stride
-		"lea (%%"REG_d", %2, 2), %%"REG_c"		\n\t" // 7*stride
-//	0	1	2	3	4	5	6	7	8	9
-//	%x	%x+%2	%x+2%2	%x+eax	%x+4%2	%x+edx	%x+2eax	%x+ecx	%x+8%2
+        asm volatile(
+                "lea (%2, %2, 2), %%"REG_a"             \n\t" // 3*stride
+                "lea (%2, %2, 4), %%"REG_d"             \n\t" // 5*stride
+                "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+//      0       1       2       3       4       5       6       7       8       9
+//      %x      %x+%2   %x+2%2  %x+eax  %x+4%2  %x+edx  %x+2eax %x+ecx  %x+8%2
 //FIXME reorder?
 #ifdef L1_DIFF //needs mmx2
-		"movq (%0), %%mm0				\n\t" // L0
-		"psadbw (%1), %%mm0				\n\t" // |L0-R0|
-		"movq (%0, %2), %%mm1				\n\t" // L1
-		"psadbw (%1, %2), %%mm1				\n\t" // |L1-R1|
-		"movq (%0, %2, 2), %%mm2			\n\t" // L2
-		"psadbw (%1, %2, 2), %%mm2			\n\t" // |L2-R2|
-		"movq (%0, %%"REG_a"), %%mm3			\n\t" // L3
-		"psadbw (%1, %%"REG_a"), %%mm3			\n\t" // |L3-R3|
-
-		"movq (%0, %2, 4), %%mm4			\n\t" // L4
-		"paddw %%mm1, %%mm0				\n\t"
-		"psadbw (%1, %2, 4), %%mm4			\n\t" // |L4-R4|
-		"movq (%0, %%"REG_d"), %%mm5			\n\t" // L5
-		"paddw %%mm2, %%mm0				\n\t"
-		"psadbw (%1, %%"REG_d"), %%mm5			\n\t" // |L5-R5|
-		"movq (%0, %%"REG_a", 2), %%mm6			\n\t" // L6
-		"paddw %%mm3, %%mm0				\n\t"
-		"psadbw (%1, %%"REG_a", 2), %%mm6		\n\t" // |L6-R6|
-		"movq (%0, %%"REG_c"), %%mm7			\n\t" // L7
-		"paddw %%mm4, %%mm0				\n\t"
-		"psadbw (%1, %%"REG_c"), %%mm7			\n\t" // |L7-R7|
-		"paddw %%mm5, %%mm6				\n\t"
-		"paddw %%mm7, %%mm6				\n\t"
-		"paddw %%mm6, %%mm0				\n\t"
-#else
+                "movq (%0), %%mm0                       \n\t" // L0
+                "psadbw (%1), %%mm0                     \n\t" // |L0-R0|
+                "movq (%0, %2), %%mm1                   \n\t" // L1
+                "psadbw (%1, %2), %%mm1                 \n\t" // |L1-R1|
+                "movq (%0, %2, 2), %%mm2                \n\t" // L2
+                "psadbw (%1, %2, 2), %%mm2              \n\t" // |L2-R2|
+                "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
+                "psadbw (%1, %%"REG_a"), %%mm3          \n\t" // |L3-R3|
+
+                "movq (%0, %2, 4), %%mm4                \n\t" // L4
+                "paddw %%mm1, %%mm0                     \n\t"
+                "psadbw (%1, %2, 4), %%mm4              \n\t" // |L4-R4|
+                "movq (%0, %%"REG_d"), %%mm5            \n\t" // L5
+                "paddw %%mm2, %%mm0                     \n\t"
+                "psadbw (%1, %%"REG_d"), %%mm5          \n\t" // |L5-R5|
+                "movq (%0, %%"REG_a", 2), %%mm6         \n\t" // L6
+                "paddw %%mm3, %%mm0                     \n\t"
+                "psadbw (%1, %%"REG_a", 2), %%mm6       \n\t" // |L6-R6|
+                "movq (%0, %%"REG_c"), %%mm7            \n\t" // L7
+                "paddw %%mm4, %%mm0                     \n\t"
+                "psadbw (%1, %%"REG_c"), %%mm7          \n\t" // |L7-R7|
+                "paddw %%mm5, %%mm6                     \n\t"
+                "paddw %%mm7, %%mm6                     \n\t"
+                "paddw %%mm6, %%mm0                     \n\t"
+#else //L1_DIFF
 #if defined (FAST_L2_DIFF)
-		"pcmpeqb %%mm7, %%mm7				\n\t"
-		"movq "MANGLE(b80)", %%mm6			\n\t"
-		"pxor %%mm0, %%mm0				\n\t"
+                "pcmpeqb %%mm7, %%mm7                   \n\t"
+                "movq "MANGLE(b80)", %%mm6              \n\t"
+                "pxor %%mm0, %%mm0                      \n\t"
 #define REAL_L2_DIFF_CORE(a, b)\
-		"movq " #a ", %%mm5				\n\t"\
-		"movq " #b ", %%mm2				\n\t"\
-		"pxor %%mm7, %%mm2				\n\t"\
-		PAVGB(%%mm2, %%mm5)\
-		"paddb %%mm6, %%mm5				\n\t"\
-		"movq %%mm5, %%mm2				\n\t"\
-		"psllw $8, %%mm5				\n\t"\
-		"pmaddwd %%mm5, %%mm5				\n\t"\
-		"pmaddwd %%mm2, %%mm2				\n\t"\
-		"paddd %%mm2, %%mm5				\n\t"\
-		"psrld $14, %%mm5				\n\t"\
-		"paddd %%mm5, %%mm0				\n\t"
-
-#else
-		"pxor %%mm7, %%mm7				\n\t"
-		"pxor %%mm0, %%mm0				\n\t"
+                "movq " #a ", %%mm5                     \n\t"\
+                "movq " #b ", %%mm2                     \n\t"\
+                "pxor %%mm7, %%mm2                      \n\t"\
+                PAVGB(%%mm2, %%mm5)\
+                "paddb %%mm6, %%mm5                     \n\t"\
+                "movq %%mm5, %%mm2                      \n\t"\
+                "psllw $8, %%mm5                        \n\t"\
+                "pmaddwd %%mm5, %%mm5                   \n\t"\
+                "pmaddwd %%mm2, %%mm2                   \n\t"\
+                "paddd %%mm2, %%mm5                     \n\t"\
+                "psrld $14, %%mm5                       \n\t"\
+                "paddd %%mm5, %%mm0                     \n\t"
+
+#else //defined (FAST_L2_DIFF)
+                "pxor %%mm7, %%mm7                      \n\t"
+                "pxor %%mm0, %%mm0                      \n\t"
 #define REAL_L2_DIFF_CORE(a, b)\
-		"movq " #a ", %%mm5				\n\t"\
-		"movq " #b ", %%mm2				\n\t"\
-		"movq %%mm5, %%mm1				\n\t"\
-		"movq %%mm2, %%mm3				\n\t"\
-		"punpcklbw %%mm7, %%mm5				\n\t"\
-		"punpckhbw %%mm7, %%mm1				\n\t"\
-		"punpcklbw %%mm7, %%mm2				\n\t"\
-		"punpckhbw %%mm7, %%mm3				\n\t"\
-		"psubw %%mm2, %%mm5				\n\t"\
-		"psubw %%mm3, %%mm1				\n\t"\
-		"pmaddwd %%mm5, %%mm5				\n\t"\
-		"pmaddwd %%mm1, %%mm1				\n\t"\
-		"paddd %%mm1, %%mm5				\n\t"\
-		"paddd %%mm5, %%mm0				\n\t"
-
-#endif
+                "movq " #a ", %%mm5                     \n\t"\
+                "movq " #b ", %%mm2                     \n\t"\
+                "movq %%mm5, %%mm1                      \n\t"\
+                "movq %%mm2, %%mm3                      \n\t"\
+                "punpcklbw %%mm7, %%mm5                 \n\t"\
+                "punpckhbw %%mm7, %%mm1                 \n\t"\
+                "punpcklbw %%mm7, %%mm2                 \n\t"\
+                "punpckhbw %%mm7, %%mm3                 \n\t"\
+                "psubw %%mm2, %%mm5                     \n\t"\
+                "psubw %%mm3, %%mm1                     \n\t"\
+                "pmaddwd %%mm5, %%mm5                   \n\t"\
+                "pmaddwd %%mm1, %%mm1                   \n\t"\
+                "paddd %%mm1, %%mm5                     \n\t"\
+                "paddd %%mm5, %%mm0                     \n\t"
+
+#endif //defined (FAST_L2_DIFF)
 
 #define L2_DIFF_CORE(a, b)  REAL_L2_DIFF_CORE(a, b)
 
-L2_DIFF_CORE((%0), (%1))
-L2_DIFF_CORE((%0, %2), (%1, %2))
-L2_DIFF_CORE((%0, %2, 2), (%1, %2, 2))
-L2_DIFF_CORE((%0, %%REGa), (%1, %%REGa))
-L2_DIFF_CORE((%0, %2, 4), (%1, %2, 4))
-L2_DIFF_CORE((%0, %%REGd), (%1, %%REGd))
+L2_DIFF_CORE((%0)          , (%1))
+L2_DIFF_CORE((%0, %2)      , (%1, %2))
+L2_DIFF_CORE((%0, %2, 2)   , (%1, %2, 2))
+L2_DIFF_CORE((%0, %%REGa)  , (%1, %%REGa))
+L2_DIFF_CORE((%0, %2, 4)   , (%1, %2, 4))
+L2_DIFF_CORE((%0, %%REGd)  , (%1, %%REGd))
 L2_DIFF_CORE((%0, %%REGa,2), (%1, %%REGa,2))
-L2_DIFF_CORE((%0, %%REGc), (%1, %%REGc))
-
-#endif
-
-		"movq %%mm0, %%mm4				\n\t"
-		"psrlq $32, %%mm0				\n\t"
-		"paddd %%mm0, %%mm4				\n\t"
-		"movd %%mm4, %%ecx				\n\t"
-		"shll $2, %%ecx					\n\t"
-		"mov %3, %%"REG_d"				\n\t"
-		"addl -4(%%"REG_d"), %%ecx			\n\t"
-		"addl 4(%%"REG_d"), %%ecx			\n\t"
-		"addl -1024(%%"REG_d"), %%ecx			\n\t"
-		"addl $4, %%ecx					\n\t"
-		"addl 1024(%%"REG_d"), %%ecx			\n\t"
-		"shrl $3, %%ecx					\n\t"
-		"movl %%ecx, (%%"REG_d")			\n\t"
-
-//		"mov %3, %%"REG_c"				\n\t"
-//		"mov %%"REG_c", test				\n\t"
-//		"jmp 4f \n\t"
-		"cmpl 512(%%"REG_d"), %%ecx			\n\t"
-		" jb 2f						\n\t"
-		"cmpl 516(%%"REG_d"), %%ecx			\n\t"
-		" jb 1f						\n\t"
-
-		"lea (%%"REG_a", %2, 2), %%"REG_d"		\n\t" // 5*stride
-		"lea (%%"REG_d", %2, 2), %%"REG_c"		\n\t" // 7*stride
-		"movq (%0), %%mm0				\n\t" // L0
-		"movq (%0, %2), %%mm1				\n\t" // L1
-		"movq (%0, %2, 2), %%mm2			\n\t" // L2
-		"movq (%0, %%"REG_a"), %%mm3			\n\t" // L3
-		"movq (%0, %2, 4), %%mm4			\n\t" // L4
-		"movq (%0, %%"REG_d"), %%mm5			\n\t" // L5
-		"movq (%0, %%"REG_a", 2), %%mm6			\n\t" // L6
-		"movq (%0, %%"REG_c"), %%mm7			\n\t" // L7
-		"movq %%mm0, (%1)				\n\t" // L0
-		"movq %%mm1, (%1, %2)				\n\t" // L1
-		"movq %%mm2, (%1, %2, 2)			\n\t" // L2
-		"movq %%mm3, (%1, %%"REG_a")			\n\t" // L3
-		"movq %%mm4, (%1, %2, 4)			\n\t" // L4
-		"movq %%mm5, (%1, %%"REG_d")			\n\t" // L5
-		"movq %%mm6, (%1, %%"REG_a", 2)			\n\t" // L6
-		"movq %%mm7, (%1, %%"REG_c")			\n\t" // L7
-		"jmp 4f						\n\t"
-
-		"1:						\n\t"
-		"lea (%%"REG_a", %2, 2), %%"REG_d"		\n\t" // 5*stride
-		"lea (%%"REG_d", %2, 2), %%"REG_c"		\n\t" // 7*stride
-		"movq (%0), %%mm0				\n\t" // L0
-		PAVGB((%1), %%mm0)				      // L0
-		"movq (%0, %2), %%mm1				\n\t" // L1
-		PAVGB((%1, %2), %%mm1)				      // L1
-		"movq (%0, %2, 2), %%mm2			\n\t" // L2
-		PAVGB((%1, %2, 2), %%mm2)			      // L2
-		"movq (%0, %%"REG_a"), %%mm3			\n\t" // L3
-		PAVGB((%1, %%REGa), %%mm3)			      // L3
-		"movq (%0, %2, 4), %%mm4			\n\t" // L4
-		PAVGB((%1, %2, 4), %%mm4)			      // L4
-		"movq (%0, %%"REG_d"), %%mm5			\n\t" // L5
-		PAVGB((%1, %%REGd), %%mm5)			      // L5
-		"movq (%0, %%"REG_a", 2), %%mm6			\n\t" // L6
-		PAVGB((%1, %%REGa, 2), %%mm6)			      // L6
-		"movq (%0, %%"REG_c"), %%mm7			\n\t" // L7
-		PAVGB((%1, %%REGc), %%mm7)			      // L7
-		"movq %%mm0, (%1)				\n\t" // R0
-		"movq %%mm1, (%1, %2)				\n\t" // R1
-		"movq %%mm2, (%1, %2, 2)			\n\t" // R2
-		"movq %%mm3, (%1, %%"REG_a")			\n\t" // R3
-		"movq %%mm4, (%1, %2, 4)			\n\t" // R4
-		"movq %%mm5, (%1, %%"REG_d")			\n\t" // R5
-		"movq %%mm6, (%1, %%"REG_a", 2)			\n\t" // R6
-		"movq %%mm7, (%1, %%"REG_c")			\n\t" // R7
-		"movq %%mm0, (%0)				\n\t" // L0
-		"movq %%mm1, (%0, %2)				\n\t" // L1
-		"movq %%mm2, (%0, %2, 2)			\n\t" // L2
-		"movq %%mm3, (%0, %%"REG_a")			\n\t" // L3
-		"movq %%mm4, (%0, %2, 4)			\n\t" // L4
-		"movq %%mm5, (%0, %%"REG_d")			\n\t" // L5
-		"movq %%mm6, (%0, %%"REG_a", 2)			\n\t" // L6
-		"movq %%mm7, (%0, %%"REG_c")			\n\t" // L7
-		"jmp 4f						\n\t"
-
-		"2:						\n\t"
-		"cmpl 508(%%"REG_d"), %%ecx			\n\t"
-		" jb 3f						\n\t"
-
-		"lea (%%"REG_a", %2, 2), %%"REG_d"		\n\t" // 5*stride
-		"lea (%%"REG_d", %2, 2), %%"REG_c"		\n\t" // 7*stride
-		"movq (%0), %%mm0				\n\t" // L0
-		"movq (%0, %2), %%mm1				\n\t" // L1
-		"movq (%0, %2, 2), %%mm2			\n\t" // L2
-		"movq (%0, %%"REG_a"), %%mm3			\n\t" // L3
-		"movq (%1), %%mm4				\n\t" // R0
-		"movq (%1, %2), %%mm5				\n\t" // R1
-		"movq (%1, %2, 2), %%mm6			\n\t" // R2
-		"movq (%1, %%"REG_a"), %%mm7			\n\t" // R3
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		"movq %%mm0, (%1)				\n\t" // R0
-		"movq %%mm1, (%1, %2)				\n\t" // R1
-		"movq %%mm2, (%1, %2, 2)			\n\t" // R2
-		"movq %%mm3, (%1, %%"REG_a")			\n\t" // R3
-		"movq %%mm0, (%0)				\n\t" // L0
-		"movq %%mm1, (%0, %2)				\n\t" // L1
-		"movq %%mm2, (%0, %2, 2)			\n\t" // L2
-		"movq %%mm3, (%0, %%"REG_a")			\n\t" // L3
-
-		"movq (%0, %2, 4), %%mm0			\n\t" // L4
-		"movq (%0, %%"REG_d"), %%mm1			\n\t" // L5
-		"movq (%0, %%"REG_a", 2), %%mm2			\n\t" // L6
-		"movq (%0, %%"REG_c"), %%mm3			\n\t" // L7
-		"movq (%1, %2, 4), %%mm4			\n\t" // R4
-		"movq (%1, %%"REG_d"), %%mm5			\n\t" // R5
-		"movq (%1, %%"REG_a", 2), %%mm6			\n\t" // R6
-		"movq (%1, %%"REG_c"), %%mm7			\n\t" // R7
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		"movq %%mm0, (%1, %2, 4)			\n\t" // R4
-		"movq %%mm1, (%1, %%"REG_d")			\n\t" // R5
-		"movq %%mm2, (%1, %%"REG_a", 2)			\n\t" // R6
-		"movq %%mm3, (%1, %%"REG_c")			\n\t" // R7
-		"movq %%mm0, (%0, %2, 4)			\n\t" // L4
-		"movq %%mm1, (%0, %%"REG_d")			\n\t" // L5
-		"movq %%mm2, (%0, %%"REG_a", 2)			\n\t" // L6
-		"movq %%mm3, (%0, %%"REG_c")			\n\t" // L7
-		"jmp 4f						\n\t"
-
-		"3:						\n\t"
-		"lea (%%"REG_a", %2, 2), %%"REG_d"		\n\t" // 5*stride
-		"lea (%%"REG_d", %2, 2), %%"REG_c"		\n\t" // 7*stride
-		"movq (%0), %%mm0				\n\t" // L0
-		"movq (%0, %2), %%mm1				\n\t" // L1
-		"movq (%0, %2, 2), %%mm2			\n\t" // L2
-		"movq (%0, %%"REG_a"), %%mm3			\n\t" // L3
-		"movq (%1), %%mm4				\n\t" // R0
-		"movq (%1, %2), %%mm5				\n\t" // R1
-		"movq (%1, %2, 2), %%mm6			\n\t" // R2
-		"movq (%1, %%"REG_a"), %%mm7			\n\t" // R3
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		"movq %%mm0, (%1)				\n\t" // R0
-		"movq %%mm1, (%1, %2)				\n\t" // R1
-		"movq %%mm2, (%1, %2, 2)			\n\t" // R2
-		"movq %%mm3, (%1, %%"REG_a")			\n\t" // R3
-		"movq %%mm0, (%0)				\n\t" // L0
-		"movq %%mm1, (%0, %2)				\n\t" // L1
-		"movq %%mm2, (%0, %2, 2)			\n\t" // L2
-		"movq %%mm3, (%0, %%"REG_a")			\n\t" // L3
-
-		"movq (%0, %2, 4), %%mm0			\n\t" // L4
-		"movq (%0, %%"REG_d"), %%mm1			\n\t" // L5
-		"movq (%0, %%"REG_a", 2), %%mm2			\n\t" // L6
-		"movq (%0, %%"REG_c"), %%mm3			\n\t" // L7
-		"movq (%1, %2, 4), %%mm4			\n\t" // R4
-		"movq (%1, %%"REG_d"), %%mm5			\n\t" // R5
-		"movq (%1, %%"REG_a", 2), %%mm6			\n\t" // R6
-		"movq (%1, %%"REG_c"), %%mm7			\n\t" // R7
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		PAVGB(%%mm4, %%mm0)
-		PAVGB(%%mm5, %%mm1)
-		PAVGB(%%mm6, %%mm2)
-		PAVGB(%%mm7, %%mm3)
-		"movq %%mm0, (%1, %2, 4)			\n\t" // R4
-		"movq %%mm1, (%1, %%"REG_d")			\n\t" // R5
-		"movq %%mm2, (%1, %%"REG_a", 2)			\n\t" // R6
-		"movq %%mm3, (%1, %%"REG_c")			\n\t" // R7
-		"movq %%mm0, (%0, %2, 4)			\n\t" // L4
-		"movq %%mm1, (%0, %%"REG_d")			\n\t" // L5
-		"movq %%mm2, (%0, %%"REG_a", 2)			\n\t" // L6
-		"movq %%mm3, (%0, %%"REG_c")			\n\t" // L7
-
-		"4:						\n\t"
-
-		:: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast)
-		: "%"REG_a, "%"REG_d, "%"REG_c, "memory"
-		);
+L2_DIFF_CORE((%0, %%REGc)  , (%1, %%REGc))
+
+#endif //L1_DIFF
+
+                "movq %%mm0, %%mm4                      \n\t"
+                "psrlq $32, %%mm0                       \n\t"
+                "paddd %%mm0, %%mm4                     \n\t"
+                "movd %%mm4, %%ecx                      \n\t"
+                "shll $2, %%ecx                         \n\t"
+                "mov %3, %%"REG_d"                      \n\t"
+                "addl -4(%%"REG_d"), %%ecx              \n\t"
+                "addl 4(%%"REG_d"), %%ecx               \n\t"
+                "addl -1024(%%"REG_d"), %%ecx           \n\t"
+                "addl $4, %%ecx                         \n\t"
+                "addl 1024(%%"REG_d"), %%ecx            \n\t"
+                "shrl $3, %%ecx                         \n\t"
+                "movl %%ecx, (%%"REG_d")                \n\t"
+
+//                "mov %3, %%"REG_c"                      \n\t"
+//                "mov %%"REG_c", test                    \n\t"
+//                "jmp 4f                                 \n\t"
+                "cmpl 512(%%"REG_d"), %%ecx             \n\t"
+                " jb 2f                                 \n\t"
+                "cmpl 516(%%"REG_d"), %%ecx             \n\t"
+                " jb 1f                                 \n\t"
+
+                "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" // 5*stride
+                "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+                "movq (%0), %%mm0                       \n\t" // L0
+                "movq (%0, %2), %%mm1                   \n\t" // L1
+                "movq (%0, %2, 2), %%mm2                \n\t" // L2
+                "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
+                "movq (%0, %2, 4), %%mm4                \n\t" // L4
+                "movq (%0, %%"REG_d"), %%mm5            \n\t" // L5
+                "movq (%0, %%"REG_a", 2), %%mm6         \n\t" // L6
+                "movq (%0, %%"REG_c"), %%mm7            \n\t" // L7
+                "movq %%mm0, (%1)                       \n\t" // L0
+                "movq %%mm1, (%1, %2)                   \n\t" // L1
+                "movq %%mm2, (%1, %2, 2)                \n\t" // L2
+                "movq %%mm3, (%1, %%"REG_a")            \n\t" // L3
+                "movq %%mm4, (%1, %2, 4)                \n\t" // L4
+                "movq %%mm5, (%1, %%"REG_d")            \n\t" // L5
+                "movq %%mm6, (%1, %%"REG_a", 2)         \n\t" // L6
+                "movq %%mm7, (%1, %%"REG_c")            \n\t" // L7
+                "jmp 4f                                 \n\t"
+
+                "1:                                     \n\t"
+                "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" // 5*stride
+                "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+                "movq (%0), %%mm0                       \n\t" // L0
+                PAVGB((%1), %%mm0)                            // L0
+                "movq (%0, %2), %%mm1                   \n\t" // L1
+                PAVGB((%1, %2), %%mm1)                        // L1
+                "movq (%0, %2, 2), %%mm2                \n\t" // L2
+                PAVGB((%1, %2, 2), %%mm2)                     // L2
+                "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
+                PAVGB((%1, %%REGa), %%mm3)                    // L3
+                "movq (%0, %2, 4), %%mm4                \n\t" // L4
+                PAVGB((%1, %2, 4), %%mm4)                     // L4
+                "movq (%0, %%"REG_d"), %%mm5            \n\t" // L5
+                PAVGB((%1, %%REGd), %%mm5)                    // L5
+                "movq (%0, %%"REG_a", 2), %%mm6         \n\t" // L6
+                PAVGB((%1, %%REGa, 2), %%mm6)                 // L6
+                "movq (%0, %%"REG_c"), %%mm7            \n\t" // L7
+                PAVGB((%1, %%REGc), %%mm7)                    // L7
+                "movq %%mm0, (%1)                       \n\t" // R0
+                "movq %%mm1, (%1, %2)                   \n\t" // R1
+                "movq %%mm2, (%1, %2, 2)                \n\t" // R2
+                "movq %%mm3, (%1, %%"REG_a")            \n\t" // R3
+                "movq %%mm4, (%1, %2, 4)                \n\t" // R4
+                "movq %%mm5, (%1, %%"REG_d")            \n\t" // R5
+                "movq %%mm6, (%1, %%"REG_a", 2)         \n\t" // R6
+                "movq %%mm7, (%1, %%"REG_c")            \n\t" // R7
+                "movq %%mm0, (%0)                       \n\t" // L0
+                "movq %%mm1, (%0, %2)                   \n\t" // L1
+                "movq %%mm2, (%0, %2, 2)                \n\t" // L2
+                "movq %%mm3, (%0, %%"REG_a")            \n\t" // L3
+                "movq %%mm4, (%0, %2, 4)                \n\t" // L4
+                "movq %%mm5, (%0, %%"REG_d")            \n\t" // L5
+                "movq %%mm6, (%0, %%"REG_a", 2)         \n\t" // L6
+                "movq %%mm7, (%0, %%"REG_c")            \n\t" // L7
+                "jmp 4f                                 \n\t"
+
+                "2:                                     \n\t"
+                "cmpl 508(%%"REG_d"), %%ecx             \n\t"
+                " jb 3f                                 \n\t"
+
+                "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" // 5*stride
+                "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+                "movq (%0), %%mm0                       \n\t" // L0
+                "movq (%0, %2), %%mm1                   \n\t" // L1
+                "movq (%0, %2, 2), %%mm2                \n\t" // L2
+                "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
+                "movq (%1), %%mm4                       \n\t" // R0
+                "movq (%1, %2), %%mm5                   \n\t" // R1
+                "movq (%1, %2, 2), %%mm6                \n\t" // R2
+                "movq (%1, %%"REG_a"), %%mm7            \n\t" // R3
+                PAVGB(%%mm4, %%mm0)
+                PAVGB(%%mm5, %%mm1)
+                PAVGB(%%mm6, %%mm2)
+                PAVGB(%%mm7, %%mm3)
+                PAVGB(%%mm4, %%mm0)
+                PAVGB(%%mm5, %%mm1)
+                PAVGB(%%mm6, %%mm2)
+                PAVGB(%%mm7, %%mm3)
+                "movq %%mm0, (%1)                       \n\t" // R0
+                "movq %%mm1, (%1, %2)                   \n\t" // R1
+                "movq %%mm2, (%1, %2, 2)                \n\t" // R2
+                "movq %%mm3, (%1, %%"REG_a")            \n\t" // R3
+                "movq %%mm0, (%0)                       \n\t" // L0
+                "movq %%mm1, (%0, %2)                   \n\t" // L1
+                "movq %%mm2, (%0, %2, 2)                \n\t" // L2
+                "movq %%mm3, (%0, %%"REG_a")            \n\t" // L3
+
+                "movq (%0, %2, 4), %%mm0                \n\t" // L4
+                "movq (%0, %%"REG_d"), %%mm1            \n\t" // L5
+                "movq (%0, %%"REG_a", 2), %%mm2         \n\t" // L6
+                "movq (%0, %%"REG_c"), %%mm3            \n\t" // L7
+                "movq (%1, %2, 4), %%mm4                \n\t" // R4
+                "movq (%1, %%"REG_d"), %%mm5            \n\t" // R5
+                "movq (%1, %%"REG_a", 2), %%mm6         \n\t" // R6
+                "movq (%1, %%"REG_c"), %%mm7            \n\t" // R7
+                PAVGB(%%mm4, %%mm0)
+                PAVGB(%%mm5, %%mm1)
+                PAVGB(%%mm6, %%mm2)
+                PAVGB(%%mm7, %%mm3)
+                PAVGB(%%mm4, %%mm0)
+                PAVGB(%%mm5, %%mm1)
+                PAVGB(%%mm6, %%mm2)
+                PAVGB(%%mm7, %%mm3)
+                "movq %%mm0, (%1, %2, 4)                \n\t" // R4
+                "movq %%mm1, (%1, %%"REG_d")            \n\t" // R5
+                "movq %%mm2, (%1, %%"REG_a", 2)         \n\t" // R6
+                "movq %%mm3, (%1, %%"REG_c")            \n\t" // R7
+                "movq %%mm0, (%0, %2, 4)                \n\t" // L4
+                "movq %%mm1, (%0, %%"REG_d")            \n\t" // L5
+                "movq %%mm2, (%0, %%"REG_a", 2)         \n\t" // L6
+                "movq %%mm3, (%0, %%"REG_c")            \n\t" // L7
+                "jmp 4f                                 \n\t"
+
+                "3:                                     \n\t"
+                "lea (%%"REG_a", %2, 2), %%"REG_d"      \n\t" // 5*stride
+                "lea (%%"REG_d", %2, 2), %%"REG_c"      \n\t" // 7*stride
+                "movq (%0), %%mm0                       \n\t" // L0
+                "movq (%0, %2), %%mm1                   \n\t" // L1
+                "movq (%0, %2, 2), %%mm2                \n\t" // L2
+                "movq (%0, %%"REG_a"), %%mm3            \n\t" // L3
+                "movq (%1), %%mm4                       \n\t" // R0
+                "movq (%1, %2), %%mm5                   \n\t" // R1
+                "movq (%1, %2, 2), %%mm6                \n\t" // R2
+                "movq (%1, %%"REG_a"), %%mm7            \n\t" // R3
+                PAVGB(%%mm4, %%mm0)
+                PAVGB(%%mm5, %%mm1)
+                PAVGB(%%mm6, %%mm2)
+                PAVGB(%%mm7, %%mm3)
+                PAVGB(%%mm4, %%mm0)
+                PAVGB(%%mm5, %%mm1)
+                PAVGB(%%mm6, %%mm2)
+                PAVGB(%%mm7, %%mm3)
+                PAVGB(%%mm4, %%mm0)
+                PAVGB(%%mm5, %%mm1)
+                PAVGB(%%mm6, %%mm2)
+                PAVGB(%%mm7, %%mm3)
+                "movq %%mm0, (%1)                       \n\t" // R0
+                "movq %%mm1, (%1, %2)                   \n\t" // R1
+                "movq %%mm2, (%1, %2, 2)                \n\t" // R2
+                "movq %%mm3, (%1, %%"REG_a")            \n\t" // R3
+                "movq %%mm0, (%0)                       \n\t" // L0
+                "movq %%mm1, (%0, %2)                   \n\t" // L1
+                "movq %%mm2, (%0, %2, 2)                \n\t" // L2
+                "movq %%mm3, (%0, %%"REG_a")            \n\t" // L3
+
+                "movq (%0, %2, 4), %%mm0                \n\t" // L4
+                "movq (%0, %%"REG_d"), %%mm1            \n\t" // L5
+                "movq (%0, %%"REG_a", 2), %%mm2         \n\t" // L6
+                "movq (%0, %%"REG_c"), %%mm3            \n\t" // L7
+                "movq (%1, %2, 4), %%mm4                \n\t" // R4
+                "movq (%1, %%"REG_d"), %%mm5            \n\t" // R5
+                "movq (%1, %%"REG_a", 2), %%mm6         \n\t" // R6
+                "movq (%1, %%"REG_c"), %%mm7            \n\t" // R7
+                PAVGB(%%mm4, %%mm0)
+                PAVGB(%%mm5, %%mm1)
+                PAVGB(%%mm6, %%mm2)
+                PAVGB(%%mm7, %%mm3)
+                PAVGB(%%mm4, %%mm0)
+                PAVGB(%%mm5, %%mm1)
+                PAVGB(%%mm6, %%mm2)
+                PAVGB(%%mm7, %%mm3)
+                PAVGB(%%mm4, %%mm0)
+                PAVGB(%%mm5, %%mm1)
+                PAVGB(%%mm6, %%mm2)
+                PAVGB(%%mm7, %%mm3)
+                "movq %%mm0, (%1, %2, 4)                \n\t" // R4
+                "movq %%mm1, (%1, %%"REG_d")            \n\t" // R5
+                "movq %%mm2, (%1, %%"REG_a", 2)         \n\t" // R6
+                "movq %%mm3, (%1, %%"REG_c")            \n\t" // R7
+                "movq %%mm0, (%0, %2, 4)                \n\t" // L4
+                "movq %%mm1, (%0, %%"REG_d")            \n\t" // L5
+                "movq %%mm2, (%0, %%"REG_a", 2)         \n\t" // L6
+                "movq %%mm3, (%0, %%"REG_c")            \n\t" // L7
+
+                "4:                                     \n\t"
+
+                :: "r" (src), "r" (tempBlured), "r"((long)stride), "m" (tempBluredPast)
+                : "%"REG_a, "%"REG_d, "%"REG_c, "memory"
+                );
 //printf("%d\n", test);
-#else
+#else //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 {
-	int y;
-	int d=0;
-//	int sysd=0;
-	int i;
-
-	for(y=0; y<8; y++)
-	{
-		int x;
-		for(x=0; x<8; x++)
-		{
-			int ref= tempBlured[ x + y*stride ];
-			int cur= src[ x + y*stride ];
-			int d1=ref - cur;
-//			if(x==0 || x==7) d1+= d1>>1;
-//			if(y==0 || y==7) d1+= d1>>1;
-//			d+= ABS(d1);
-			d+= d1*d1;
-//			sysd+= d1;
-		}
-	}
-	i=d;
-	d= 	(
-		4*d
-		+(*(tempBluredPast-256))
-		+(*(tempBluredPast-1))+ (*(tempBluredPast+1))
-		+(*(tempBluredPast+256))
-		+4)>>3;
-	*tempBluredPast=i;
-//	((*tempBluredPast)*3 + d + 2)>>2;
+        int y;
+        int d=0;
+//        int sysd=0;
+        int i;
+
+        for(y=0; y<8; y++)
+        {
+                int x;
+                for(x=0; x<8; x++)
+                {
+                        int ref= tempBlured[ x + y*stride ];
+                        int cur= src[ x + y*stride ];
+                        int d1=ref - cur;
+//                        if(x==0 || x==7) d1+= d1>>1;
+//                        if(y==0 || y==7) d1+= d1>>1;
+//                        d+= ABS(d1);
+                        d+= d1*d1;
+//                        sysd+= d1;
+                }
+        }
+        i=d;
+        d=         (
+                4*d
+                +(*(tempBluredPast-256))
+                +(*(tempBluredPast-1))+ (*(tempBluredPast+1))
+                +(*(tempBluredPast+256))
+                +4)>>3;
+        *tempBluredPast=i;
+//        ((*tempBluredPast)*3 + d + 2)>>2;
 
 //printf("%d %d %d\n", maxNoise[0], maxNoise[1], maxNoise[2]);
 /*
@@ -2574,70 +2574,70 @@ Switch between
 64 48 36 27 20 15 11 (33) (approx)
 64 56 49 43 37 33 29 (200) (approx)
 */
-	if(d > maxNoise[1])
-	{
-		if(d < maxNoise[2])
-		{
-			for(y=0; y<8; y++)
-			{
-				int x;
-				for(x=0; x<8; x++)
-				{
-					int ref= tempBlured[ x + y*stride ];
-					int cur= src[ x + y*stride ];
-					tempBlured[ x + y*stride ]=
-					src[ x + y*stride ]=
-						(ref + cur + 1)>>1;
-				}
-			}
-		}
-		else
-		{
-			for(y=0; y<8; y++)
-			{
-				int x;
-				for(x=0; x<8; x++)
-				{
-					tempBlured[ x + y*stride ]= src[ x + y*stride ];
-				}
-			}
-		}
-	}
-	else
-	{
-		if(d < maxNoise[0])
-		{
-			for(y=0; y<8; y++)
-			{
-				int x;
-				for(x=0; x<8; x++)
-				{
-					int ref= tempBlured[ x + y*stride ];
-					int cur= src[ x + y*stride ];
-					tempBlured[ x + y*stride ]=
-					src[ x + y*stride ]=
-						(ref*7 + cur + 4)>>3;
-				}
-			}
-		}
-		else
-		{
-			for(y=0; y<8; y++)
-			{
-				int x;
-				for(x=0; x<8; x++)
-				{
-					int ref= tempBlured[ x + y*stride ];
-					int cur= src[ x + y*stride ];
-					tempBlured[ x + y*stride ]=
-					src[ x + y*stride ]=
-						(ref*3 + cur + 2)>>2;
-				}
-			}
-		}
-	}
+        if(d > maxNoise[1])
+        {
+                if(d < maxNoise[2])
+                {
+                        for(y=0; y<8; y++)
+                        {
+                                int x;
+                                for(x=0; x<8; x++)
+                                {
+                                        int ref= tempBlured[ x + y*stride ];
+                                        int cur= src[ x + y*stride ];
+                                        tempBlured[ x + y*stride ]=
+                                        src[ x + y*stride ]=
+                                                (ref + cur + 1)>>1;
+                                }
+                        }
+                }
+                else
+                {
+                        for(y=0; y<8; y++)
+                        {
+                                int x;
+                                for(x=0; x<8; x++)
+                                {
+                                        tempBlured[ x + y*stride ]= src[ x + y*stride ];
+                                }
+                        }
+                }
+        }
+        else
+        {
+                if(d < maxNoise[0])
+                {
+                        for(y=0; y<8; y++)
+                        {
+                                int x;
+                                for(x=0; x<8; x++)
+                                {
+                                        int ref= tempBlured[ x + y*stride ];
+                                        int cur= src[ x + y*stride ];
+                                        tempBlured[ x + y*stride ]=
+                                        src[ x + y*stride ]=
+                                                (ref*7 + cur + 4)>>3;
+                                }
+                        }
+                }
+                else
+                {
+                        for(y=0; y<8; y++)
+                        {
+                                int x;
+                                for(x=0; x<8; x++)
+                                {
+                                        int ref= tempBlured[ x + y*stride ];
+                                        int cur= src[ x + y*stride ];
+                                        tempBlured[ x + y*stride ]=
+                                        src[ x + y*stride ]=
+                                                (ref*3 + cur + 2)>>2;
+                                }
+                        }
+                }
+        }
 }
-#endif
+#endif //defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 }
 #endif //HAVE_ALTIVEC
 
@@ -2646,531 +2646,531 @@ Switch between
  * accurate deblock filter
  */
 static always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int stride, PPContext *c){
-	int64_t dc_mask, eq_mask, both_masks;
-	int64_t sums[10*8*2];
-	src+= step*3; // src points to begin of the 8x8 Block
+        int64_t dc_mask, eq_mask, both_masks;
+        int64_t sums[10*8*2];
+        src+= step*3; // src points to begin of the 8x8 Block
 //START_TIMER
 asm volatile(
-		"movq %0, %%mm7					\n\t" 
-		"movq %1, %%mm6					\n\t" 
+                "movq %0, %%mm7                         \n\t"
+                "movq %1, %%mm6                         \n\t"
                 : : "m" (c->mmxDcOffset[c->nonBQP]),  "m" (c->mmxDcThreshold[c->nonBQP])
                 );
-                
+
 asm volatile(
-		"lea (%2, %3), %%"REG_a"			\n\t"
-//	0	1	2	3	4	5	6	7	8	9
-//	%1	eax	eax+%2	eax+2%2	%1+4%2	ecx	ecx+%2	ecx+2%2	%1+8%2	ecx+4%2
-
-		"movq (%2), %%mm0				\n\t"
-		"movq (%%"REG_a"), %%mm1			\n\t"
-                "movq %%mm1, %%mm3				\n\t"
-                "movq %%mm1, %%mm4				\n\t"
-		"psubb %%mm1, %%mm0				\n\t" // mm0 = differnece
-		"paddb %%mm7, %%mm0				\n\t"
-		"pcmpgtb %%mm6, %%mm0				\n\t"
-
-		"movq (%%"REG_a",%3), %%mm2			\n\t"
+                "lea (%2, %3), %%"REG_a"                \n\t"
+//      0       1       2       3       4       5       6       7       8       9
+//      %1      eax     eax+%2  eax+2%2 %1+4%2  ecx     ecx+%2  ecx+2%2 %1+8%2  ecx+4%2
+
+                "movq (%2), %%mm0                       \n\t"
+                "movq (%%"REG_a"), %%mm1                \n\t"
+                "movq %%mm1, %%mm3                      \n\t"
+                "movq %%mm1, %%mm4                      \n\t"
+                "psubb %%mm1, %%mm0                     \n\t" // mm0 = difference
+                "paddb %%mm7, %%mm0                     \n\t"
+                "pcmpgtb %%mm6, %%mm0                   \n\t"
+
+                "movq (%%"REG_a",%3), %%mm2             \n\t"
                 PMAXUB(%%mm2, %%mm4)
                 PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
+                "psubb %%mm2, %%mm1                     \n\t"
+                "paddb %%mm7, %%mm1                     \n\t"
+                "pcmpgtb %%mm6, %%mm1                   \n\t"
+                "paddb %%mm1, %%mm0                     \n\t"
 
-		"movq (%%"REG_a", %3, 2), %%mm1			\n\t"
+                "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
                 PMAXUB(%%mm1, %%mm4)
                 PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
-		
-		"lea (%%"REG_a", %3, 4), %%"REG_a"		\n\t"
-
-		"movq (%2, %3, 4), %%mm2			\n\t"
+                "psubb %%mm1, %%mm2                     \n\t"
+                "paddb %%mm7, %%mm2                     \n\t"
+                "pcmpgtb %%mm6, %%mm2                   \n\t"
+                "paddb %%mm2, %%mm0                     \n\t"
+
+                "lea (%%"REG_a", %3, 4), %%"REG_a"      \n\t"
+
+                "movq (%2, %3, 4), %%mm2                \n\t"
                 PMAXUB(%%mm2, %%mm4)
                 PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
+                "psubb %%mm2, %%mm1                     \n\t"
+                "paddb %%mm7, %%mm1                     \n\t"
+                "pcmpgtb %%mm6, %%mm1                   \n\t"
+                "paddb %%mm1, %%mm0                     \n\t"
 
-		"movq (%%"REG_a"), %%mm1			\n\t"
+                "movq (%%"REG_a"), %%mm1                \n\t"
                 PMAXUB(%%mm1, %%mm4)
                 PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
+                "psubb %%mm1, %%mm2                     \n\t"
+                "paddb %%mm7, %%mm2                     \n\t"
+                "pcmpgtb %%mm6, %%mm2                   \n\t"
+                "paddb %%mm2, %%mm0                     \n\t"
 
-		"movq (%%"REG_a", %3), %%mm2			\n\t"
+                "movq (%%"REG_a", %3), %%mm2            \n\t"
                 PMAXUB(%%mm2, %%mm4)
                 PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
+                "psubb %%mm2, %%mm1                     \n\t"
+                "paddb %%mm7, %%mm1                     \n\t"
+                "pcmpgtb %%mm6, %%mm1                   \n\t"
+                "paddb %%mm1, %%mm0                     \n\t"
 
-		"movq (%%"REG_a", %3, 2), %%mm1			\n\t"
+                "movq (%%"REG_a", %3, 2), %%mm1         \n\t"
                 PMAXUB(%%mm1, %%mm4)
                 PMINUB(%%mm1, %%mm3, %%mm5)
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
+                "psubb %%mm1, %%mm2                     \n\t"
+                "paddb %%mm7, %%mm2                     \n\t"
+                "pcmpgtb %%mm6, %%mm2                   \n\t"
+                "paddb %%mm2, %%mm0                     \n\t"
 
-		"movq (%2, %3, 8), %%mm2			\n\t"
+                "movq (%2, %3, 8), %%mm2                \n\t"
                 PMAXUB(%%mm2, %%mm4)
                 PMINUB(%%mm2, %%mm3, %%mm5)
-		"psubb %%mm2, %%mm1				\n\t"
-		"paddb %%mm7, %%mm1				\n\t"
-		"pcmpgtb %%mm6, %%mm1				\n\t"
-		"paddb %%mm1, %%mm0				\n\t"
-
-		"movq (%%"REG_a", %3, 4), %%mm1			\n\t"
-		"psubb %%mm1, %%mm2				\n\t"
-		"paddb %%mm7, %%mm2				\n\t"
-		"pcmpgtb %%mm6, %%mm2				\n\t"
-		"paddb %%mm2, %%mm0				\n\t"
-		"psubusb %%mm3, %%mm4				\n\t"
-
-		"pxor %%mm6, %%mm6				\n\t"
-                "movq %4, %%mm7					\n\t" // QP,..., QP
-		"paddusb %%mm7, %%mm7				\n\t" // 2QP ... 2QP
-		"psubusb %%mm4, %%mm7				\n\t" // Diff >=2QP -> 0
-		"pcmpeqb %%mm6, %%mm7				\n\t" // Diff < 2QP -> 0
-		"pcmpeqb %%mm6, %%mm7				\n\t" // Diff < 2QP -> 0
-		"movq %%mm7, %1					\n\t"
-
-		"movq %5, %%mm7					\n\t"
-		"punpcklbw %%mm7, %%mm7				\n\t"
-		"punpcklbw %%mm7, %%mm7				\n\t"
-		"punpcklbw %%mm7, %%mm7				\n\t"
-		"psubb %%mm0, %%mm6				\n\t"
-		"pcmpgtb %%mm7, %%mm6				\n\t"
-		"movq %%mm6, %0					\n\t"
-
-		: "=m" (eq_mask), "=m" (dc_mask)
-		: "r" (src), "r" ((long)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
-		: "%"REG_a
-		);
-
-	both_masks = dc_mask & eq_mask;
-
-	if(both_masks){
-		long offset= -8*step;
-		int64_t *temp_sums= sums;
-
-		asm volatile(
-		"movq %2, %%mm0					\n\t"  // QP,..., QP
-		"pxor %%mm4, %%mm4				\n\t"
-
-		"movq (%0), %%mm6				\n\t"
-		"movq (%0, %1), %%mm5				\n\t"
-		"movq %%mm5, %%mm1				\n\t"
-		"movq %%mm6, %%mm2				\n\t"
-		"psubusb %%mm6, %%mm5				\n\t"
-		"psubusb %%mm1, %%mm2				\n\t"
-		"por %%mm5, %%mm2				\n\t" // ABS Diff of lines
-		"psubusb %%mm2, %%mm0				\n\t" // diff >= QP -> 0
-		"pcmpeqb %%mm4, %%mm0				\n\t" // diff >= QP -> FF
-
-		"pxor %%mm6, %%mm1				\n\t"
-		"pand %%mm0, %%mm1				\n\t"
-		"pxor %%mm1, %%mm6				\n\t"
-		// 0:QP  6:First
-
-		"movq (%0, %1, 8), %%mm5			\n\t"
-		"add %1, %0					\n\t" // %0 points to line 1 not 0
-		"movq (%0, %1, 8), %%mm7			\n\t"
-		"movq %%mm5, %%mm1				\n\t"
-		"movq %%mm7, %%mm2				\n\t"
-		"psubusb %%mm7, %%mm5				\n\t"
-		"psubusb %%mm1, %%mm2				\n\t"
-		"por %%mm5, %%mm2				\n\t" // ABS Diff of lines
-		"movq %2, %%mm0					\n\t"  // QP,..., QP
-		"psubusb %%mm2, %%mm0				\n\t" // diff >= QP -> 0
-		"pcmpeqb %%mm4, %%mm0				\n\t" // diff >= QP -> FF
-
-		"pxor %%mm7, %%mm1				\n\t"
-		"pand %%mm0, %%mm1				\n\t"
-		"pxor %%mm1, %%mm7				\n\t"
-		
-		"movq %%mm6, %%mm5				\n\t"
-		"punpckhbw %%mm4, %%mm6				\n\t"
-		"punpcklbw %%mm4, %%mm5				\n\t"
-		// 4:0 5/6:First 7:Last
-
-		"movq %%mm5, %%mm0				\n\t"
-		"movq %%mm6, %%mm1				\n\t"
-		"psllw $2, %%mm0				\n\t"
-		"psllw $2, %%mm1				\n\t"
-		"paddw "MANGLE(w04)", %%mm0			\n\t"
-		"paddw "MANGLE(w04)", %%mm1			\n\t"
+                "psubb %%mm2, %%mm1                     \n\t"
+                "paddb %%mm7, %%mm1                     \n\t"
+                "pcmpgtb %%mm6, %%mm1                   \n\t"
+                "paddb %%mm1, %%mm0                     \n\t"
+
+                "movq (%%"REG_a", %3, 4), %%mm1         \n\t"
+                "psubb %%mm1, %%mm2                     \n\t"
+                "paddb %%mm7, %%mm2                     \n\t"
+                "pcmpgtb %%mm6, %%mm2                   \n\t"
+                "paddb %%mm2, %%mm0                     \n\t"
+                "psubusb %%mm3, %%mm4                   \n\t"
+
+                "pxor %%mm6, %%mm6                      \n\t"
+                "movq %4, %%mm7                         \n\t" // QP,..., QP
+                "paddusb %%mm7, %%mm7                   \n\t" // 2QP ... 2QP
+                "psubusb %%mm4, %%mm7                   \n\t" // Diff >=2QP -> 0
+                "pcmpeqb %%mm6, %%mm7                   \n\t" // Diff < 2QP -> 0
+                "pcmpeqb %%mm6, %%mm7                   \n\t" // Diff < 2QP -> FF
+                "movq %%mm7, %1                         \n\t"
+
+                "movq %5, %%mm7                         \n\t"
+                "punpcklbw %%mm7, %%mm7                 \n\t"
+                "punpcklbw %%mm7, %%mm7                 \n\t"
+                "punpcklbw %%mm7, %%mm7                 \n\t"
+                "psubb %%mm0, %%mm6                     \n\t"
+                "pcmpgtb %%mm7, %%mm6                   \n\t"
+                "movq %%mm6, %0                         \n\t"
+
+                : "=m" (eq_mask), "=m" (dc_mask)
+                : "r" (src), "r" ((long)step), "m" (c->pQPb), "m"(c->ppMode.flatnessThreshold)
+                : "%"REG_a
+                );
+
+        both_masks = dc_mask & eq_mask;
+
+        if(both_masks){
+                long offset= -8*step;
+                int64_t *temp_sums= sums;
+
+                asm volatile(
+                "movq %2, %%mm0                         \n\t"  // QP,..., QP
+                "pxor %%mm4, %%mm4                      \n\t"
+
+                "movq (%0), %%mm6                       \n\t"
+                "movq (%0, %1), %%mm5                   \n\t"
+                "movq %%mm5, %%mm1                      \n\t"
+                "movq %%mm6, %%mm2                      \n\t"
+                "psubusb %%mm6, %%mm5                   \n\t"
+                "psubusb %%mm1, %%mm2                   \n\t"
+                "por %%mm5, %%mm2                       \n\t" // ABS Diff of lines
+                "psubusb %%mm2, %%mm0                   \n\t" // diff >= QP -> 0
+                "pcmpeqb %%mm4, %%mm0                   \n\t" // diff >= QP -> FF
+
+                "pxor %%mm6, %%mm1                      \n\t"
+                "pand %%mm0, %%mm1                      \n\t"
+                "pxor %%mm1, %%mm6                      \n\t"
+                // 0:QP  6:First
+
+                "movq (%0, %1, 8), %%mm5                \n\t"
+                "add %1, %0                             \n\t" // %0 points to line 1 not 0
+                "movq (%0, %1, 8), %%mm7                \n\t"
+                "movq %%mm5, %%mm1                      \n\t"
+                "movq %%mm7, %%mm2                      \n\t"
+                "psubusb %%mm7, %%mm5                   \n\t"
+                "psubusb %%mm1, %%mm2                   \n\t"
+                "por %%mm5, %%mm2                       \n\t" // ABS Diff of lines
+                "movq %2, %%mm0                         \n\t"  // QP,..., QP
+                "psubusb %%mm2, %%mm0                   \n\t" // diff >= QP -> 0
+                "pcmpeqb %%mm4, %%mm0                   \n\t" // diff >= QP -> FF
+
+                "pxor %%mm7, %%mm1                      \n\t"
+                "pand %%mm0, %%mm1                      \n\t"
+                "pxor %%mm1, %%mm7                      \n\t"
+
+                "movq %%mm6, %%mm5                      \n\t"
+                "punpckhbw %%mm4, %%mm6                 \n\t"
+                "punpcklbw %%mm4, %%mm5                 \n\t"
+                // 4:0 5/6:First 7:Last
+
+                "movq %%mm5, %%mm0                      \n\t"
+                "movq %%mm6, %%mm1                      \n\t"
+                "psllw $2, %%mm0                        \n\t"
+                "psllw $2, %%mm1                        \n\t"
+                "paddw "MANGLE(w04)", %%mm0             \n\t"
+                "paddw "MANGLE(w04)", %%mm1             \n\t"
 
 #define NEXT\
-		"movq (%0), %%mm2				\n\t"\
-		"movq (%0), %%mm3				\n\t"\
-		"add %1, %0					\n\t"\
-		"punpcklbw %%mm4, %%mm2				\n\t"\
-		"punpckhbw %%mm4, %%mm3				\n\t"\
-		"paddw %%mm2, %%mm0				\n\t"\
-		"paddw %%mm3, %%mm1				\n\t"
+                "movq (%0), %%mm2                       \n\t"\
+                "movq (%0), %%mm3                       \n\t"\
+                "add %1, %0                             \n\t"\
+                "punpcklbw %%mm4, %%mm2                 \n\t"\
+                "punpckhbw %%mm4, %%mm3                 \n\t"\
+                "paddw %%mm2, %%mm0                     \n\t"\
+                "paddw %%mm3, %%mm1                     \n\t"
 
 #define PREV\
-		"movq (%0), %%mm2				\n\t"\
-		"movq (%0), %%mm3				\n\t"\
-		"add %1, %0					\n\t"\
-		"punpcklbw %%mm4, %%mm2				\n\t"\
-		"punpckhbw %%mm4, %%mm3				\n\t"\
-		"psubw %%mm2, %%mm0				\n\t"\
-		"psubw %%mm3, %%mm1				\n\t"
-
-				
-		NEXT //0
-		NEXT //1
-		NEXT //2
-		"movq %%mm0, (%3)				\n\t"
-		"movq %%mm1, 8(%3)				\n\t"
-
-		NEXT //3
-		"psubw %%mm5, %%mm0				\n\t"
-		"psubw %%mm6, %%mm1				\n\t"
-		"movq %%mm0, 16(%3)				\n\t"
-		"movq %%mm1, 24(%3)				\n\t"
-
-		NEXT //4
-		"psubw %%mm5, %%mm0				\n\t"
-		"psubw %%mm6, %%mm1				\n\t"
-		"movq %%mm0, 32(%3)				\n\t"
-		"movq %%mm1, 40(%3)				\n\t"
-
-		NEXT //5
-		"psubw %%mm5, %%mm0				\n\t"
-		"psubw %%mm6, %%mm1				\n\t"
-		"movq %%mm0, 48(%3)				\n\t"
-		"movq %%mm1, 56(%3)				\n\t"
-
-		NEXT //6
-		"psubw %%mm5, %%mm0				\n\t"
-		"psubw %%mm6, %%mm1				\n\t"
-		"movq %%mm0, 64(%3)				\n\t"
-		"movq %%mm1, 72(%3)				\n\t"
-
-		"movq %%mm7, %%mm6				\n\t"
-		"punpckhbw %%mm4, %%mm7				\n\t"
-		"punpcklbw %%mm4, %%mm6				\n\t"
-		
-		NEXT //7
-		"mov %4, %0					\n\t"
-		"add %1, %0					\n\t"
-		PREV //0
-		"movq %%mm0, 80(%3)				\n\t"
-		"movq %%mm1, 88(%3)				\n\t"
-
-		PREV //1
-		"paddw %%mm6, %%mm0				\n\t"
-		"paddw %%mm7, %%mm1				\n\t"
-		"movq %%mm0, 96(%3)				\n\t"
-		"movq %%mm1, 104(%3)				\n\t"
-		
-		PREV //2
-		"paddw %%mm6, %%mm0				\n\t"
-		"paddw %%mm7, %%mm1				\n\t"
-		"movq %%mm0, 112(%3)				\n\t"
-		"movq %%mm1, 120(%3)				\n\t"
-
-		PREV //3
-		"paddw %%mm6, %%mm0				\n\t"
-		"paddw %%mm7, %%mm1				\n\t"
-		"movq %%mm0, 128(%3)				\n\t"
-		"movq %%mm1, 136(%3)				\n\t"
-
-		PREV //4
-		"paddw %%mm6, %%mm0				\n\t"
-		"paddw %%mm7, %%mm1				\n\t"
-		"movq %%mm0, 144(%3)				\n\t"
-		"movq %%mm1, 152(%3)				\n\t"
-
-		"mov %4, %0					\n\t" //FIXME
-
-		: "+&r"(src)
-		: "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src)
-		);
-
-		src+= step; // src points to begin of the 8x8 Block
-
-		asm volatile(
-		"movq %4, %%mm6					\n\t"
-		"pcmpeqb %%mm5, %%mm5				\n\t"
-		"pxor %%mm6, %%mm5				\n\t"
-		"pxor %%mm7, %%mm7				\n\t"
-
-		"1:						\n\t"
-		"movq (%1), %%mm0				\n\t"
-		"movq 8(%1), %%mm1				\n\t"
-		"paddw 32(%1), %%mm0				\n\t"
-		"paddw 40(%1), %%mm1				\n\t"
-		"movq (%0, %3), %%mm2				\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"movq %%mm2, %%mm4				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t"
-		"punpckhbw %%mm7, %%mm3				\n\t"
-		"paddw %%mm2, %%mm0				\n\t"
-		"paddw %%mm3, %%mm1				\n\t"
-		"paddw %%mm2, %%mm0				\n\t"
-		"paddw %%mm3, %%mm1				\n\t"
-		"psrlw $4, %%mm0				\n\t"
-		"psrlw $4, %%mm1				\n\t"
-		"packuswb %%mm1, %%mm0				\n\t"
-		"pand %%mm6, %%mm0				\n\t"
-		"pand %%mm5, %%mm4				\n\t"
-		"por %%mm4, %%mm0				\n\t"
-		"movq %%mm0, (%0, %3)				\n\t"
-		"add $16, %1					\n\t"
-		"add %2, %0					\n\t"
-		" js 1b						\n\t"
-
-		: "+r"(offset), "+r"(temp_sums)
-		: "r" ((long)step), "r"(src - offset), "m"(both_masks)
-		);
-	}else
-		src+= step; // src points to begin of the 8x8 Block
-
-	if(eq_mask != -1LL){
-		uint8_t *temp_src= src;
-		asm volatile(
-		"pxor %%mm7, %%mm7				\n\t"
-		"lea -40(%%"REG_SP"), %%"REG_c"			\n\t" // make space for 4 8-byte vars
-		"and "ALIGN_MASK", %%"REG_c"			\n\t" // align
-//	0	1	2	3	4	5	6	7	8	9
-//	%0	eax	eax+%1	eax+2%1	%0+4%1	ecx	ecx+%1	ecx+2%1	%1+8%1	ecx+4%1
-
-		"movq (%0), %%mm0				\n\t"
-		"movq %%mm0, %%mm1				\n\t"
-		"punpcklbw %%mm7, %%mm0				\n\t" // low part of line 0
-		"punpckhbw %%mm7, %%mm1				\n\t" // high part of line 0
-
-		"movq (%0, %1), %%mm2				\n\t"
-		"lea (%0, %1, 2), %%"REG_a"			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // low part of line 1
-		"punpckhbw %%mm7, %%mm3				\n\t" // high part of line 1
-
-		"movq (%%"REG_a"), %%mm4			\n\t"
-		"movq %%mm4, %%mm5				\n\t"
-		"punpcklbw %%mm7, %%mm4				\n\t" // low part of line 2
-		"punpckhbw %%mm7, %%mm5				\n\t" // high part of line 2
-
-		"paddw %%mm0, %%mm0				\n\t" // 2L0
-		"paddw %%mm1, %%mm1				\n\t" // 2H0
-		"psubw %%mm4, %%mm2				\n\t" // L1 - L2
-		"psubw %%mm5, %%mm3				\n\t" // H1 - H2
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - L1 + L2
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - H1 + H2
-
-		"psllw $2, %%mm2				\n\t" // 4L1 - 4L2
-		"psllw $2, %%mm3				\n\t" // 4H1 - 4H2
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - 5L1 + 5L2
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - 5H1 + 5H2
-
-		"movq (%%"REG_a", %1), %%mm2			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // L3
-		"punpckhbw %%mm7, %%mm3				\n\t" // H3
-
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - 5L1 + 5L2 - L3
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - 5H1 + 5H2 - H3
-		"psubw %%mm2, %%mm0				\n\t" // 2L0 - 5L1 + 5L2 - 2L3
-		"psubw %%mm3, %%mm1				\n\t" // 2H0 - 5H1 + 5H2 - 2H3
-		"movq %%mm0, (%%"REG_c")			\n\t" // 2L0 - 5L1 + 5L2 - 2L3
-		"movq %%mm1, 8(%%"REG_c")			\n\t" // 2H0 - 5H1 + 5H2 - 2H3
-
-		"movq (%%"REG_a", %1, 2), %%mm0			\n\t"
-		"movq %%mm0, %%mm1				\n\t"
-		"punpcklbw %%mm7, %%mm0				\n\t" // L4
-		"punpckhbw %%mm7, %%mm1				\n\t" // H4
-
-		"psubw %%mm0, %%mm2				\n\t" // L3 - L4
-		"psubw %%mm1, %%mm3				\n\t" // H3 - H4
-		"movq %%mm2, 16(%%"REG_c")			\n\t" // L3 - L4
-		"movq %%mm3, 24(%%"REG_c")			\n\t" // H3 - H4
-		"paddw %%mm4, %%mm4				\n\t" // 2L2
-		"paddw %%mm5, %%mm5				\n\t" // 2H2
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - L3 + L4
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - H3 + H4
-
-		"lea (%%"REG_a", %1), %0			\n\t"
-		"psllw $2, %%mm2				\n\t" // 4L3 - 4L4
-		"psllw $2, %%mm3				\n\t" // 4H3 - 4H4
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - 5L3 + 5L4
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - 5H3 + 5H4
+                "movq (%0), %%mm2                       \n\t"\
+                "movq (%0), %%mm3                       \n\t"\
+                "add %1, %0                             \n\t"\
+                "punpcklbw %%mm4, %%mm2                 \n\t"\
+                "punpckhbw %%mm4, %%mm3                 \n\t"\
+                "psubw %%mm2, %%mm0                     \n\t"\
+                "psubw %%mm3, %%mm1                     \n\t"
+
+
+                NEXT //0
+                NEXT //1
+                NEXT //2
+                "movq %%mm0, (%3)                       \n\t"
+                "movq %%mm1, 8(%3)                      \n\t"
+
+                NEXT //3
+                "psubw %%mm5, %%mm0                     \n\t"
+                "psubw %%mm6, %%mm1                     \n\t"
+                "movq %%mm0, 16(%3)                     \n\t"
+                "movq %%mm1, 24(%3)                     \n\t"
+
+                NEXT //4
+                "psubw %%mm5, %%mm0                     \n\t"
+                "psubw %%mm6, %%mm1                     \n\t"
+                "movq %%mm0, 32(%3)                     \n\t"
+                "movq %%mm1, 40(%3)                     \n\t"
+
+                NEXT //5
+                "psubw %%mm5, %%mm0                     \n\t"
+                "psubw %%mm6, %%mm1                     \n\t"
+                "movq %%mm0, 48(%3)                     \n\t"
+                "movq %%mm1, 56(%3)                     \n\t"
+
+                NEXT //6
+                "psubw %%mm5, %%mm0                     \n\t"
+                "psubw %%mm6, %%mm1                     \n\t"
+                "movq %%mm0, 64(%3)                     \n\t"
+                "movq %%mm1, 72(%3)                     \n\t"
+
+                "movq %%mm7, %%mm6                      \n\t"
+                "punpckhbw %%mm4, %%mm7                 \n\t"
+                "punpcklbw %%mm4, %%mm6                 \n\t"
+
+                NEXT //7
+                "mov %4, %0                             \n\t"
+                "add %1, %0                             \n\t"
+                PREV //0
+                "movq %%mm0, 80(%3)                     \n\t"
+                "movq %%mm1, 88(%3)                     \n\t"
+
+                PREV //1
+                "paddw %%mm6, %%mm0                     \n\t"
+                "paddw %%mm7, %%mm1                     \n\t"
+                "movq %%mm0, 96(%3)                     \n\t"
+                "movq %%mm1, 104(%3)                    \n\t"
+
+                PREV //2
+                "paddw %%mm6, %%mm0                     \n\t"
+                "paddw %%mm7, %%mm1                     \n\t"
+                "movq %%mm0, 112(%3)                    \n\t"
+                "movq %%mm1, 120(%3)                    \n\t"
+
+                PREV //3
+                "paddw %%mm6, %%mm0                     \n\t"
+                "paddw %%mm7, %%mm1                     \n\t"
+                "movq %%mm0, 128(%3)                    \n\t"
+                "movq %%mm1, 136(%3)                    \n\t"
+
+                PREV //4
+                "paddw %%mm6, %%mm0                     \n\t"
+                "paddw %%mm7, %%mm1                     \n\t"
+                "movq %%mm0, 144(%3)                    \n\t"
+                "movq %%mm1, 152(%3)                    \n\t"
+
+                "mov %4, %0                             \n\t" //FIXME
+
+                : "+&r"(src)
+                : "r" ((long)step), "m" (c->pQPb), "r"(sums), "g"(src)
+                );
+
+                src+= step; // src points to begin of the 8x8 Block
+
+                asm volatile(
+                "movq %4, %%mm6                         \n\t"
+                "pcmpeqb %%mm5, %%mm5                   \n\t"
+                "pxor %%mm6, %%mm5                      \n\t"
+                "pxor %%mm7, %%mm7                      \n\t"
+
+                "1:                                     \n\t"
+                "movq (%1), %%mm0                       \n\t"
+                "movq 8(%1), %%mm1                      \n\t"
+                "paddw 32(%1), %%mm0                    \n\t"
+                "paddw 40(%1), %%mm1                    \n\t"
+                "movq (%0, %3), %%mm2                   \n\t"
+                "movq %%mm2, %%mm3                      \n\t"
+                "movq %%mm2, %%mm4                      \n\t"
+                "punpcklbw %%mm7, %%mm2                 \n\t"
+                "punpckhbw %%mm7, %%mm3                 \n\t"
+                "paddw %%mm2, %%mm0                     \n\t"
+                "paddw %%mm3, %%mm1                     \n\t"
+                "paddw %%mm2, %%mm0                     \n\t"
+                "paddw %%mm3, %%mm1                     \n\t"
+                "psrlw $4, %%mm0                        \n\t"
+                "psrlw $4, %%mm1                        \n\t"
+                "packuswb %%mm1, %%mm0                  \n\t"
+                "pand %%mm6, %%mm0                      \n\t"
+                "pand %%mm5, %%mm4                      \n\t"
+                "por %%mm4, %%mm0                       \n\t"
+                "movq %%mm0, (%0, %3)                   \n\t"
+                "add $16, %1                            \n\t"
+                "add %2, %0                             \n\t"
+                " js 1b                                 \n\t"
+
+                : "+r"(offset), "+r"(temp_sums)
+                : "r" ((long)step), "r"(src - offset), "m"(both_masks)
+                );
+        }else
+                src+= step; // src points to begin of the 8x8 Block
+
+        if(eq_mask != -1LL){
+                uint8_t *temp_src= src;
+                asm volatile(
+                "pxor %%mm7, %%mm7                      \n\t"
+                "lea -40(%%"REG_SP"), %%"REG_c"         \n\t" // make space for 4 8-byte vars
+                "and "ALIGN_MASK", %%"REG_c"            \n\t" // align
+//      0       1       2       3       4       5       6       7       8       9
+//      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %1+8%1  ecx+4%1
+
+                "movq (%0), %%mm0                       \n\t"
+                "movq %%mm0, %%mm1                      \n\t"
+                "punpcklbw %%mm7, %%mm0                 \n\t" // low part of line 0
+                "punpckhbw %%mm7, %%mm1                 \n\t" // high part of line 0
+
+                "movq (%0, %1), %%mm2                   \n\t"
+                "lea (%0, %1, 2), %%"REG_a"             \n\t"
+                "movq %%mm2, %%mm3                      \n\t"
+                "punpcklbw %%mm7, %%mm2                 \n\t" // low part of line 1
+                "punpckhbw %%mm7, %%mm3                 \n\t" // high part of line 1
+
+                "movq (%%"REG_a"), %%mm4                \n\t"
+                "movq %%mm4, %%mm5                      \n\t"
+                "punpcklbw %%mm7, %%mm4                 \n\t" // low part of line 2
+                "punpckhbw %%mm7, %%mm5                 \n\t" // high part of line 2
+
+                "paddw %%mm0, %%mm0                     \n\t" // 2L0
+                "paddw %%mm1, %%mm1                     \n\t" // 2H0
+                "psubw %%mm4, %%mm2                     \n\t" // L1 - L2
+                "psubw %%mm5, %%mm3                     \n\t" // H1 - H2
+                "psubw %%mm2, %%mm0                     \n\t" // 2L0 - L1 + L2
+                "psubw %%mm3, %%mm1                     \n\t" // 2H0 - H1 + H2
+
+                "psllw $2, %%mm2                        \n\t" // 4L1 - 4L2
+                "psllw $2, %%mm3                        \n\t" // 4H1 - 4H2
+                "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2
+                "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2
+
+                "movq (%%"REG_a", %1), %%mm2            \n\t"
+                "movq %%mm2, %%mm3                      \n\t"
+                "punpcklbw %%mm7, %%mm2                 \n\t" // L3
+                "punpckhbw %%mm7, %%mm3                 \n\t" // H3
+
+                "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2 - L3
+                "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - H3
+                "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+                "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+                "movq %%mm0, (%%"REG_c")                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+                "movq %%mm1, 8(%%"REG_c")               \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+
+                "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
+                "movq %%mm0, %%mm1                      \n\t"
+                "punpcklbw %%mm7, %%mm0                 \n\t" // L4
+                "punpckhbw %%mm7, %%mm1                 \n\t" // H4
+
+                "psubw %%mm0, %%mm2                     \n\t" // L3 - L4
+                "psubw %%mm1, %%mm3                     \n\t" // H3 - H4
+                "movq %%mm2, 16(%%"REG_c")              \n\t" // L3 - L4
+                "movq %%mm3, 24(%%"REG_c")              \n\t" // H3 - H4
+                "paddw %%mm4, %%mm4                     \n\t" // 2L2
+                "paddw %%mm5, %%mm5                     \n\t" // 2H2
+                "psubw %%mm2, %%mm4                     \n\t" // 2L2 - L3 + L4
+                "psubw %%mm3, %%mm5                     \n\t" // 2H2 - H3 + H4
+
+                "lea (%%"REG_a", %1), %0                \n\t"
+                "psllw $2, %%mm2                        \n\t" // 4L3 - 4L4
+                "psllw $2, %%mm3                        \n\t" // 4H3 - 4H4
+                "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4
+                "psubw %%mm3, %%mm5                     \n\t" // 2H2 - 5H3 + 5H4
 //50 opcodes so far
-		"movq (%0, %1, 2), %%mm2			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // L5
-		"punpckhbw %%mm7, %%mm3				\n\t" // H5
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - 5L3 + 5L4 - L5
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - 5H3 + 5H4 - H5
-		"psubw %%mm2, %%mm4				\n\t" // 2L2 - 5L3 + 5L4 - 2L5
-		"psubw %%mm3, %%mm5				\n\t" // 2H2 - 5H3 + 5H4 - 2H5
-
-		"movq (%%"REG_a", %1, 4), %%mm6			\n\t"
-		"punpcklbw %%mm7, %%mm6				\n\t" // L6
-		"psubw %%mm6, %%mm2				\n\t" // L5 - L6
-		"movq (%%"REG_a", %1, 4), %%mm6			\n\t"
-		"punpckhbw %%mm7, %%mm6				\n\t" // H6
-		"psubw %%mm6, %%mm3				\n\t" // H5 - H6
-
-		"paddw %%mm0, %%mm0				\n\t" // 2L4
-		"paddw %%mm1, %%mm1				\n\t" // 2H4
-		"psubw %%mm2, %%mm0				\n\t" // 2L4 - L5 + L6
-		"psubw %%mm3, %%mm1				\n\t" // 2H4 - H5 + H6
-
-		"psllw $2, %%mm2				\n\t" // 4L5 - 4L6
-		"psllw $2, %%mm3				\n\t" // 4H5 - 4H6
-		"psubw %%mm2, %%mm0				\n\t" // 2L4 - 5L5 + 5L6
-		"psubw %%mm3, %%mm1				\n\t" // 2H4 - 5H5 + 5H6
-
-		"movq (%0, %1, 4), %%mm2			\n\t"
-		"movq %%mm2, %%mm3				\n\t"
-		"punpcklbw %%mm7, %%mm2				\n\t" // L7
-		"punpckhbw %%mm7, %%mm3				\n\t" // H7
-
-		"paddw %%mm2, %%mm2				\n\t" // 2L7
-		"paddw %%mm3, %%mm3				\n\t" // 2H7
-		"psubw %%mm2, %%mm0				\n\t" // 2L4 - 5L5 + 5L6 - 2L7
-		"psubw %%mm3, %%mm1				\n\t" // 2H4 - 5H5 + 5H6 - 2H7
-
-		"movq (%%"REG_c"), %%mm2			\n\t" // 2L0 - 5L1 + 5L2 - 2L3
-		"movq 8(%%"REG_c"), %%mm3			\n\t" // 2H0 - 5H1 + 5H2 - 2H3
+                "movq (%0, %1, 2), %%mm2                \n\t"
+                "movq %%mm2, %%mm3                      \n\t"
+                "punpcklbw %%mm7, %%mm2                 \n\t" // L5
+                "punpckhbw %%mm7, %%mm3                 \n\t" // H5
+                "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4 - L5
+                "psubw %%mm3, %%mm5                     \n\t" // 2H2 - 5H3 + 5H4 - H5
+                "psubw %%mm2, %%mm4                     \n\t" // 2L2 - 5L3 + 5L4 - 2L5
+                "psubw %%mm3, %%mm5                     \n\t" // 2H2 - 5H3 + 5H4 - 2H5
+
+                "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
+                "punpcklbw %%mm7, %%mm6                 \n\t" // L6
+                "psubw %%mm6, %%mm2                     \n\t" // L5 - L6
+                "movq (%%"REG_a", %1, 4), %%mm6         \n\t"
+                "punpckhbw %%mm7, %%mm6                 \n\t" // H6
+                "psubw %%mm6, %%mm3                     \n\t" // H5 - H6
+
+                "paddw %%mm0, %%mm0                     \n\t" // 2L4
+                "paddw %%mm1, %%mm1                     \n\t" // 2H4
+                "psubw %%mm2, %%mm0                     \n\t" // 2L4 - L5 + L6
+                "psubw %%mm3, %%mm1                     \n\t" // 2H4 - H5 + H6
+
+                "psllw $2, %%mm2                        \n\t" // 4L5 - 4L6
+                "psllw $2, %%mm3                        \n\t" // 4H5 - 4H6
+                "psubw %%mm2, %%mm0                     \n\t" // 2L4 - 5L5 + 5L6
+                "psubw %%mm3, %%mm1                     \n\t" // 2H4 - 5H5 + 5H6
+
+                "movq (%0, %1, 4), %%mm2                \n\t"
+                "movq %%mm2, %%mm3                      \n\t"
+                "punpcklbw %%mm7, %%mm2                 \n\t" // L7
+                "punpckhbw %%mm7, %%mm3                 \n\t" // H7
+
+                "paddw %%mm2, %%mm2                     \n\t" // 2L7
+                "paddw %%mm3, %%mm3                     \n\t" // 2H7
+                "psubw %%mm2, %%mm0                     \n\t" // 2L4 - 5L5 + 5L6 - 2L7
+                "psubw %%mm3, %%mm1                     \n\t" // 2H4 - 5H5 + 5H6 - 2H7
+
+                "movq (%%"REG_c"), %%mm2                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+                "movq 8(%%"REG_c"), %%mm3               \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 
 #ifdef HAVE_MMX2
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm0, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm0				\n\t" // |2L4 - 5L5 + 5L6 - 2L7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm1, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm1				\n\t" // |2H4 - 5H5 + 5H6 - 2H7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm2, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm2				\n\t" // |2L0 - 5L1 + 5L2 - 2L3|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"psubw %%mm3, %%mm6				\n\t"
-		"pmaxsw %%mm6, %%mm3				\n\t" // |2H0 - 5H1 + 5H2 - 2H3|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "psubw %%mm0, %%mm6                     \n\t"
+                "pmaxsw %%mm6, %%mm0                    \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "psubw %%mm1, %%mm6                     \n\t"
+                "pmaxsw %%mm6, %%mm1                    \n\t" // |2H4 - 5H5 + 5H6 - 2H7|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "psubw %%mm2, %%mm6                     \n\t"
+                "pmaxsw %%mm6, %%mm2                    \n\t" // |2L0 - 5L1 + 5L2 - 2L3|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "psubw %%mm3, %%mm6                     \n\t"
+                "pmaxsw %%mm6, %%mm3                    \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
 #else
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm0, %%mm6				\n\t"
-		"pxor %%mm6, %%mm0				\n\t"
-		"psubw %%mm6, %%mm0				\n\t" // |2L4 - 5L5 + 5L6 - 2L7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm1, %%mm6				\n\t"
-		"pxor %%mm6, %%mm1				\n\t"
-		"psubw %%mm6, %%mm1				\n\t" // |2H4 - 5H5 + 5H6 - 2H7|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm2, %%mm6				\n\t"
-		"pxor %%mm6, %%mm2				\n\t"
-		"psubw %%mm6, %%mm2				\n\t" // |2L0 - 5L1 + 5L2 - 2L3|
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm3, %%mm6				\n\t"
-		"pxor %%mm6, %%mm3				\n\t"
-		"psubw %%mm6, %%mm3				\n\t" // |2H0 - 5H1 + 5H2 - 2H3|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "pcmpgtw %%mm0, %%mm6                   \n\t"
+                "pxor %%mm6, %%mm0                      \n\t"
+                "psubw %%mm6, %%mm0                     \n\t" // |2L4 - 5L5 + 5L6 - 2L7|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "pcmpgtw %%mm1, %%mm6                   \n\t"
+                "pxor %%mm6, %%mm1                      \n\t"
+                "psubw %%mm6, %%mm1                     \n\t" // |2H4 - 5H5 + 5H6 - 2H7|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "pcmpgtw %%mm2, %%mm6                   \n\t"
+                "pxor %%mm6, %%mm2                      \n\t"
+                "psubw %%mm6, %%mm2                     \n\t" // |2L0 - 5L1 + 5L2 - 2L3|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "pcmpgtw %%mm3, %%mm6                   \n\t"
+                "pxor %%mm6, %%mm3                      \n\t"
+                "psubw %%mm6, %%mm3                     \n\t" // |2H0 - 5H1 + 5H2 - 2H3|
 #endif
 
 #ifdef HAVE_MMX2
-		"pminsw %%mm2, %%mm0				\n\t"
-		"pminsw %%mm3, %%mm1				\n\t"
+                "pminsw %%mm2, %%mm0                    \n\t"
+                "pminsw %%mm3, %%mm1                    \n\t"
 #else
-		"movq %%mm0, %%mm6				\n\t"
-		"psubusw %%mm2, %%mm6				\n\t"
-		"psubw %%mm6, %%mm0				\n\t"
-		"movq %%mm1, %%mm6				\n\t"
-		"psubusw %%mm3, %%mm6				\n\t"
-		"psubw %%mm6, %%mm1				\n\t"
+                "movq %%mm0, %%mm6                      \n\t"
+                "psubusw %%mm2, %%mm6                   \n\t"
+                "psubw %%mm6, %%mm0                     \n\t"
+                "movq %%mm1, %%mm6                      \n\t"
+                "psubusw %%mm3, %%mm6                   \n\t"
+                "psubw %%mm6, %%mm1                     \n\t"
 #endif
 
-		"movd %2, %%mm2					\n\t" // QP
-		"punpcklbw %%mm7, %%mm2				\n\t"
+                "movd %2, %%mm2                         \n\t" // QP
+                "punpcklbw %%mm7, %%mm2                 \n\t"
 
-		"movq %%mm7, %%mm6				\n\t" // 0
-		"pcmpgtw %%mm4, %%mm6				\n\t" // sign(2L2 - 5L3 + 5L4 - 2L5)
-		"pxor %%mm6, %%mm4				\n\t"
-		"psubw %%mm6, %%mm4				\n\t" // |2L2 - 5L3 + 5L4 - 2L5|
-		"pcmpgtw %%mm5, %%mm7				\n\t" // sign(2H2 - 5H3 + 5H4 - 2H5)
-		"pxor %%mm7, %%mm5				\n\t"
-		"psubw %%mm7, %%mm5				\n\t" // |2H2 - 5H3 + 5H4 - 2H5|
+                "movq %%mm7, %%mm6                      \n\t" // 0
+                "pcmpgtw %%mm4, %%mm6                   \n\t" // sign(2L2 - 5L3 + 5L4 - 2L5)
+                "pxor %%mm6, %%mm4                      \n\t"
+                "psubw %%mm6, %%mm4                     \n\t" // |2L2 - 5L3 + 5L4 - 2L5|
+                "pcmpgtw %%mm5, %%mm7                   \n\t" // sign(2H2 - 5H3 + 5H4 - 2H5)
+                "pxor %%mm7, %%mm5                      \n\t"
+                "psubw %%mm7, %%mm5                     \n\t" // |2H2 - 5H3 + 5H4 - 2H5|
 // 100 opcodes
-		"psllw $3, %%mm2				\n\t" // 8QP
-		"movq %%mm2, %%mm3				\n\t" // 8QP
-		"pcmpgtw %%mm4, %%mm2				\n\t"
-		"pcmpgtw %%mm5, %%mm3				\n\t"
-		"pand %%mm2, %%mm4				\n\t"
-		"pand %%mm3, %%mm5				\n\t"
-
-
-		"psubusw %%mm0, %%mm4				\n\t" // hd
-		"psubusw %%mm1, %%mm5				\n\t" // ld
-
-
-		"movq "MANGLE(w05)", %%mm2			\n\t" // 5
-		"pmullw %%mm2, %%mm4				\n\t"
-		"pmullw %%mm2, %%mm5				\n\t"
-		"movq "MANGLE(w20)", %%mm2			\n\t" // 32
-		"paddw %%mm2, %%mm4				\n\t"
-		"paddw %%mm2, %%mm5				\n\t"
-		"psrlw $6, %%mm4				\n\t"
-		"psrlw $6, %%mm5				\n\t"
-
-		"movq 16(%%"REG_c"), %%mm0			\n\t" // L3 - L4
-		"movq 24(%%"REG_c"), %%mm1			\n\t" // H3 - H4
-
-		"pxor %%mm2, %%mm2				\n\t"
-		"pxor %%mm3, %%mm3				\n\t"
-
-		"pcmpgtw %%mm0, %%mm2				\n\t" // sign (L3-L4)
-		"pcmpgtw %%mm1, %%mm3				\n\t" // sign (H3-H4)
-		"pxor %%mm2, %%mm0				\n\t"
-		"pxor %%mm3, %%mm1				\n\t"
-		"psubw %%mm2, %%mm0				\n\t" // |L3-L4|
-		"psubw %%mm3, %%mm1				\n\t" // |H3-H4|
-		"psrlw $1, %%mm0				\n\t" // |L3 - L4|/2
-		"psrlw $1, %%mm1				\n\t" // |H3 - H4|/2
-
-		"pxor %%mm6, %%mm2				\n\t"
-		"pxor %%mm7, %%mm3				\n\t"
-		"pand %%mm2, %%mm4				\n\t"
-		"pand %%mm3, %%mm5				\n\t"
+                "psllw $3, %%mm2                        \n\t" // 8QP
+                "movq %%mm2, %%mm3                      \n\t" // 8QP
+                "pcmpgtw %%mm4, %%mm2                   \n\t"
+                "pcmpgtw %%mm5, %%mm3                   \n\t"
+                "pand %%mm2, %%mm4                      \n\t"
+                "pand %%mm3, %%mm5                      \n\t"
+
+
+                "psubusw %%mm0, %%mm4                   \n\t" // hd
+                "psubusw %%mm1, %%mm5                   \n\t" // ld
+
+
+                "movq "MANGLE(w05)", %%mm2              \n\t" // 5
+                "pmullw %%mm2, %%mm4                    \n\t"
+                "pmullw %%mm2, %%mm5                    \n\t"
+                "movq "MANGLE(w20)", %%mm2              \n\t" // 32
+                "paddw %%mm2, %%mm4                     \n\t"
+                "paddw %%mm2, %%mm5                     \n\t"
+                "psrlw $6, %%mm4                        \n\t"
+                "psrlw $6, %%mm5                        \n\t"
+
+                "movq 16(%%"REG_c"), %%mm0              \n\t" // L3 - L4
+                "movq 24(%%"REG_c"), %%mm1              \n\t" // H3 - H4
+
+                "pxor %%mm2, %%mm2                      \n\t"
+                "pxor %%mm3, %%mm3                      \n\t"
+
+                "pcmpgtw %%mm0, %%mm2                   \n\t" // sign (L3-L4)
+                "pcmpgtw %%mm1, %%mm3                   \n\t" // sign (H3-H4)
+                "pxor %%mm2, %%mm0                      \n\t"
+                "pxor %%mm3, %%mm1                      \n\t"
+                "psubw %%mm2, %%mm0                     \n\t" // |L3-L4|
+                "psubw %%mm3, %%mm1                     \n\t" // |H3-H4|
+                "psrlw $1, %%mm0                        \n\t" // |L3 - L4|/2
+                "psrlw $1, %%mm1                        \n\t" // |H3 - H4|/2
+
+                "pxor %%mm6, %%mm2                      \n\t"
+                "pxor %%mm7, %%mm3                      \n\t"
+                "pand %%mm2, %%mm4                      \n\t"
+                "pand %%mm3, %%mm5                      \n\t"
 
 #ifdef HAVE_MMX2
-		"pminsw %%mm0, %%mm4				\n\t"
-		"pminsw %%mm1, %%mm5				\n\t"
+                "pminsw %%mm0, %%mm4                    \n\t"
+                "pminsw %%mm1, %%mm5                    \n\t"
 #else
-		"movq %%mm4, %%mm2				\n\t"
-		"psubusw %%mm0, %%mm2				\n\t"
-		"psubw %%mm2, %%mm4				\n\t"
-		"movq %%mm5, %%mm2				\n\t"
-		"psubusw %%mm1, %%mm2				\n\t"
-		"psubw %%mm2, %%mm5				\n\t"
+                "movq %%mm4, %%mm2                      \n\t"
+                "psubusw %%mm0, %%mm2                   \n\t"
+                "psubw %%mm2, %%mm4                     \n\t"
+                "movq %%mm5, %%mm2                      \n\t"
+                "psubusw %%mm1, %%mm2                   \n\t"
+                "psubw %%mm2, %%mm5                     \n\t"
 #endif
-		"pxor %%mm6, %%mm4				\n\t"
-		"pxor %%mm7, %%mm5				\n\t"
-		"psubw %%mm6, %%mm4				\n\t"
-		"psubw %%mm7, %%mm5				\n\t"
-		"packsswb %%mm5, %%mm4				\n\t"
-		"movq %3, %%mm1					\n\t"
-		"pandn %%mm4, %%mm1				\n\t"
-		"movq (%0), %%mm0				\n\t"
-		"paddb   %%mm1, %%mm0				\n\t"
-		"movq %%mm0, (%0)				\n\t"
-		"movq (%0, %1), %%mm0				\n\t"
-		"psubb %%mm1, %%mm0				\n\t"
-		"movq %%mm0, (%0, %1)				\n\t"
-
-		: "+r" (temp_src)
-		: "r" ((long)step), "m" (c->pQPb), "m"(eq_mask)
-		: "%"REG_a, "%"REG_c
-		);
-	}
+                "pxor %%mm6, %%mm4                      \n\t"
+                "pxor %%mm7, %%mm5                      \n\t"
+                "psubw %%mm6, %%mm4                     \n\t"
+                "psubw %%mm7, %%mm5                     \n\t"
+                "packsswb %%mm5, %%mm4                  \n\t"
+                "movq %3, %%mm1                         \n\t"
+                "pandn %%mm4, %%mm1                     \n\t"
+                "movq (%0), %%mm0                       \n\t"
+                "paddb   %%mm1, %%mm0                   \n\t"
+                "movq %%mm0, (%0)                       \n\t"
+                "movq (%0, %1), %%mm0                   \n\t"
+                "psubb %%mm1, %%mm0                     \n\t"
+                "movq %%mm0, (%0, %1)                   \n\t"
+
+                : "+r" (temp_src)
+                : "r" ((long)step), "m" (c->pQPb), "m"(eq_mask)
+                : "%"REG_a, "%"REG_c
+                );
+        }
 /*if(step==16){
     STOP_TIMER("step16")
 }else{
@@ -3180,7 +3180,7 @@ asm volatile(
 #endif //HAVE_MMX
 
 static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
-	QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
+        QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c);
 
 /**
  * Copies a block from src to dst and fixes the blacklevel
@@ -3189,131 +3189,131 @@ static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int
 #undef SCALED_CPY
 
 static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, uint8_t src[], int srcStride,
-	int levelFix, int64_t *packedOffsetAndScale)
+        int levelFix, int64_t *packedOffsetAndScale)
 {
 #ifndef HAVE_MMX
-	int i;
+        int i;
 #endif
-	if(levelFix)
-	{
+        if(levelFix)
+        {
 #ifdef HAVE_MMX
-					asm volatile(
-						"movq (%%"REG_a"), %%mm2	\n\t" // packedYOffset
-						"movq 8(%%"REG_a"), %%mm3	\n\t" // packedYScale
-						"lea (%2,%4), %%"REG_a"	\n\t"
-						"lea (%3,%5), %%"REG_d"	\n\t"
-						"pxor %%mm4, %%mm4	\n\t"
+                asm volatile(
+                        "movq (%%"REG_a"), %%mm2        \n\t" // packedYOffset
+                        "movq 8(%%"REG_a"), %%mm3       \n\t" // packedYScale
+                        "lea (%2,%4), %%"REG_a"         \n\t"
+                        "lea (%3,%5), %%"REG_d"         \n\t"
+                        "pxor %%mm4, %%mm4              \n\t"
 #ifdef HAVE_MMX2
-#define REAL_SCALED_CPY(src1, src2, dst1, dst2)					\
-						"movq " #src1 ", %%mm0	\n\t"\
-						"movq " #src1 ", %%mm5	\n\t"\
-						"movq " #src2 ", %%mm1	\n\t"\
-						"movq " #src2 ", %%mm6	\n\t"\
-						"punpcklbw %%mm0, %%mm0 \n\t"\
-						"punpckhbw %%mm5, %%mm5 \n\t"\
-						"punpcklbw %%mm1, %%mm1 \n\t"\
-						"punpckhbw %%mm6, %%mm6 \n\t"\
-						"pmulhuw %%mm3, %%mm0	\n\t"\
-						"pmulhuw %%mm3, %%mm5	\n\t"\
-						"pmulhuw %%mm3, %%mm1	\n\t"\
-						"pmulhuw %%mm3, %%mm6	\n\t"\
-						"psubw %%mm2, %%mm0	\n\t"\
-						"psubw %%mm2, %%mm5	\n\t"\
-						"psubw %%mm2, %%mm1	\n\t"\
-						"psubw %%mm2, %%mm6	\n\t"\
-						"packuswb %%mm5, %%mm0	\n\t"\
-						"packuswb %%mm6, %%mm1	\n\t"\
-						"movq %%mm0, " #dst1 "	\n\t"\
-						"movq %%mm1, " #dst2 "	\n\t"\
+#define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                                \
+                        "movq " #src1 ", %%mm0          \n\t"\
+                        "movq " #src1 ", %%mm5          \n\t"\
+                        "movq " #src2 ", %%mm1          \n\t"\
+                        "movq " #src2 ", %%mm6          \n\t"\
+                        "punpcklbw %%mm0, %%mm0         \n\t"\
+                        "punpckhbw %%mm5, %%mm5         \n\t"\
+                        "punpcklbw %%mm1, %%mm1         \n\t"\
+                        "punpckhbw %%mm6, %%mm6         \n\t"\
+                        "pmulhuw %%mm3, %%mm0           \n\t"\
+                        "pmulhuw %%mm3, %%mm5           \n\t"\
+                        "pmulhuw %%mm3, %%mm1           \n\t"\
+                        "pmulhuw %%mm3, %%mm6           \n\t"\
+                        "psubw %%mm2, %%mm0             \n\t"\
+                        "psubw %%mm2, %%mm5             \n\t"\
+                        "psubw %%mm2, %%mm1             \n\t"\
+                        "psubw %%mm2, %%mm6             \n\t"\
+                        "packuswb %%mm5, %%mm0          \n\t"\
+                        "packuswb %%mm6, %%mm1          \n\t"\
+                        "movq %%mm0, " #dst1 "          \n\t"\
+                        "movq %%mm1, " #dst2 "          \n\t"\
 
 #else //HAVE_MMX2
-#define REAL_SCALED_CPY(src1, src2, dst1, dst2)					\
-						"movq " #src1 ", %%mm0	\n\t"\
-						"movq " #src1 ", %%mm5	\n\t"\
-						"punpcklbw %%mm4, %%mm0 \n\t"\
-						"punpckhbw %%mm4, %%mm5 \n\t"\
-						"psubw %%mm2, %%mm0	\n\t"\
-						"psubw %%mm2, %%mm5	\n\t"\
-						"movq " #src2 ", %%mm1	\n\t"\
-						"psllw $6, %%mm0	\n\t"\
-						"psllw $6, %%mm5	\n\t"\
-						"pmulhw %%mm3, %%mm0	\n\t"\
-						"movq " #src2 ", %%mm6	\n\t"\
-						"pmulhw %%mm3, %%mm5	\n\t"\
-						"punpcklbw %%mm4, %%mm1 \n\t"\
-						"punpckhbw %%mm4, %%mm6 \n\t"\
-						"psubw %%mm2, %%mm1	\n\t"\
-						"psubw %%mm2, %%mm6	\n\t"\
-						"psllw $6, %%mm1	\n\t"\
-						"psllw $6, %%mm6	\n\t"\
-						"pmulhw %%mm3, %%mm1	\n\t"\
-						"pmulhw %%mm3, %%mm6	\n\t"\
-						"packuswb %%mm5, %%mm0	\n\t"\
-						"packuswb %%mm6, %%mm1	\n\t"\
-						"movq %%mm0, " #dst1 "	\n\t"\
-						"movq %%mm1, " #dst2 "	\n\t"\
-
-#endif //!HAVE_MMX2
+#define REAL_SCALED_CPY(src1, src2, dst1, dst2)                                        \
+                        "movq " #src1 ", %%mm0          \n\t"\
+                        "movq " #src1 ", %%mm5          \n\t"\
+                        "punpcklbw %%mm4, %%mm0         \n\t"\
+                        "punpckhbw %%mm4, %%mm5         \n\t"\
+                        "psubw %%mm2, %%mm0             \n\t"\
+                        "psubw %%mm2, %%mm5             \n\t"\
+                        "movq " #src2 ", %%mm1          \n\t"\
+                        "psllw $6, %%mm0                \n\t"\
+                        "psllw $6, %%mm5                \n\t"\
+                        "pmulhw %%mm3, %%mm0            \n\t"\
+                        "movq " #src2 ", %%mm6          \n\t"\
+                        "pmulhw %%mm3, %%mm5            \n\t"\
+                        "punpcklbw %%mm4, %%mm1         \n\t"\
+                        "punpckhbw %%mm4, %%mm6         \n\t"\
+                        "psubw %%mm2, %%mm1             \n\t"\
+                        "psubw %%mm2, %%mm6             \n\t"\
+                        "psllw $6, %%mm1                \n\t"\
+                        "psllw $6, %%mm6                \n\t"\
+                        "pmulhw %%mm3, %%mm1            \n\t"\
+                        "pmulhw %%mm3, %%mm6            \n\t"\
+                        "packuswb %%mm5, %%mm0          \n\t"\
+                        "packuswb %%mm6, %%mm1          \n\t"\
+                        "movq %%mm0, " #dst1 "          \n\t"\
+                        "movq %%mm1, " #dst2 "          \n\t"\
+
+#endif //HAVE_MMX2
 #define SCALED_CPY(src1, src2, dst1, dst2)\
    REAL_SCALED_CPY(src1, src2, dst1, dst2)
 
 SCALED_CPY((%2)       , (%2, %4)      , (%3)       , (%3, %5))
 SCALED_CPY((%2, %4, 2), (%%REGa, %4, 2), (%3, %5, 2), (%%REGd, %5, 2))
 SCALED_CPY((%2, %4, 4), (%%REGa, %4, 4), (%3, %5, 4), (%%REGd, %5, 4))
-						"lea (%%"REG_a",%4,4), %%"REG_a"	\n\t"
-						"lea (%%"REG_d",%5,4), %%"REG_d"	\n\t"
+                        "lea (%%"REG_a",%4,4), %%"REG_a"        \n\t"
+                        "lea (%%"REG_d",%5,4), %%"REG_d"        \n\t"
 SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
 
 
-						: "=&a" (packedOffsetAndScale)
-						: "0" (packedOffsetAndScale),
-						"r"(src),
-						"r"(dst),
-						"r" ((long)srcStride),
-						"r" ((long)dstStride)
-						: "%"REG_d
-					);
-#else
-				for(i=0; i<8; i++)
-					memcpy(	&(dst[dstStride*i]),
-						&(src[srcStride*i]), BLOCK_SIZE);
-#endif
-	}
-	else
-	{
+                        : "=&a" (packedOffsetAndScale)
+                        : "0" (packedOffsetAndScale),
+                        "r"(src),
+                        "r"(dst),
+                        "r" ((long)srcStride),
+                        "r" ((long)dstStride)
+                        : "%"REG_d
+                                        );
+#else //HAVE_MMX
+        for(i=0; i<8; i++)
+                memcpy( &(dst[dstStride*i]),
+                        &(src[srcStride*i]), BLOCK_SIZE);
+#endif //HAVE_MMX
+        }
+        else
+        {
 #ifdef HAVE_MMX
-					asm volatile(
-						"lea (%0,%2), %%"REG_a"	\n\t"
-						"lea (%1,%3), %%"REG_d"	\n\t"
+        asm volatile(
+                "lea (%0,%2), %%"REG_a"                 \n\t"
+                "lea (%1,%3), %%"REG_d"                 \n\t"
 
-#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)				\
-						"movq " #src1 ", %%mm0	\n\t"\
-						"movq " #src2 ", %%mm1	\n\t"\
-						"movq %%mm0, " #dst1 "	\n\t"\
-						"movq %%mm1, " #dst2 "	\n\t"\
+#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2)                              \
+                "movq " #src1 ", %%mm0          \n\t"\
+                "movq " #src2 ", %%mm1          \n\t"\
+                "movq %%mm0, " #dst1 "          \n\t"\
+                "movq %%mm1, " #dst2 "          \n\t"\
 
 #define SIMPLE_CPY(src1, src2, dst1, dst2)\
    REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
 
-SIMPLE_CPY((%0)       , (%0, %2)      , (%1)       , (%1, %3))
+SIMPLE_CPY((%0)       , (%0, %2)       , (%1)       , (%1, %3))
 SIMPLE_CPY((%0, %2, 2), (%%REGa, %2, 2), (%1, %3, 2), (%%REGd, %3, 2))
 SIMPLE_CPY((%0, %2, 4), (%%REGa, %2, 4), (%1, %3, 4), (%%REGd, %3, 4))
-						"lea (%%"REG_a",%2,4), %%"REG_a"	\n\t"
-						"lea (%%"REG_d",%3,4), %%"REG_d"	\n\t"
+                "lea (%%"REG_a",%2,4), %%"REG_a"        \n\t"
+                "lea (%%"REG_d",%3,4), %%"REG_d"        \n\t"
 SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
 
-						: : "r" (src),
-						"r" (dst),
-						"r" ((long)srcStride),
-						"r" ((long)dstStride)
-						: "%"REG_a, "%"REG_d
-					);
-#else
-				for(i=0; i<8; i++)
-					memcpy(	&(dst[dstStride*i]),
-						&(src[srcStride*i]), BLOCK_SIZE);
-#endif
-	}
+                : : "r" (src),
+                "r" (dst),
+                "r" ((long)srcStride),
+                "r" ((long)dstStride)
+                : "%"REG_a, "%"REG_d
+        );
+#else //HAVE_MMX
+        for(i=0; i<8; i++)
+                memcpy( &(dst[dstStride*i]),
+                        &(src[srcStride*i]), BLOCK_SIZE);
+#endif //HAVE_MMX
+        }
 }
 
 /**
@@ -3322,23 +3322,23 @@ SIMPLE_CPY((%%REGa, %2), (%%REGa, %2, 2), (%%REGd, %3), (%%REGd, %3, 2))
 static inline void RENAME(duplicate)(uint8_t src[], int stride)
 {
 #ifdef HAVE_MMX
-	asm volatile(
-		"movq (%0), %%mm0		\n\t"
-		"add %1, %0			\n\t"
-		"movq %%mm0, (%0)		\n\t"
-		"movq %%mm0, (%0, %1)		\n\t"
-		"movq %%mm0, (%0, %1, 2)	\n\t"
-		: "+r" (src)
-		: "r" ((long)-stride)
-	);
+        asm volatile(
+                "movq (%0), %%mm0               \n\t"
+                "add %1, %0                     \n\t"
+                "movq %%mm0, (%0)               \n\t"
+                "movq %%mm0, (%0, %1)           \n\t"
+                "movq %%mm0, (%0, %1, 2)        \n\t"
+                : "+r" (src)
+                : "r" ((long)-stride)
+        );
 #else
-	int i;
-	uint8_t *p=src;
-	for(i=0; i<3; i++)
-	{
-		p-= stride;
-		memcpy(p, src, 8);
-	}
+        int i;
+        uint8_t *p=src;
+        for(i=0; i<3; i++)
+        {
+                p-= stride;
+                memcpy(p, src, 8);
+        }
 #endif
 }
 
@@ -3346,502 +3346,502 @@ static inline void RENAME(duplicate)(uint8_t src[], int stride)
  * Filters array of bytes (Y or U or V values)
  */
 static void RENAME(postProcess)(uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
-	QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2)
+        QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2)
 {
-	PPContext __attribute__((aligned(8))) c= *c2; //copy to stack for faster access
-	int x,y;
+        PPContext __attribute__((aligned(8))) c= *c2; //copy to stack for faster access
+        int x,y;
 #ifdef COMPILE_TIME_MODE
-	const int mode= COMPILE_TIME_MODE;
+        const int mode= COMPILE_TIME_MODE;
 #else
-	const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
+        const int mode= isColor ? c.ppMode.chromMode : c.ppMode.lumMode;
 #endif
-	int black=0, white=255; // blackest black and whitest white in the picture
-	int QPCorrecture= 256*256;
+        int black=0, white=255; // blackest black and whitest white in the picture
+        int QPCorrecture= 256*256;
 
-	int copyAhead;
+        int copyAhead;
 #ifdef HAVE_MMX
-	int i;
+        int i;
 #endif
 
-	const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
-	const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
+        const int qpHShift= isColor ? 4-c.hChromaSubSample : 4;
+        const int qpVShift= isColor ? 4-c.vChromaSubSample : 4;
 
-	//FIXME remove
-	uint64_t * const yHistogram= c.yHistogram;
-	uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
-	uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
-	//const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
+        //FIXME remove
+        uint64_t * const yHistogram= c.yHistogram;
+        uint8_t * const tempSrc= srcStride > 0 ? c.tempSrc : c.tempSrc - 23*srcStride;
+        uint8_t * const tempDst= dstStride > 0 ? c.tempDst : c.tempDst - 23*dstStride;
+        //const int mbWidth= isColor ? (width+7)>>3 : (width+15)>>4;
 
 #ifdef HAVE_MMX
-	for(i=0; i<57; i++){
-		int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
-		int threshold= offset*2 + 1;
-		c.mmxDcOffset[i]= 0x7F - offset;
-		c.mmxDcThreshold[i]= 0x7F - threshold;
-		c.mmxDcOffset[i]*= 0x0101010101010101LL;
-		c.mmxDcThreshold[i]*= 0x0101010101010101LL;
-	}
+        for(i=0; i<57; i++){
+                int offset= ((i*c.ppMode.baseDcDiff)>>8) + 1;
+                int threshold= offset*2 + 1;
+                c.mmxDcOffset[i]= 0x7F - offset;
+                c.mmxDcThreshold[i]= 0x7F - threshold;
+                c.mmxDcOffset[i]*= 0x0101010101010101LL;
+                c.mmxDcThreshold[i]*= 0x0101010101010101LL;
+        }
 #endif
 
-	if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
-	else if(   (mode & LINEAR_BLEND_DEINT_FILTER)
-		|| (mode & FFMPEG_DEINT_FILTER)
-		|| (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14;
-	else if(   (mode & V_DEBLOCK)
-		|| (mode & LINEAR_IPOL_DEINT_FILTER)
-		|| (mode & MEDIAN_DEINT_FILTER)
-		|| (mode & V_A_DEBLOCK)) copyAhead=13;
-	else if(mode & V_X1_FILTER) copyAhead=11;
-//	else if(mode & V_RK1_FILTER) copyAhead=10;
-	else if(mode & DERING) copyAhead=9;
-	else copyAhead=8;
-
-	copyAhead-= 8;
-
-	if(!isColor)
-	{
-		uint64_t sum= 0;
-		int i;
-		uint64_t maxClipped;
-		uint64_t clipped;
-		double scale;
-
-		c.frameNum++;
-		// first frame is fscked so we ignore it
-		if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256;
-
-		for(i=0; i<256; i++)
-		{
-			sum+= yHistogram[i];
-//			printf("%d ", yHistogram[i]);
-		}
-//		printf("\n\n");
-
-		/* we allways get a completly black picture first */
-		maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold);
-
-		clipped= sum;
-		for(black=255; black>0; black--)
-		{
-			if(clipped < maxClipped) break;
-			clipped-= yHistogram[black];
-		}
-
-		clipped= sum;
-		for(white=0; white<256; white++)
-		{
-			if(clipped < maxClipped) break;
-			clipped-= yHistogram[white];
-		}
-
-		scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
+        if(mode & CUBIC_IPOL_DEINT_FILTER) copyAhead=16;
+        else if(   (mode & LINEAR_BLEND_DEINT_FILTER)
+                || (mode & FFMPEG_DEINT_FILTER)
+                || (mode & LOWPASS5_DEINT_FILTER)) copyAhead=14;
+        else if(   (mode & V_DEBLOCK)
+                || (mode & LINEAR_IPOL_DEINT_FILTER)
+                || (mode & MEDIAN_DEINT_FILTER)
+                || (mode & V_A_DEBLOCK)) copyAhead=13;
+        else if(mode & V_X1_FILTER) copyAhead=11;
+//        else if(mode & V_RK1_FILTER) copyAhead=10;
+        else if(mode & DERING) copyAhead=9;
+        else copyAhead=8;
+
+        copyAhead-= 8;
+
+        if(!isColor)
+        {
+                uint64_t sum= 0;
+                int i;
+                uint64_t maxClipped;
+                uint64_t clipped;
+                double scale;
+
+                c.frameNum++;
+                // first frame is fscked so we ignore it
+                if(c.frameNum == 1) yHistogram[0]= width*height/64*15/256;
+
+                for(i=0; i<256; i++)
+                {
+                        sum+= yHistogram[i];
+//                        printf("%d ", yHistogram[i]);
+                }
+//                printf("\n\n");
+
+                /* we allways get a completly black picture first */
+                maxClipped= (uint64_t)(sum * c.ppMode.maxClippedThreshold);
+
+                clipped= sum;
+                for(black=255; black>0; black--)
+                {
+                        if(clipped < maxClipped) break;
+                        clipped-= yHistogram[black];
+                }
+
+                clipped= sum;
+                for(white=0; white<256; white++)
+                {
+                        if(clipped < maxClipped) break;
+                        clipped-= yHistogram[white];
+                }
+
+                scale= (double)(c.ppMode.maxAllowedY - c.ppMode.minAllowedY) / (double)(white-black);
 
 #ifdef HAVE_MMX2
-		c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
-		c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
+                c.packedYScale= (uint16_t)(scale*256.0 + 0.5);
+                c.packedYOffset= (((black*c.packedYScale)>>8) - c.ppMode.minAllowedY) & 0xFFFF;
 #else
-		c.packedYScale= (uint16_t)(scale*1024.0 + 0.5);
-		c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
+                c.packedYScale= (uint16_t)(scale*1024.0 + 0.5);
+                c.packedYOffset= (black - c.ppMode.minAllowedY) & 0xFFFF;
 #endif
 
-		c.packedYOffset|= c.packedYOffset<<32;
-		c.packedYOffset|= c.packedYOffset<<16;
-
-		c.packedYScale|= c.packedYScale<<32;
-		c.packedYScale|= c.packedYScale<<16;
-		
-		if(mode & LEVEL_FIX)	QPCorrecture= (int)(scale*256*256 + 0.5);
-		else			QPCorrecture= 256*256;
-	}
-	else
-	{
-		c.packedYScale= 0x0100010001000100LL;
-		c.packedYOffset= 0;
-		QPCorrecture= 256*256;
-	}
-
-	/* copy & deinterlace first row of blocks */
-	y=-BLOCK_SIZE;
-	{
-		uint8_t *srcBlock= &(src[y*srcStride]);
-		uint8_t *dstBlock= tempDst + dstStride;
-
-		// From this point on it is guranteed that we can read and write 16 lines downward
-		// finish 1 block before the next otherwise we might have a problem
-		// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
-		for(x=0; x<width; x+=BLOCK_SIZE)
-		{
+                c.packedYOffset|= c.packedYOffset<<32;
+                c.packedYOffset|= c.packedYOffset<<16;
+
+                c.packedYScale|= c.packedYScale<<32;
+                c.packedYScale|= c.packedYScale<<16;
+
+                if(mode & LEVEL_FIX)        QPCorrecture= (int)(scale*256*256 + 0.5);
+                else                        QPCorrecture= 256*256;
+        }
+        else
+        {
+                c.packedYScale= 0x0100010001000100LL;
+                c.packedYOffset= 0;
+                QPCorrecture= 256*256;
+        }
+
+        /* copy & deinterlace first row of blocks */
+        y=-BLOCK_SIZE;
+        {
+                uint8_t *srcBlock= &(src[y*srcStride]);
+                uint8_t *dstBlock= tempDst + dstStride;
+
+                // From this point on it is guranteed that we can read and write 16 lines downward
+                // finish 1 block before the next otherwise we might have a problem
+                // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
+                for(x=0; x<width; x+=BLOCK_SIZE)
+                {
 
 #ifdef HAVE_MMX2
 /*
-			prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
-			prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
-			prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
-			prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
+                        prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
+                        prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
+                        prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
+                        prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
 */
 
-			asm(
-				"mov %4, %%"REG_a"		\n\t"
-				"shr $2, %%"REG_a"		\n\t"
-				"and $6, %%"REG_a"		\n\t"
-				"add %5, %%"REG_a"		\n\t"
-				"mov %%"REG_a", %%"REG_d"	\n\t"
-				"imul %1, %%"REG_a"		\n\t"
-				"imul %3, %%"REG_d"		\n\t"
-				"prefetchnta 32(%%"REG_a", %0)	\n\t"
-				"prefetcht0 32(%%"REG_d", %2)	\n\t"
-				"add %1, %%"REG_a"		\n\t"
-				"add %3, %%"REG_d"		\n\t"
-				"prefetchnta 32(%%"REG_a", %0)	\n\t"
-				"prefetcht0 32(%%"REG_d", %2)	\n\t"
-			:: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride),
-			"g" ((long)x), "g" ((long)copyAhead)
-			: "%"REG_a, "%"REG_d
-			);
+                        asm(
+                                "mov %4, %%"REG_a"              \n\t"
+                                "shr $2, %%"REG_a"              \n\t"
+                                "and $6, %%"REG_a"              \n\t"
+                                "add %5, %%"REG_a"              \n\t"
+                                "mov %%"REG_a", %%"REG_d"       \n\t"
+                                "imul %1, %%"REG_a"             \n\t"
+                                "imul %3, %%"REG_d"             \n\t"
+                                "prefetchnta 32(%%"REG_a", %0)  \n\t"
+                                "prefetcht0 32(%%"REG_d", %2)   \n\t"
+                                "add %1, %%"REG_a"              \n\t"
+                                "add %3, %%"REG_d"              \n\t"
+                                "prefetchnta 32(%%"REG_a", %0)  \n\t"
+                                "prefetcht0 32(%%"REG_d", %2)   \n\t"
+                        :: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride),
+                        "g" ((long)x), "g" ((long)copyAhead)
+                        : "%"REG_a, "%"REG_d
+                        );
 
 #elif defined(HAVE_3DNOW)
 //FIXME check if this is faster on an 3dnow chip or if its faster without the prefetch or ...
-/*			prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
-			prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
-			prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32);
-			prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32);
+/*                        prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
+                        prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
+                        prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32);
+                        prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32);
 */
 #endif
 
-			RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
-				srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
-
-			RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
-
-			if(mode & LINEAR_IPOL_DEINT_FILTER)
-				RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
-			else if(mode & LINEAR_BLEND_DEINT_FILTER)
-				RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
-			else if(mode & MEDIAN_DEINT_FILTER)
-				RENAME(deInterlaceMedian)(dstBlock, dstStride);
-			else if(mode & CUBIC_IPOL_DEINT_FILTER)
-				RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
-			else if(mode & FFMPEG_DEINT_FILTER)
-				RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
-			else if(mode & LOWPASS5_DEINT_FILTER)
-				RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
-/*			else if(mode & CUBIC_BLEND_DEINT_FILTER)
-				RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
+                        RENAME(blockCopy)(dstBlock + dstStride*8, dstStride,
+                                srcBlock + srcStride*8, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
+
+                        RENAME(duplicate)(dstBlock + dstStride*8, dstStride);
+
+                        if(mode & LINEAR_IPOL_DEINT_FILTER)
+                                RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
+                        else if(mode & LINEAR_BLEND_DEINT_FILTER)
+                                RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
+                        else if(mode & MEDIAN_DEINT_FILTER)
+                                RENAME(deInterlaceMedian)(dstBlock, dstStride);
+                        else if(mode & CUBIC_IPOL_DEINT_FILTER)
+                                RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
+                        else if(mode & FFMPEG_DEINT_FILTER)
+                                RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
+                        else if(mode & LOWPASS5_DEINT_FILTER)
+                                RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
+/*                        else if(mode & CUBIC_BLEND_DEINT_FILTER)
+                                RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
 */
-			dstBlock+=8;
-			srcBlock+=8;
-		}
-		if(width==ABS(dstStride))
-			linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
-		else
-		{
-			int i;
-			for(i=0; i<copyAhead; i++)
-			{
-				memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
-			}
-		}
-	}
+                        dstBlock+=8;
+                        srcBlock+=8;
+                }
+                if(width==ABS(dstStride))
+                        linecpy(dst, tempDst + 9*dstStride, copyAhead, dstStride);
+                else
+                {
+                        int i;
+                        for(i=0; i<copyAhead; i++)
+                        {
+                                memcpy(dst + i*dstStride, tempDst + (9+i)*dstStride, width);
+                        }
+                }
+        }
 
 //printf("\n");
-	for(y=0; y<height; y+=BLOCK_SIZE)
-	{
-		//1% speedup if these are here instead of the inner loop
-		uint8_t *srcBlock= &(src[y*srcStride]);
-		uint8_t *dstBlock= &(dst[y*dstStride]);
+        for(y=0; y<height; y+=BLOCK_SIZE)
+        {
+                //1% speedup if these are here instead of the inner loop
+                uint8_t *srcBlock= &(src[y*srcStride]);
+                uint8_t *dstBlock= &(dst[y*dstStride]);
 #ifdef HAVE_MMX
-		uint8_t *tempBlock1= c.tempBlocks;
-		uint8_t *tempBlock2= c.tempBlocks + 8;
+                uint8_t *tempBlock1= c.tempBlocks;
+                uint8_t *tempBlock2= c.tempBlocks + 8;
 #endif
-		int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
-		int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*ABS(QPStride)];
-		int QP=0;
-		/* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
-		   if not than use a temporary buffer */
-		if(y+15 >= height)
-		{
-			int i;
-			/* copy from line (copyAhead) to (copyAhead+7) of src, these will be copied with
-			   blockcopy to dst later */
-			linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
-				MAX(height-y-copyAhead, 0), srcStride);
-
-			/* duplicate last line of src to fill the void upto line (copyAhead+7) */
-			for(i=MAX(height-y, 8); i<copyAhead+8; i++)
-				memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), ABS(srcStride));
-
-			/* copy up to (copyAhead+1) lines of dst (line -1 to (copyAhead-1))*/
-			linecpy(tempDst, dstBlock - dstStride, MIN(height-y+1, copyAhead+1), dstStride);
-
-			/* duplicate last line of dst to fill the void upto line (copyAhead) */
-			for(i=height-y+1; i<=copyAhead; i++)
-				memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), ABS(dstStride));
-
-			dstBlock= tempDst + dstStride;
-			srcBlock= tempSrc;
-		}
+                int8_t *QPptr= &QPs[(y>>qpVShift)*QPStride];
+                int8_t *nonBQPptr= &c.nonBQPTable[(y>>qpVShift)*ABS(QPStride)];
+                int QP=0;
+                /* can we mess with a 8x16 block from srcBlock/dstBlock downwards and 1 line upwards
+                   if not than use a temporary buffer */
+                if(y+15 >= height)
+                {
+                        int i;
+                        /* copy from line (copyAhead) to (copyAhead+7) of src, these will be copied with
+                           blockcopy to dst later */
+                        linecpy(tempSrc + srcStride*copyAhead, srcBlock + srcStride*copyAhead,
+                                MAX(height-y-copyAhead, 0), srcStride);
+
+                        /* duplicate last line of src to fill the void upto line (copyAhead+7) */
+                        for(i=MAX(height-y, 8); i<copyAhead+8; i++)
+                                memcpy(tempSrc + srcStride*i, src + srcStride*(height-1), ABS(srcStride));
+
+                        /* copy up to (copyAhead+1) lines of dst (line -1 to (copyAhead-1))*/
+                        linecpy(tempDst, dstBlock - dstStride, MIN(height-y+1, copyAhead+1), dstStride);
+
+                        /* duplicate last line of dst to fill the void upto line (copyAhead) */
+                        for(i=height-y+1; i<=copyAhead; i++)
+                                memcpy(tempDst + dstStride*i, dst + dstStride*(height-1), ABS(dstStride));
+
+                        dstBlock= tempDst + dstStride;
+                        srcBlock= tempSrc;
+                }
 //printf("\n");
 
-		// From this point on it is guranteed that we can read and write 16 lines downward
-		// finish 1 block before the next otherwise we might have a problem
-		// with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
-		for(x=0; x<width; x+=BLOCK_SIZE)
-		{
-			const int stride= dstStride;
+                // From this point on it is guranteed that we can read and write 16 lines downward
+                // finish 1 block before the next otherwise we might have a problem
+                // with the L1 Cache of the P4 ... or only a few blocks at a time or soemthing
+                for(x=0; x<width; x+=BLOCK_SIZE)
+                {
+                        const int stride= dstStride;
 #ifdef HAVE_MMX
-			uint8_t *tmpXchg;
+                        uint8_t *tmpXchg;
 #endif
-			if(isColor)
-			{
-				QP= QPptr[x>>qpHShift];
-				c.nonBQP= nonBQPptr[x>>qpHShift];
-			}
-			else
-			{
-				QP= QPptr[x>>4];
-				QP= (QP* QPCorrecture + 256*128)>>16;
-				c.nonBQP= nonBQPptr[x>>4];
-				c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
-				yHistogram[ srcBlock[srcStride*12 + 4] ]++;
-			}
-			c.QP= QP;
+                        if(isColor)
+                        {
+                                QP= QPptr[x>>qpHShift];
+                                c.nonBQP= nonBQPptr[x>>qpHShift];
+                        }
+                        else
+                        {
+                                QP= QPptr[x>>4];
+                                QP= (QP* QPCorrecture + 256*128)>>16;
+                                c.nonBQP= nonBQPptr[x>>4];
+                                c.nonBQP= (c.nonBQP* QPCorrecture + 256*128)>>16;
+                                yHistogram[ srcBlock[srcStride*12 + 4] ]++;
+                        }
+                        c.QP= QP;
 #ifdef HAVE_MMX
-			asm volatile(
-				"movd %1, %%mm7					\n\t"
-				"packuswb %%mm7, %%mm7				\n\t" // 0, 0, 0, QP, 0, 0, 0, QP
-				"packuswb %%mm7, %%mm7				\n\t" // 0,QP, 0, QP, 0,QP, 0, QP
-				"packuswb %%mm7, %%mm7				\n\t" // QP,..., QP
-				"movq %%mm7, %0			\n\t"
-				: "=m" (c.pQPb) 
-				: "r" (QP)
-			);
+                        asm volatile(
+                                "movd %1, %%mm7         \n\t"
+                                "packuswb %%mm7, %%mm7  \n\t" // 0, 0, 0, QP, 0, 0, 0, QP
+                                "packuswb %%mm7, %%mm7  \n\t" // 0,QP, 0, QP, 0,QP, 0, QP
+                                "packuswb %%mm7, %%mm7  \n\t" // QP,..., QP
+                                "movq %%mm7, %0         \n\t"
+                                : "=m" (c.pQPb)
+                                : "r" (QP)
+                        );
 #endif
 
 
 #ifdef HAVE_MMX2
 /*
-			prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
-			prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
-			prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
-			prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
+                        prefetchnta(srcBlock + (((x>>2)&6) + 5)*srcStride + 32);
+                        prefetchnta(srcBlock + (((x>>2)&6) + 6)*srcStride + 32);
+                        prefetcht0(dstBlock + (((x>>2)&6) + 5)*dstStride + 32);
+                        prefetcht0(dstBlock + (((x>>2)&6) + 6)*dstStride + 32);
 */
 
-			asm(
-				"mov %4, %%"REG_a"		\n\t"
-				"shr $2, %%"REG_a"		\n\t"
-				"and $6, %%"REG_a"		\n\t"
-				"add %5, %%"REG_a"		\n\t"
-				"mov %%"REG_a", %%"REG_d"	\n\t"
-				"imul %1, %%"REG_a"		\n\t"
-				"imul %3, %%"REG_d"		\n\t"
-				"prefetchnta 32(%%"REG_a", %0)	\n\t"
-				"prefetcht0 32(%%"REG_d", %2)	\n\t"
-				"add %1, %%"REG_a"		\n\t"
-				"add %3, %%"REG_d"		\n\t"
-				"prefetchnta 32(%%"REG_a", %0)	\n\t"
-				"prefetcht0 32(%%"REG_d", %2)	\n\t"
-			:: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride),
-			 "g" ((long)x), "g" ((long)copyAhead)
-			: "%"REG_a, "%"REG_d
-			);
+                        asm(
+                                "mov %4, %%"REG_a"              \n\t"
+                                "shr $2, %%"REG_a"              \n\t"
+                                "and $6, %%"REG_a"              \n\t"
+                                "add %5, %%"REG_a"              \n\t"
+                                "mov %%"REG_a", %%"REG_d"       \n\t"
+                                "imul %1, %%"REG_a"             \n\t"
+                                "imul %3, %%"REG_d"             \n\t"
+                                "prefetchnta 32(%%"REG_a", %0)  \n\t"
+                                "prefetcht0 32(%%"REG_d", %2)   \n\t"
+                                "add %1, %%"REG_a"              \n\t"
+                                "add %3, %%"REG_d"              \n\t"
+                                "prefetchnta 32(%%"REG_a", %0)  \n\t"
+                                "prefetcht0 32(%%"REG_d", %2)   \n\t"
+                        :: "r" (srcBlock), "r" ((long)srcStride), "r" (dstBlock), "r" ((long)dstStride),
+                         "g" ((long)x), "g" ((long)copyAhead)
+                        : "%"REG_a, "%"REG_d
+                        );
 
 #elif defined(HAVE_3DNOW)
 //FIXME check if this is faster on an 3dnow chip or if its faster without the prefetch or ...
-/*			prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
-			prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
-			prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32);
-			prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32);
+/*                        prefetch(srcBlock + (((x>>3)&3) + 5)*srcStride + 32);
+                        prefetch(srcBlock + (((x>>3)&3) + 9)*srcStride + 32);
+                        prefetchw(dstBlock + (((x>>3)&3) + 5)*dstStride + 32);
+                        prefetchw(dstBlock + (((x>>3)&3) + 9)*dstStride + 32);
 */
 #endif
 
-			RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
-				srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
-
-			if(mode & LINEAR_IPOL_DEINT_FILTER)
-				RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
-			else if(mode & LINEAR_BLEND_DEINT_FILTER)
-				RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
-			else if(mode & MEDIAN_DEINT_FILTER)
-				RENAME(deInterlaceMedian)(dstBlock, dstStride);
-			else if(mode & CUBIC_IPOL_DEINT_FILTER)
-				RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
-			else if(mode & FFMPEG_DEINT_FILTER)
-				RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
-			else if(mode & LOWPASS5_DEINT_FILTER)
-				RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
-/*			else if(mode & CUBIC_BLEND_DEINT_FILTER)
-				RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
+                        RENAME(blockCopy)(dstBlock + dstStride*copyAhead, dstStride,
+                                srcBlock + srcStride*copyAhead, srcStride, mode & LEVEL_FIX, &c.packedYOffset);
+
+                        if(mode & LINEAR_IPOL_DEINT_FILTER)
+                                RENAME(deInterlaceInterpolateLinear)(dstBlock, dstStride);
+                        else if(mode & LINEAR_BLEND_DEINT_FILTER)
+                                RENAME(deInterlaceBlendLinear)(dstBlock, dstStride, c.deintTemp + x);
+                        else if(mode & MEDIAN_DEINT_FILTER)
+                                RENAME(deInterlaceMedian)(dstBlock, dstStride);
+                        else if(mode & CUBIC_IPOL_DEINT_FILTER)
+                                RENAME(deInterlaceInterpolateCubic)(dstBlock, dstStride);
+                        else if(mode & FFMPEG_DEINT_FILTER)
+                                RENAME(deInterlaceFF)(dstBlock, dstStride, c.deintTemp + x);
+                        else if(mode & LOWPASS5_DEINT_FILTER)
+                                RENAME(deInterlaceL5)(dstBlock, dstStride, c.deintTemp + x, c.deintTemp + width + x);
+/*                        else if(mode & CUBIC_BLEND_DEINT_FILTER)
+                                RENAME(deInterlaceBlendCubic)(dstBlock, dstStride);
 */
 
-			/* only deblock if we have 2 blocks */
-			if(y + 8 < height)
-			{
-				if(mode & V_X1_FILTER)
-					RENAME(vertX1Filter)(dstBlock, stride, &c);
-				else if(mode & V_DEBLOCK)
-				{
-					const int t= RENAME(vertClassify)(dstBlock, stride, &c);
-
-					if(t==1)
-						RENAME(doVertLowPass)(dstBlock, stride, &c);
-					else if(t==2)
-						RENAME(doVertDefFilter)(dstBlock, stride, &c);
-				}else if(mode & V_A_DEBLOCK){
-					RENAME(do_a_deblock)(dstBlock, stride, 1, &c);
-				}
-			}
+                        /* only deblock if we have 2 blocks */
+                        if(y + 8 < height)
+                        {
+                                if(mode & V_X1_FILTER)
+                                        RENAME(vertX1Filter)(dstBlock, stride, &c);
+                                else if(mode & V_DEBLOCK)
+                                {
+                                        const int t= RENAME(vertClassify)(dstBlock, stride, &c);
+
+                                        if(t==1)
+                                                RENAME(doVertLowPass)(dstBlock, stride, &c);
+                                        else if(t==2)
+                                                RENAME(doVertDefFilter)(dstBlock, stride, &c);
+                                }else if(mode & V_A_DEBLOCK){
+                                        RENAME(do_a_deblock)(dstBlock, stride, 1, &c);
+                                }
+                        }
 
 #ifdef HAVE_MMX
-			RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
+                        RENAME(transpose1)(tempBlock1, tempBlock2, dstBlock, dstStride);
 #endif
-			/* check if we have a previous block to deblock it with dstBlock */
-			if(x - 8 >= 0)
-			{
+                        /* check if we have a previous block to deblock it with dstBlock */
+                        if(x - 8 >= 0)
+                        {
 #ifdef HAVE_MMX
-				if(mode & H_X1_FILTER)
-					RENAME(vertX1Filter)(tempBlock1, 16, &c);
-				else if(mode & H_DEBLOCK)
-				{
+                                if(mode & H_X1_FILTER)
+                                        RENAME(vertX1Filter)(tempBlock1, 16, &c);
+                                else if(mode & H_DEBLOCK)
+                                {
 //START_TIMER
-					const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
+                                        const int t= RENAME(vertClassify)(tempBlock1, 16, &c);
 //STOP_TIMER("dc & minmax")
                                         if(t==1)
-						RENAME(doVertLowPass)(tempBlock1, 16, &c);
-					else if(t==2)
-						RENAME(doVertDefFilter)(tempBlock1, 16, &c);
-				}else if(mode & H_A_DEBLOCK){
-					RENAME(do_a_deblock)(tempBlock1, 16, 1, &c);
-				}
+                                                RENAME(doVertLowPass)(tempBlock1, 16, &c);
+                                        else if(t==2)
+                                                RENAME(doVertDefFilter)(tempBlock1, 16, &c);
+                                }else if(mode & H_A_DEBLOCK){
+                                        RENAME(do_a_deblock)(tempBlock1, 16, 1, &c);
+                                }
 
-				RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
+                                RENAME(transpose2)(dstBlock-4, dstStride, tempBlock1 + 4*16);
 
 #else
-				if(mode & H_X1_FILTER)
-					horizX1Filter(dstBlock-4, stride, QP);
-				else if(mode & H_DEBLOCK)
-				{
+                                if(mode & H_X1_FILTER)
+                                        horizX1Filter(dstBlock-4, stride, QP);
+                                else if(mode & H_DEBLOCK)
+                                {
 #ifdef HAVE_ALTIVEC
-					unsigned char __attribute__ ((aligned(16))) tempBlock[272];
-					transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
+                                        unsigned char __attribute__ ((aligned(16))) tempBlock[272];
+                                        transpose_16x8_char_toPackedAlign_altivec(tempBlock, dstBlock - (4 + 1), stride);
 
-					const int t=vertClassify_altivec(tempBlock-48, 16, &c);
-					if(t==1) {
-						doVertLowPass_altivec(tempBlock-48, 16, &c);
+                                        const int t=vertClassify_altivec(tempBlock-48, 16, &c);
+                                        if(t==1) {
+                                                doVertLowPass_altivec(tempBlock-48, 16, &c);
                                                 transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
                                         }
-					else if(t==2) {
-						doVertDefFilter_altivec(tempBlock-48, 16, &c);
+                                        else if(t==2) {
+                                                doVertDefFilter_altivec(tempBlock-48, 16, &c);
                                                 transpose_8x16_char_fromPackedAlign_altivec(dstBlock - (4 + 1), tempBlock, stride);
                                         }
 #else
-					const int t= RENAME(horizClassify)(dstBlock-4, stride, &c);
+                                        const int t= RENAME(horizClassify)(dstBlock-4, stride, &c);
 
-					if(t==1)
-						RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
-					else if(t==2)
-						RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
-#endif
-				}else if(mode & H_A_DEBLOCK){
-					RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
-				}
+                                        if(t==1)
+                                                RENAME(doHorizLowPass)(dstBlock-4, stride, &c);
+                                        else if(t==2)
+                                                RENAME(doHorizDefFilter)(dstBlock-4, stride, &c);
 #endif
-				if(mode & DERING)
-				{
-				//FIXME filter first line
-					if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
-				}
-
-				if(mode & TEMP_NOISE_FILTER)
-				{
-					RENAME(tempNoiseReducer)(dstBlock-8, stride,
-						c.tempBlured[isColor] + y*dstStride + x,
-						c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
-						c.ppMode.maxTmpNoise);
-				}
-			}
-
-			dstBlock+=8;
-			srcBlock+=8;
+                                }else if(mode & H_A_DEBLOCK){
+                                        RENAME(do_a_deblock)(dstBlock-8, 1, stride, &c);
+                                }
+#endif //HAVE_MMX
+                                if(mode & DERING)
+                                {
+                                //FIXME filter first line
+                                        if(y>0) RENAME(dering)(dstBlock - stride - 8, stride, &c);
+                                }
+
+                                if(mode & TEMP_NOISE_FILTER)
+                                {
+                                        RENAME(tempNoiseReducer)(dstBlock-8, stride,
+                                                c.tempBlured[isColor] + y*dstStride + x,
+                                                c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
+                                                c.ppMode.maxTmpNoise);
+                                }
+                        }
+
+                        dstBlock+=8;
+                        srcBlock+=8;
 
 #ifdef HAVE_MMX
-			tmpXchg= tempBlock1;
-			tempBlock1= tempBlock2;
-			tempBlock2 = tmpXchg;
+                        tmpXchg= tempBlock1;
+                        tempBlock1= tempBlock2;
+                        tempBlock2 = tmpXchg;
 #endif
-		}
-
-		if(mode & DERING)
-		{
-				if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c);
-		}
-
-		if((mode & TEMP_NOISE_FILTER))
-		{
-			RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
-				c.tempBlured[isColor] + y*dstStride + x,
-				c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
-				c.ppMode.maxTmpNoise);
-		}
-
-		/* did we use a tmp buffer for the last lines*/
-		if(y+15 >= height)
-		{
-			uint8_t *dstBlock= &(dst[y*dstStride]);
-			if(width==ABS(dstStride))
-				linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
-			else
-			{
-				int i;
-				for(i=0; i<height-y; i++)
-				{
-					memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
-				}
-			}
-		}
+                }
+
+                if(mode & DERING)
+                {
+                                if(y > 0) RENAME(dering)(dstBlock - dstStride - 8, dstStride, &c);
+                }
+
+                if((mode & TEMP_NOISE_FILTER))
+                {
+                        RENAME(tempNoiseReducer)(dstBlock-8, dstStride,
+                                c.tempBlured[isColor] + y*dstStride + x,
+                                c.tempBluredPast[isColor] + (y>>3)*256 + (x>>3),
+                                c.ppMode.maxTmpNoise);
+                }
+
+                /* did we use a tmp buffer for the last lines*/
+                if(y+15 >= height)
+                {
+                        uint8_t *dstBlock= &(dst[y*dstStride]);
+                        if(width==ABS(dstStride))
+                                linecpy(dstBlock, tempDst + dstStride, height-y, dstStride);
+                        else
+                        {
+                                int i;
+                                for(i=0; i<height-y; i++)
+                                {
+                                        memcpy(dstBlock + i*dstStride, tempDst + (i+1)*dstStride, width);
+                                }
+                        }
+                }
 /*
-		for(x=0; x<width; x+=32)
-		{
-			volatile int i;
-			i+=	+ dstBlock[x + 7*dstStride] + dstBlock[x + 8*dstStride]
-				+ dstBlock[x + 9*dstStride] + dstBlock[x +10*dstStride]
-				+ dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride];
-//				+ dstBlock[x +13*dstStride]
-//				+ dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
-		}*/
-	}
+                for(x=0; x<width; x+=32)
+                {
+                        volatile int i;
+                        i+=        + dstBlock[x + 7*dstStride] + dstBlock[x + 8*dstStride]
+                                + dstBlock[x + 9*dstStride] + dstBlock[x +10*dstStride]
+                                + dstBlock[x +11*dstStride] + dstBlock[x +12*dstStride];
+//                                + dstBlock[x +13*dstStride]
+//                                + dstBlock[x +14*dstStride] + dstBlock[x +15*dstStride];
+                }*/
+        }
 #ifdef HAVE_3DNOW
-	asm volatile("femms");
+        asm volatile("femms");
 #elif defined (HAVE_MMX)
-	asm volatile("emms");
+        asm volatile("emms");
 #endif
 
 #ifdef DEBUG_BRIGHTNESS
-	if(!isColor)
-	{
-		int max=1;
-		int i;
-		for(i=0; i<256; i++)
-			if(yHistogram[i] > max) max=yHistogram[i];
-
-		for(i=1; i<256; i++)
-		{
-			int x;
-			int start=yHistogram[i-1]/(max/256+1);
-			int end=yHistogram[i]/(max/256+1);
-			int inc= end > start ? 1 : -1;
-			for(x=start; x!=end+inc; x+=inc)
-				dst[ i*dstStride + x]+=128;
-		}
-
-		for(i=0; i<100; i+=2)
-		{
-			dst[ (white)*dstStride + i]+=128;
-			dst[ (black)*dstStride + i]+=128;
-		}
-
-	}
+        if(!isColor)
+        {
+                int max=1;
+                int i;
+                for(i=0; i<256; i++)
+                        if(yHistogram[i] > max) max=yHistogram[i];
+
+                for(i=1; i<256; i++)
+                {
+                        int x;
+                        int start=yHistogram[i-1]/(max/256+1);
+                        int end=yHistogram[i]/(max/256+1);
+                        int inc= end > start ? 1 : -1;
+                        for(x=start; x!=end+inc; x+=inc)
+                                dst[ i*dstStride + x]+=128;
+                }
+
+                for(i=0; i<100; i+=2)
+                {
+                        dst[ (white)*dstStride + i]+=128;
+                        dst[ (black)*dstStride + i]+=128;
+                }
+
+        }
 #endif
 
-	*c2= c; //copy local context back
+        *c2= c; //copy local context back
 
 }
diff --git a/src/libffmpeg/libavcodec/loco.c b/src/libffmpeg/libavcodec/loco.c
index 6f90c1ef1..37f141821 100644
--- a/src/libffmpeg/libavcodec/loco.c
+++ b/src/libffmpeg/libavcodec/loco.c
@@ -14,15 +14,15 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file loco.c
  * LOCO codec.
  */
- 
+
 #include "avcodec.h"
 #include "common.h"
 #include "bitstream.h"
@@ -49,12 +49,12 @@ static int loco_get_rice_param(RICEContext *r)
 {
     int cnt = 0;
     int val = r->count;
-    
+
     while(r->sum > val && cnt < 9) {
         val <<= 1;
         cnt++;
     }
-    
+
     return cnt;
 }
 
@@ -62,7 +62,7 @@ static inline void loco_update_rice_param(RICEContext *r, int val)
 {
     r->sum += val;
     r->count++;
-    
+
     if(r->count == 16) {
         r->sum >>= 1;
         r->count >>= 1;
@@ -99,7 +99,7 @@ static inline int loco_get_rice(RICEContext *r)
             r->run2 = 0;
         }
     }
-    
+
     return v;
 }
 
@@ -107,11 +107,11 @@ static inline int loco_get_rice(RICEContext *r)
 static inline int loco_predict(uint8_t* data, int stride, int step)
 {
     int a, b, c;
-    
+
     a = data[-stride];
     b = data[-step];
     c = data[-stride - step];
-    
+
     return mid_pred(a, a + b - c, b);
 }
 
@@ -121,16 +121,16 @@ static int loco_decode_plane(LOCOContext *l, uint8_t *data, int width, int heigh
     RICEContext rc;
     int val;
     int i, j;
-    
+
     init_get_bits(&rc.gb, buf, buf_size*8);
     rc.save = 0;
     rc.run = 0;
     rc.run2 = 0;
-    rc.lossy = l->lossy; 
-    
+    rc.lossy = l->lossy;
+
     rc.sum = 8;
     rc.count = 1;
-    
+
     /* restore top left pixel */
     val = loco_get_rice(&rc);
     data[0] = 128 + val;
@@ -151,11 +151,11 @@ static int loco_decode_plane(LOCOContext *l, uint8_t *data, int width, int heigh
         }
         data += stride;
     }
-    
+
     return ((get_bits_count(&rc.gb) + 7) >> 3);
 }
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -221,7 +221,7 @@ static int decode_frame(AVCodecContext *avctx,
 
     *data_size = sizeof(AVFrame);
     *(AVFrame*)data = l->pic;
-    
+
     return buf_size;
 }
 
@@ -247,7 +247,7 @@ static int decode_init(AVCodecContext *avctx){
         l->lossy = LE_32(avctx->extradata + 8);
         av_log(avctx, AV_LOG_INFO, "This is LOCO codec version %i, please upload file for study\n", version);
     }
-    
+
     l->mode = LE_32(avctx->extradata + 4);
     switch(l->mode) {
     case LOCO_CYUY2: case LOCO_YUY2: case LOCO_UYVY:
diff --git a/src/libffmpeg/libavcodec/mace.c b/src/libffmpeg/libavcodec/mace.c
index 80cd28393..a104fb04e 100644
--- a/src/libffmpeg/libavcodec/mace.c
+++ b/src/libffmpeg/libavcodec/mace.c
@@ -14,14 +14,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file mace.c
  * MACE decoder.
  */
- 
+
 #include "avcodec.h"
 
 /*
diff --git a/src/libffmpeg/libavcodec/mdct.c b/src/libffmpeg/libavcodec/mdct.c
index 6628958b6..5c3e7b3b1 100644
--- a/src/libffmpeg/libavcodec/mdct.c
+++ b/src/libffmpeg/libavcodec/mdct.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "dsputil.h"
 
@@ -74,7 +74,7 @@ int ff_mdct_init(MDCTContext *s, int nbits, int inverse)
  * @param input N/2 samples
  * @param tmp N/2 samples
  */
-void ff_imdct_calc(MDCTContext *s, FFTSample *output, 
+void ff_imdct_calc(MDCTContext *s, FFTSample *output,
                    const FFTSample *input, FFTSample *tmp)
 {
     int k, n8, n4, n2, n, j;
@@ -126,7 +126,7 @@ void ff_imdct_calc(MDCTContext *s, FFTSample *output,
  * @param out N/2 samples
  * @param tmp temporary storage of N/2 samples
  */
-void ff_mdct_calc(MDCTContext *s, FFTSample *out, 
+void ff_mdct_calc(MDCTContext *s, FFTSample *out,
                   const FFTSample *input, FFTSample *tmp)
 {
     int i, j, n, n8, n4, n2, n3;
@@ -156,7 +156,7 @@ void ff_mdct_calc(MDCTContext *s, FFTSample *out,
     }
 
     ff_fft_calc(&s->fft, x);
-  
+
     /* post rotation */
     for(i=0;i<n4;i++) {
         re = x[i].re;
diff --git a/src/libffmpeg/libavcodec/mdec.c b/src/libffmpeg/libavcodec/mdec.c
index afe122cfe..79caa24c1 100644
--- a/src/libffmpeg/libavcodec/mdec.c
+++ b/src/libffmpeg/libavcodec/mdec.c
@@ -14,17 +14,17 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * based upon code from Sebastian Jedruszkiewicz <elf@frogger.rules.pl>
  */
- 
+
 /**
  * @file mdec.c
  * PSX MDEC codec.
  * This is very similar to intra only MPEG1.
  */
- 
+
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
@@ -45,11 +45,11 @@ typedef struct MDECContext{
     int mb_width;
     int mb_height;
     int mb_x, mb_y;
-    DCTELEM __align8 block[6][64];
-    uint16_t __align8 intra_matrix[64];
-    int __align8 q_intra_matrix[64];
+    DECLARE_ALIGNED_8(DCTELEM, block[6][64]);
+    DECLARE_ALIGNED_8(uint16_t, intra_matrix[64]);
+    DECLARE_ALIGNED_8(int, q_intra_matrix[64]);
     uint8_t *bitstream_buffer;
-    int bitstream_buffer_size;
+    unsigned int bitstream_buffer_size;
     int block_last_index[6];
 } MDECContext;
 
@@ -74,15 +74,15 @@ static inline int mdec_decode_block_intra(MDECContext *a, DCTELEM *block, int n)
         a->last_dc[component]+= diff;
         block[0] = a->last_dc[component]<<3;
     }
-    
+
     i = 0;
     {
-        OPEN_READER(re, &a->gb);    
+        OPEN_READER(re, &a->gb);
         /* now quantify & encode AC coefs */
         for(;;) {
             UPDATE_CACHE(re, &a->gb);
             GET_RL_VLC(level, run, re, &a->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
-            
+
             if(level == 127){
                 break;
             } else if(level != 0) {
@@ -127,9 +127,9 @@ static inline int decode_mb(MDECContext *a, DCTELEM block[6][64]){
     const int block_index[6]= {5,4,0,1,2,3};
 
     a->dsp.clear_blocks(block[0]);
-    
+
     for(i=0; i<6; i++){
-        if( mdec_decode_block_intra(a, block[ block_index[i] ], block_index[i]) < 0) 
+        if( mdec_decode_block_intra(a, block[ block_index[i] ], block_index[i]) < 0)
             return -1;
     }
     return 0;
@@ -138,7 +138,7 @@ static inline int decode_mb(MDECContext *a, DCTELEM block[6][64]){
 static inline void idct_put(MDECContext *a, int mb_x, int mb_y){
     DCTELEM (*block)[64]= a->block;
     int linesize= a->picture.linesize[0];
-    
+
     uint8_t *dest_y  = a->picture.data[0] + (mb_y * 16* linesize              ) + mb_x * 16;
     uint8_t *dest_cb = a->picture.data[1] + (mb_y * 8 * a->picture.linesize[1]) + mb_x * 8;
     uint8_t *dest_cr = a->picture.data[2] + (mb_y * 8 * a->picture.linesize[2]) + mb_x * 8;
@@ -154,7 +154,7 @@ static inline void idct_put(MDECContext *a, int mb_x, int mb_y){
     }
 }
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -183,32 +183,32 @@ static int decode_frame(AVCodecContext *avctx,
         a->bitstream_buffer[i+1]= buf[i  ];
     }
     init_get_bits(&a->gb, a->bitstream_buffer, buf_size*8);
-    
+
     /* skip over 4 preamble bytes in stream (typically 0xXX 0xXX 0x00 0x38) */
     skip_bits(&a->gb, 32);
 
     a->qscale=  get_bits(&a->gb, 16);
     a->version= get_bits(&a->gb, 16);
-    
+
 //    printf("qscale:%d (0x%X), version:%d (0x%X)\n", a->qscale, a->qscale, a->version, a->version);
-    
+
     for(a->mb_x=0; a->mb_x<a->mb_width; a->mb_x++){
         for(a->mb_y=0; a->mb_y<a->mb_height; a->mb_y++){
             if( decode_mb(a, a->block) <0)
                 return -1;
-             
+
             idct_put(a, a->mb_x, a->mb_y);
         }
     }
 
 //    p->quality= (32 + a->inv_qscale/2)/a->inv_qscale;
 //    memset(p->qscale_table, p->quality, p->qstride*a->mb_height);
-    
+
     *picture= *(AVFrame*)&a->picture;
     *data_size = sizeof(AVPicture);
 
     emms_c();
-    
+
     return (get_bits_count(&a->gb)+31)/32*4;
 }
 
@@ -227,7 +227,7 @@ static void mdec_common_init(AVCodecContext *avctx){
 static int decode_init(AVCodecContext *avctx){
     MDECContext * const a = avctx->priv_data;
     AVFrame *p= (AVFrame*)&a->picture;
- 
+
     mdec_common_init(avctx);
     init_vlcs();
     ff_init_scantable(a->dsp.idct_permutation, &a->scantable, ff_zigzag_direct);
@@ -250,7 +250,7 @@ static int decode_end(AVCodecContext *avctx){
     av_freep(&a->bitstream_buffer);
     av_freep(&a->picture.qscale_table);
     a->bitstream_buffer_size=0;
-    
+
     return 0;
 }
 
diff --git a/src/libffmpeg/libavcodec/mem.c b/src/libffmpeg/libavcodec/mem.c
index 462d674e4..24d75e948 100644
--- a/src/libffmpeg/libavcodec/mem.c
+++ b/src/libffmpeg/libavcodec/mem.c
@@ -14,14 +14,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file mem.c
  * default memory allocator for libavcodec.
  */
- 
+
 #include "avcodec.h"
 
 /* here we can use OS dependant allocation functions */
@@ -37,7 +37,7 @@
    memory allocator. You do not need to suppress this file because the
    linker will do it automatically */
 
-/** 
+/**
  * Memory allocation of size byte with alignment suitable for all
  * memory accesses (including vectors if available on the
  * CPU). av_malloc(0) must return a non NULL pointer.
@@ -46,26 +46,26 @@ void *av_malloc(unsigned int size)
 {
     void *ptr;
 #ifdef MEMALIGN_HACK
-    int diff;
+    long diff;
 #endif
 
     /* lets disallow possible ambiguous cases */
     if(size > INT_MAX)
         return NULL;
-    
+
 #ifdef MEMALIGN_HACK
     ptr = malloc(size+16+1);
-    diff= ((-(int)ptr - 1)&15) + 1;
+    diff= ((-(long)ptr - 1)&15) + 1;
     ptr += diff;
     ((char*)ptr)[-1]= diff;
-#elif defined (HAVE_MEMALIGN) 
+#elif defined (HAVE_MEMALIGN)
     ptr = memalign(16,size);
-    /* Why 64? 
+    /* Why 64?
        Indeed, we should align it:
          on 4 for 386
          on 16 for 486
-	 on 32 for 586, PPro - k6-III
-	 on 64 for K7 (maybe for P3 too).
+         on 32 for 586, PPro - k6-III
+         on 64 for K7 (maybe for P3 too).
        Because L1 and L2 caches are aligned on those values.
        But I don't want to code such logic here!
      */
@@ -76,14 +76,14 @@ void *av_malloc(unsigned int size)
         Why not larger? because i didnt see a difference in benchmarks ...
      */
      /* benchmarks with p3
-        memalign(64)+1		3071,3051,3032
-        memalign(64)+2		3051,3032,3041
-        memalign(64)+4		2911,2896,2915
-        memalign(64)+8		2545,2554,2550
-        memalign(64)+16		2543,2572,2563
-        memalign(64)+32		2546,2545,2571
-        memalign(64)+64		2570,2533,2558
-        
+        memalign(64)+1          3071,3051,3032
+        memalign(64)+2          3051,3032,3041
+        memalign(64)+4          2911,2896,2915
+        memalign(64)+8          2545,2554,2550
+        memalign(64)+16         2543,2572,2563
+        memalign(64)+32         2546,2545,2571
+        memalign(64)+64         2570,2533,2558
+
         btw, malloc seems to do 8 byte alignment by default here
      */
 #else
@@ -95,7 +95,7 @@ void *av_malloc(unsigned int size)
 /**
  * av_realloc semantics (same as glibc): if ptr is NULL and size > 0,
  * identical to malloc(size). If size is zero, it is identical to
- * free(ptr) and NULL is returned.  
+ * free(ptr) and NULL is returned.
  */
 void *av_realloc(void *ptr, unsigned int size)
 {
@@ -117,7 +117,11 @@ void *av_realloc(void *ptr, unsigned int size)
 #endif
 }
 
-/* NOTE: ptr = NULL is explicetly allowed */
+/**
+ * Free memory which has been allocated with av_malloc(z)() or av_realloc().
+ * NOTE: ptr = NULL is explicitly allowed
+ * Note2: it is recommended that you use av_freep() instead
+ */
 void av_free(void *ptr)
 {
     /* XXX: this test should not be needed on most libcs */
diff --git a/src/libffmpeg/libavcodec/mjpeg.c b/src/libffmpeg/libavcodec/mjpeg.c
index e8bba0619..951a622ee 100644
--- a/src/libffmpeg/libavcodec/mjpeg.c
+++ b/src/libffmpeg/libavcodec/mjpeg.c
@@ -16,7 +16,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * Support for external huffman table, various fixes (AVID workaround),
  * aspecting, new decode_frame mechanism and apple mjpeg-b support
@@ -27,7 +27,7 @@
  * @file mjpeg.c
  * MJPEG encoder and decoder.
  */
- 
+
 //#define DEBUG
 #include <assert.h>
 
@@ -40,7 +40,7 @@
 #undef TWOMATRIXES
 
 typedef struct MJpegContext {
-    uint8_t huff_size_dc_luminance[12]; //FIXME use array [3] instead of lumi / chrom, for easier addressing 
+    uint8_t huff_size_dc_luminance[12]; //FIXME use array [3] instead of lumi / chrom, for easier addressing
     uint16_t huff_code_dc_luminance[12];
     uint8_t huff_size_dc_chrominance[12];
     uint16_t huff_code_dc_chrominance[12];
@@ -54,26 +54,26 @@ typedef struct MJpegContext {
 /* JPEG marker codes */
 typedef enum {
     /* start of frame */
-    SOF0  = 0xc0,	/* baseline */
-    SOF1  = 0xc1,	/* extended sequential, huffman */
-    SOF2  = 0xc2,	/* progressive, huffman */
-    SOF3  = 0xc3,	/* lossless, huffman */
+    SOF0  = 0xc0,       /* baseline */
+    SOF1  = 0xc1,       /* extended sequential, huffman */
+    SOF2  = 0xc2,       /* progressive, huffman */
+    SOF3  = 0xc3,       /* lossless, huffman */
 
-    SOF5  = 0xc5,	/* differential sequential, huffman */
-    SOF6  = 0xc6,	/* differential progressive, huffman */
-    SOF7  = 0xc7,	/* differential lossless, huffman */
-    JPG   = 0xc8,	/* reserved for JPEG extension */
-    SOF9  = 0xc9,	/* extended sequential, arithmetic */
-    SOF10 = 0xca,	/* progressive, arithmetic */
-    SOF11 = 0xcb,	/* lossless, arithmetic */
+    SOF5  = 0xc5,       /* differential sequential, huffman */
+    SOF6  = 0xc6,       /* differential progressive, huffman */
+    SOF7  = 0xc7,       /* differential lossless, huffman */
+    JPG   = 0xc8,       /* reserved for JPEG extension */
+    SOF9  = 0xc9,       /* extended sequential, arithmetic */
+    SOF10 = 0xca,       /* progressive, arithmetic */
+    SOF11 = 0xcb,       /* lossless, arithmetic */
 
-    SOF13 = 0xcd,	/* differential sequential, arithmetic */
-    SOF14 = 0xce,	/* differential progressive, arithmetic */
-    SOF15 = 0xcf,	/* differential lossless, arithmetic */
+    SOF13 = 0xcd,       /* differential sequential, arithmetic */
+    SOF14 = 0xce,       /* differential progressive, arithmetic */
+    SOF15 = 0xcf,       /* differential lossless, arithmetic */
 
-    DHT   = 0xc4,	/* define huffman tables */
+    DHT   = 0xc4,       /* define huffman tables */
 
-    DAC   = 0xcc,	/* define arithmetic-coding conditioning */
+    DAC   = 0xcc,       /* define arithmetic-coding conditioning */
 
     /* restart with modulo 8 count "m" */
     RST0  = 0xd0,
@@ -85,14 +85,14 @@ typedef enum {
     RST6  = 0xd6,
     RST7  = 0xd7,
 
-    SOI   = 0xd8,	/* start of image */
-    EOI   = 0xd9,	/* end of image */
-    SOS   = 0xda,	/* start of scan */
-    DQT   = 0xdb,	/* define quantization tables */
-    DNL   = 0xdc,	/* define number of lines */
-    DRI   = 0xdd,	/* define restart interval */
-    DHP   = 0xde,	/* define hierarchical progression */
-    EXP   = 0xdf,	/* expand reference components */
+    SOI   = 0xd8,       /* start of image */
+    EOI   = 0xd9,       /* end of image */
+    SOS   = 0xda,       /* start of scan */
+    DQT   = 0xdb,       /* define quantization tables */
+    DNL   = 0xdc,       /* define number of lines */
+    DRI   = 0xdd,       /* define restart interval */
+    DHP   = 0xde,       /* define hierarchical progression */
+    EXP   = 0xdf,       /* expand reference components */
 
     APP0  = 0xe0,
     APP1  = 0xe1,
@@ -118,17 +118,17 @@ typedef enum {
     JPG4  = 0xf4,
     JPG5  = 0xf5,
     JPG6  = 0xf6,
-    JPG7  = 0xf7,
-    JPG8  = 0xf8,
+    SOF48 = 0xf7,       ///< JPEG-LS
+    LSE   = 0xf8,       ///< JPEG-LS extension parameters
     JPG9  = 0xf9,
     JPG10 = 0xfa,
     JPG11 = 0xfb,
     JPG12 = 0xfc,
     JPG13 = 0xfd,
 
-    COM   = 0xfe,	/* comment */
+    COM   = 0xfe,       /* comment */
 
-    TEM   = 0x01,	/* temporary private use for arithmetic coding */
+    TEM   = 0x01,       /* temporary private use for arithmetic coding */
 
     /* 0x02 -> 0xbf reserved */
 } JPEG_MARKER;
@@ -195,7 +195,7 @@ static const uint8_t val_ac_luminance[] =
   0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
   0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
   0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-  0xf9, 0xfa 
+  0xf9, 0xfa
 };
 
 static const uint8_t bits_ac_chrominance[17] =
@@ -222,7 +222,7 @@ static const uint8_t val_ac_chrominance[] =
   0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
   0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
   0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
-  0xf9, 0xfa 
+  0xf9, 0xfa
 };
 
 /* isn't this function nicer than the one in the libjpeg ? */
@@ -249,11 +249,11 @@ static void build_huffman_codes(uint8_t *huff_size, uint16_t *huff_code,
 int mjpeg_init(MpegEncContext *s)
 {
     MJpegContext *m;
-    
+
     m = av_malloc(sizeof(MJpegContext));
     if (!m)
         return -1;
-    
+
     s->min_qcoeff=-1023;
     s->max_qcoeff= 1023;
 
@@ -274,7 +274,7 @@ int mjpeg_init(MpegEncContext *s)
                         m->huff_code_ac_chrominance,
                         bits_ac_chrominance,
                         val_ac_chrominance);
-    
+
     s->mjpeg_ctx = m;
     return 0;
 }
@@ -362,7 +362,7 @@ static void jpeg_table_header(MpegEncContext *s)
     size = 2;
     size += put_huffman_table(s, 0, 0, bits_dc_luminance, val_dc_luminance);
     size += put_huffman_table(s, 0, 1, bits_dc_chrominance, val_dc_chrominance);
-    
+
     size += put_huffman_table(s, 1, 0, bits_ac_luminance, val_ac_luminance);
     size += put_huffman_table(s, 1, 1, bits_ac_chrominance, val_ac_chrominance);
     ptr[0] = size >> 8;
@@ -401,7 +401,7 @@ static void jpeg_put_comments(MpegEncContext *s)
         ptr[1] = size;
     }
 
-    if(  s->avctx->pix_fmt == PIX_FMT_YUV420P 
+    if(  s->avctx->pix_fmt == PIX_FMT_YUV420P
        ||s->avctx->pix_fmt == PIX_FMT_YUV422P
        ||s->avctx->pix_fmt == PIX_FMT_YUV444P){
         put_marker(p, COM);
@@ -417,17 +417,25 @@ static void jpeg_put_comments(MpegEncContext *s)
 
 void mjpeg_picture_header(MpegEncContext *s)
 {
-    const int lossless= s->avctx->codec_id == CODEC_ID_LJPEG;
+    const int lossless= s->avctx->codec_id != CODEC_ID_MJPEG;
+    const int ls      = s->avctx->codec_id == CODEC_ID_JPEGLS;
+
+    assert(!(ls && s->mjpeg_write_tables));
 
     put_marker(&s->pb, SOI);
 
     if (!s->mjpeg_data_only_frames)
     {
-    jpeg_put_comments(s);    
+    jpeg_put_comments(s);
 
     if (s->mjpeg_write_tables) jpeg_table_header(s);
 
-    put_marker(&s->pb, lossless ? SOF3 : SOF0);
+    switch(s->avctx->codec_id){
+    case CODEC_ID_MJPEG:  put_marker(&s->pb, SOF0 ); break;
+    case CODEC_ID_LJPEG:  put_marker(&s->pb, SOF3 ); break;
+    case CODEC_ID_JPEGLS: put_marker(&s->pb, SOF48); break;
+    default: assert(0);
+    }
 
     put_bits(&s->pb, 16, 17);
     if(lossless && s->avctx->pix_fmt == PIX_FMT_RGBA32)
@@ -437,13 +445,13 @@ void mjpeg_picture_header(MpegEncContext *s)
     put_bits(&s->pb, 16, s->height);
     put_bits(&s->pb, 16, s->width);
     put_bits(&s->pb, 8, 3); /* 3 components */
-    
+
     /* Y component */
     put_bits(&s->pb, 8, 1); /* component number */
     put_bits(&s->pb, 4, s->mjpeg_hsample[0]); /* H factor */
     put_bits(&s->pb, 4, s->mjpeg_vsample[0]); /* V factor */
     put_bits(&s->pb, 8, 0); /* select matrix */
-    
+
     /* Cb component */
     put_bits(&s->pb, 8, 2); /* component number */
     put_bits(&s->pb, 4, s->mjpeg_hsample[1]); /* H factor */
@@ -469,25 +477,34 @@ void mjpeg_picture_header(MpegEncContext *s)
     put_marker(&s->pb, SOS);
     put_bits(&s->pb, 16, 12); /* length */
     put_bits(&s->pb, 8, 3); /* 3 components */
-    
+
     /* Y component */
     put_bits(&s->pb, 8, 1); /* index */
     put_bits(&s->pb, 4, 0); /* DC huffman table index */
     put_bits(&s->pb, 4, 0); /* AC huffman table index */
-    
+
     /* Cb component */
     put_bits(&s->pb, 8, 2); /* index */
     put_bits(&s->pb, 4, 1); /* DC huffman table index */
     put_bits(&s->pb, 4, lossless ? 0 : 1); /* AC huffman table index */
-    
+
     /* Cr component */
     put_bits(&s->pb, 8, 3); /* index */
     put_bits(&s->pb, 4, 1); /* DC huffman table index */
     put_bits(&s->pb, 4, lossless ? 0 : 1); /* AC huffman table index */
 
-    put_bits(&s->pb, 8, lossless ? s->avctx->prediction_method+1 : 0); /* Ss (not used) */
-    put_bits(&s->pb, 8, lossless ? 0 : 63); /* Se (not used) */
+    put_bits(&s->pb, 8, (lossless && !ls) ? s->avctx->prediction_method+1 : 0); /* Ss (not used) */
+
+    switch(s->avctx->codec_id){
+    case CODEC_ID_MJPEG:  put_bits(&s->pb, 8, 63); break; /* Se (not used) */
+    case CODEC_ID_LJPEG:  put_bits(&s->pb, 8,  0); break; /* not used */
+    case CODEC_ID_JPEGLS: put_bits(&s->pb, 8,  1); break; /* ILV = line interleaved */
+    default: assert(0);
+    }
+
     put_bits(&s->pb, 8, 0); /* Ah/Al (not used) */
+
+    //FIXME DC/AC entropy table selectors stuff in jpegls
 }
 
 static void escape_FF(MpegEncContext *s, int start)
@@ -496,10 +513,10 @@ static void escape_FF(MpegEncContext *s, int start)
     int i, ff_count;
     uint8_t *buf= s->pb.buf + start;
     int align= (-(size_t)(buf))&3;
-    
+
     assert((size&7) == 0);
     size >>= 3;
-    
+
     ff_count=0;
     for(i=0; i<size && i<align; i++){
         if(buf[i]==0xFF) ff_count++;
@@ -526,12 +543,12 @@ static void escape_FF(MpegEncContext *s, int start)
     }
 
     if(ff_count==0) return;
-    
+
     /* skip put bits */
     for(i=0; i<ff_count-3; i+=4)
         put_bits(&s->pb, 32, 0);
     put_bits(&s->pb, (ff_count-i)*8, 0);
-    flush_put_bits(&s->pb); 
+    flush_put_bits(&s->pb);
 
     for(i=size-1; ff_count; i--){
         int v= buf[i];
@@ -559,14 +576,14 @@ void mjpeg_picture_trailer(MpegEncContext *s)
     flush_put_bits(&s->pb);
 
     assert((s->header_bits&7)==0);
-    
+
     escape_FF(s, s->header_bits>>3);
 
     put_marker(&s->pb, EOI);
 }
 
 static inline void mjpeg_encode_dc(MpegEncContext *s, int val,
-				   uint8_t *huff_size, uint16_t *huff_code)
+                                   uint8_t *huff_size, uint16_t *huff_code)
 {
     int mant, nbits;
 
@@ -578,11 +595,11 @@ static inline void mjpeg_encode_dc(MpegEncContext *s, int val,
             val = -val;
             mant--;
         }
-        
+
         nbits= av_log2_16bit(val) + 1;
-            
+
         put_bits(&s->pb, huff_size[nbits], huff_code[nbits]);
-        
+
         put_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
     }
 }
@@ -594,7 +611,7 @@ static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
     MJpegContext *m = s->mjpeg_ctx;
     uint8_t *huff_size_ac;
     uint16_t *huff_code_ac;
-    
+
     /* DC coef */
     component = (n <= 3 ? 0 : n - 4 + 1);
     dc = block[0]; /* overflow is impossible */
@@ -609,9 +626,9 @@ static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
         huff_code_ac = m->huff_code_ac_chrominance;
     }
     s->last_dc[component] = dc;
-    
+
     /* AC coefs */
-    
+
     run = 0;
     last_index = s->block_last_index[n];
     for(i=1;i<=last_index;i++) {
@@ -629,12 +646,12 @@ static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
                 val = -val;
                 mant--;
             }
-            
+
             nbits= av_log2(val) + 1;
             code = (run << 4) | nbits;
 
             put_bits(&s->pb, huff_size_ac[code], huff_code_ac[code]);
-        
+
             put_bits(&s->pb, nbits, mant & ((1 << nbits) - 1));
             run = 0;
         }
@@ -645,7 +662,7 @@ static void encode_block(MpegEncContext *s, DCTELEM *block, int n)
         put_bits(&s->pb, huff_size_ac[0], huff_code_ac[0]);
 }
 
-void mjpeg_encode_mb(MpegEncContext *s, 
+void mjpeg_encode_mb(MpegEncContext *s,
                      DCTELEM block[6][64])
 {
     int i;
@@ -668,11 +685,11 @@ static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, in
     *p = *pict;
     p->pict_type= FF_I_TYPE;
     p->key_frame= 1;
-    
+
     mjpeg_picture_header(s);
 
     s->header_bits= put_bits_count(&s->pb);
-    
+
     if(avctx->pix_fmt == PIX_FMT_RGBA32){
         int x, y, i;
         const int linesize= p->linesize[0];
@@ -691,7 +708,7 @@ static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, in
                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
                 return -1;
             }
-            
+
             for(i=0; i<3; i++){
                 top[i]= left[i]= topleft[i]= buffer[0][i];
             }
@@ -704,14 +721,14 @@ static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, in
                     int pred, diff;
 
                     PREDICT(pred, topleft[i], top[i], left[i], modified_predictor);
-                        
+
                     topleft[i]= top[i];
                     top[i]= buffer[x+1][i];
-                    
+
                     left[i]= buffer[x][i];
 
                     diff= ((left[i] - pred + 0x100)&0x1FF) - 0x100;
-                    
+
                     if(i==0)
                         mjpeg_encode_dc(s, diff, m->huff_size_dc_luminance, m->huff_code_dc_luminance); //FIXME ugly
                     else
@@ -723,7 +740,7 @@ static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, in
         int mb_x, mb_y, i;
         const int mb_width  = (width  + s->mjpeg_hsample[0] - 1) / s->mjpeg_hsample[0];
         const int mb_height = (height + s->mjpeg_vsample[0] - 1) / s->mjpeg_vsample[0];
-        
+
         for(mb_y = 0; mb_y < mb_height; mb_y++) {
             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < mb_width * 4 * 3 * s->mjpeg_hsample[0] * s->mjpeg_vsample[0]){
                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
@@ -756,7 +773,7 @@ static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, in
                                         PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
                                     }
                                 }
-                                
+
                                 if(i==0)
                                     mjpeg_encode_dc(s, (int8_t)(*ptr - pred), m->huff_size_dc_luminance, m->huff_code_dc_luminance); //FIXME ugly
                                 else
@@ -771,13 +788,13 @@ static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, in
                         h = s->mjpeg_hsample[i];
                         v = s->mjpeg_vsample[i];
                         linesize= p->linesize[i];
-                             
+
                         for(y=0; y<v; y++){
                             for(x=0; x<h; x++){
                                 int pred;
 
                                 ptr = p->data[i] + (linesize * (v * mb_y + y)) + (h * mb_x + x); //FIXME optimize this crap
-//printf("%d %d %d %d %8X\n", mb_x, mb_y, x, y, ptr); 
+//printf("%d %d %d %d %8X\n", mb_x, mb_y, x, y, ptr);
                                 PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
 
                                 if(i==0)
@@ -793,7 +810,7 @@ static int encode_picture_lossless(AVCodecContext *avctx, unsigned char *buf, in
     }
 
     emms_c();
-    
+
     mjpeg_picture_trailer(s);
     s->picture_number++;
 
@@ -827,11 +844,17 @@ typedef struct MJpegDecodeContext {
     int interlaced;     /* true if interlaced */
     int bottom_field;   /* true if bottom field */
     int lossless;
+    int ls;
     int rgb;
-    int rct;            /* standard rct */  
-    int pegasus_rct;    /* pegasus reversible colorspace transform */  
+    int rct;            /* standard rct */
+    int pegasus_rct;    /* pegasus reversible colorspace transform */
     int bits;           /* bits per component */
 
+    int maxval;
+    int near;         ///< near lossless bound (si 0 for lossless)
+    int t1,t2,t3;
+    int reset;        ///< context halfing intervall ?rename
+
     int width, height;
     int mb_width, mb_height;
     int nb_components;
@@ -850,7 +873,7 @@ typedef struct MJpegDecodeContext {
     AVFrame picture; /* picture structure */
     int linesize[MAX_COMPONENTS];                   ///< linesize << interlaced
     int8_t *qscale_table;
-    DCTELEM block[64] __align8;
+    DECLARE_ALIGNED_8(DCTELEM, block[64]);
     ScanTable scantable;
     void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
 
@@ -862,11 +885,15 @@ typedef struct MJpegDecodeContext {
     int interlace_polarity;
 
     int mjpb_skiptosod;
+
+    int cur_scan; /* current scan, used by JPEG-LS */
 } MJpegDecodeContext;
 
+#include "jpeg_ls.c" //FIXME make jpeg-ls more independant
+
 static int mjpeg_decode_dht(MJpegDecodeContext *s);
 
-static int build_vlc(VLC *vlc, const uint8_t *bits_table, const uint8_t *val_table, 
+static int build_vlc(VLC *vlc, const uint8_t *bits_table, const uint8_t *val_table,
                       int nb_codes, int use_static)
 {
     uint8_t huff_size[256];
@@ -874,7 +901,7 @@ static int build_vlc(VLC *vlc, const uint8_t *bits_table, const uint8_t *val_tab
 
     memset(huff_size, 0, sizeof(huff_size));
     build_huffman_codes(huff_size, huff_code, bits_table, val_table);
-    
+
     return init_vlc(vlc, 9, nb_codes, huff_size, 1, 1, huff_code, 2, 2, use_static);
 }
 
@@ -902,7 +929,7 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
     s->start_code = -1;
     s->first_picture = 1;
     s->org_height = avctx->coded_height;
-    
+
     build_vlc(&s->vlcs[0][0], bits_dc_luminance, val_dc_luminance, 12, 0);
     build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12, 0);
     build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251, 0);
@@ -910,10 +937,10 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
 
     if (avctx->flags & CODEC_FLAG_EXTERN_HUFF)
     {
-	av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n");
-	init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8);
-	mjpeg_decode_dht(s);
-	/* should check for error - but dunno */
+        av_log(avctx, AV_LOG_INFO, "mjpeg: using external huffman table\n");
+        init_get_bits(&s->gb, avctx->extradata, avctx->extradata_size*8);
+        mjpeg_decode_dht(s);
+        /* should check for error - but dunno */
     }
 
     return 0;
@@ -927,10 +954,10 @@ static int mjpeg_decode_init(AVCodecContext *avctx)
 static int find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
     int vop_found, i;
     uint16_t state;
-    
+
     vop_found= pc->frame_start_found;
     state= pc->state;
-    
+
     i=0;
     if(!vop_found){
         for(i=0; i<buf_size; i++){
@@ -951,7 +978,7 @@ static int find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
             state= (state<<8) | buf[i];
             if(state == 0xFFD8){
                 pc->frame_start_found=0;
-                pc->state=0; 
+                pc->state=0;
                 return i-1;
             }
         }
@@ -963,12 +990,12 @@ static int find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size){
 
 static int jpeg_parse(AVCodecParserContext *s,
                            AVCodecContext *avctx,
-                           uint8_t **poutbuf, int *poutbuf_size, 
+                           uint8_t **poutbuf, int *poutbuf_size,
                            const uint8_t *buf, int buf_size)
 {
     ParseContext *pc = s->priv_data;
     int next;
-    
+
     next= find_frame_end(pc, buf, buf_size);
 
     if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
@@ -986,16 +1013,16 @@ static int jpeg_parse(AVCodecParserContext *s,
 static int mjpeg_decode_dqt(MJpegDecodeContext *s)
 {
     int len, index, i, j;
-    
+
     len = get_bits(&s->gb, 16) - 2;
 
     while (len >= 65) {
         /* only 8 bit precision handled */
         if (get_bits(&s->gb, 4) != 0)
-	{
-	    dprintf("dqt: 16bit precision\n");
+        {
+            dprintf("dqt: 16bit precision\n");
             return -1;
-	}
+        }
         index = get_bits(&s->gb, 4);
         if (index >= 4)
             return -1;
@@ -1003,17 +1030,17 @@ static int mjpeg_decode_dqt(MJpegDecodeContext *s)
         /* read quant table */
         for(i=0;i<64;i++) {
             j = s->scantable.permutated[i];
-	    s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
+            s->quant_matrixes[index][j] = get_bits(&s->gb, 8);
         }
 
         //XXX FIXME finetune, and perhaps add dc too
         s->qscale[index]= FFMAX(
             s->quant_matrixes[index][s->scantable.permutated[1]],
             s->quant_matrixes[index][s->scantable.permutated[8]]) >> 1;
-	dprintf("qscale[%d]: %d\n", index, s->qscale[index]);
+        dprintf("qscale[%d]: %d\n", index, s->qscale[index]);
         len -= 65;
     }
-    
+
     return 0;
 }
 
@@ -1023,7 +1050,7 @@ static int mjpeg_decode_dht(MJpegDecodeContext *s)
     int len, index, i, class, n, v, code_max;
     uint8_t bits_table[17];
     uint8_t val_table[256];
-    
+
     len = get_bits(&s->gb, 16) - 2;
 
     while (len > 0) {
@@ -1071,17 +1098,22 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)
     /* XXX: verify len field validity */
     len = get_bits(&s->gb, 16);
     s->bits= get_bits(&s->gb, 8);
-    
-    if(s->pegasus_rct) s->bits=9;  
+
+    if(s->pegasus_rct) s->bits=9;
     if(s->bits==9 && !s->pegasus_rct) s->rct=1;    //FIXME ugly
 
     if (s->bits != 8 && !s->lossless){
         av_log(s->avctx, AV_LOG_ERROR, "only 8 bits/component accepted\n");
         return -1;
     }
+    if (s->bits > 8 && s->ls){
+        av_log(s->avctx, AV_LOG_ERROR, "only <= 8 bits/component accepted for JPEG-LS\n");
+        return -1;
+    }
+
     height = get_bits(&s->gb, 16);
     width = get_bits(&s->gb, 16);
-    
+
     dprintf("sof0: picture: %dx%d\n", width, height);
     if(avcodec_check_dimensions(s->avctx, width, height))
         return -1;
@@ -1107,38 +1139,44 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)
         if (s->quant_index[i] >= 4)
             return -1;
         dprintf("component %d %d:%d id: %d quant:%d\n", i, s->h_count[i],
-	    s->v_count[i], s->component_id[i], s->quant_index[i]);
+            s->v_count[i], s->component_id[i], s->quant_index[i]);
+    }
+
+    if(s->ls && (s->h_max > 1 || s->v_max > 1)) {
+        av_log(s->avctx, AV_LOG_ERROR, "Subsampling in JPEG-LS is not supported.\n");
+        return -1;
     }
-    
+
     if(s->v_max==1 && s->h_max==1 && s->lossless==1) s->rgb=1;
 
     /* if different size, realloc/alloc picture */
     /* XXX: also check h_count and v_count */
     if (width != s->width || height != s->height) {
         av_freep(&s->qscale_table);
-            
+
         s->width = width;
         s->height = height;
-        avcodec_set_dimensions(s->avctx, width, height);
 
         /* test interlaced mode */
         if (s->first_picture &&
             s->org_height != 0 &&
             s->height < ((s->org_height * 3) / 4)) {
             s->interlaced = 1;
-//	    s->bottom_field = (s->interlace_polarity) ? 1 : 0;
+//            s->bottom_field = (s->interlace_polarity) ? 1 : 0;
             s->bottom_field = 0;
-            s->avctx->height *= 2;
+            height *= 2;
         }
 
+        avcodec_set_dimensions(s->avctx, width, height);
+
         s->qscale_table= av_mallocz((s->width+15)/16);
 
         s->first_picture = 0;
     }
-    
+
     if(s->interlaced && s->bottom_field)
         return 0;
- 
+
     /* XXX: not complete test ! */
     switch((s->h_count[0] << 4) | s->v_count[0]) {
     case 0x11:
@@ -1157,6 +1195,12 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)
         s->avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV420P : PIX_FMT_YUVJ420P;
         break;
     }
+    if(s->ls){
+        if(s->nb_components > 1)
+            s->avctx->pix_fmt = PIX_FMT_RGB24;
+        else
+            s->avctx->pix_fmt = PIX_FMT_GRAY8;
+    }
 
     if(s->picture.data[0])
         s->avctx->release_buffer(s->avctx, &s->picture);
@@ -1168,18 +1212,18 @@ static int mjpeg_decode_sof(MJpegDecodeContext *s)
     }
     s->picture.pict_type= I_TYPE;
     s->picture.key_frame= 1;
-    
+
     for(i=0; i<3; i++){
         s->linesize[i]= s->picture.linesize[i] << s->interlaced;
     }
 
 //    printf("%d %d %d %d %d %d\n", s->width, s->height, s->linesize[0], s->linesize[1], s->interlaced, s->avctx->height);
-    
+
     if (len != (8+(3*nb_components)))
     {
-	dprintf("decode_sof0: error, len(%d) mismatch\n", len);
+        dprintf("decode_sof0: error, len(%d) mismatch\n", len);
     }
-    
+
     return 0;
 }
 
@@ -1189,7 +1233,7 @@ static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index)
     code = get_vlc2(&s->gb, s->vlcs[0][dc_index].table, 9, 2);
     if (code < 0)
     {
-	dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index,
+        dprintf("mjpeg_decode_dc: bad vlc: %d:%d (%p)\n", 0, dc_index,
                 &s->vlcs[0][dc_index]);
         return 0xffff;
     }
@@ -1201,7 +1245,7 @@ static inline int mjpeg_decode_dc(MJpegDecodeContext *s, int dc_index)
 }
 
 /* decode block and dequantize */
-static int decode_block(MJpegDecodeContext *s, DCTELEM *block, 
+static int decode_block(MJpegDecodeContext *s, DCTELEM *block,
                         int component, int dc_index, int ac_index, int quant_index)
 {
     int code, i, j, level, val;
@@ -1222,7 +1266,7 @@ static int decode_block(MJpegDecodeContext *s, DCTELEM *block,
     ac_vlc = &s->vlcs[1][ac_index];
     i = 1;
     for(;;) {
-	code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2);
+        code = get_vlc2(&s->gb, s->vlcs[1][ac_index].table, 9, 2);
 
         if (code < 0) {
             dprintf("error ac\n");
@@ -1256,10 +1300,10 @@ static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int predictor, int point
     int left[3], top[3], topleft[3];
     const int linesize= s->linesize[0];
     const int mask= (1<<s->bits)-1;
-    
+
     if((unsigned)s->mb_width > 32768) //dynamic alloc
         return -1;
-    
+
     for(i=0; i<3; i++){
         buffer[0][i]= 1 << (s->bits + point_transform - 1);
     }
@@ -1284,8 +1328,8 @@ static int ljpeg_decode_rgb_scan(MJpegDecodeContext *s, int predictor, int point
                 top[i]= buffer[mb_x][i];
 
                 PREDICT(pred, topleft[i], top[i], left[i], modified_predictor);
-                
-                left[i]= 
+
+                left[i]=
                 buffer[mb_x][i]= mask & (pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform));
             }
 
@@ -1338,7 +1382,7 @@ static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor, int point
                     x = 0;
                     y = 0;
                     linesize= s->linesize[c];
-                    
+
                     for(j=0; j<n; j++) {
                         int pred;
 
@@ -1356,7 +1400,7 @@ static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor, int point
                                 PREDICT(pred, ptr[-linesize-1], ptr[-linesize], ptr[-1], predictor);
                             }
                         }
-                        
+
                         if (s->interlaced && s->bottom_field)
                             ptr += linesize >> 1;
                         *ptr= pred + (mjpeg_decode_dc(s, s->dc_index[i]) << point_transform);
@@ -1378,7 +1422,7 @@ static int ljpeg_decode_yuv_scan(MJpegDecodeContext *s, int predictor, int point
                     x = 0;
                     y = 0;
                     linesize= s->linesize[c];
-                    
+
                     for(j=0; j<n; j++) {
                         int pred;
 
@@ -1421,15 +1465,15 @@ static int mjpeg_decode_scan(MJpegDecodeContext *s){
                 y = 0;
                 for(j=0;j<n;j++) {
                     memset(s->block, 0, sizeof(s->block));
-                    if (decode_block(s, s->block, i, 
-                                     s->dc_index[i], s->ac_index[i], 
+                    if (decode_block(s, s->block, i,
+                                     s->dc_index[i], s->ac_index[i],
                                      s->quant_index[c]) < 0) {
                         dprintf("error y=%d x=%d\n", mb_y, mb_x);
                         return -1;
                     }
-//		    dprintf("mb: %d %d processed\n", mb_y, mb_x);
-                    ptr = s->picture.data[c] + 
-                        (((s->linesize[c] * (v * mb_y + y) * 8) + 
+//                    dprintf("mb: %d %d processed\n", mb_y, mb_x);
+                    ptr = s->picture.data[c] +
+                        (((s->linesize[c] * (v * mb_y + y) * 8) +
                         (h * mb_x + x) * 8) >> s->avctx->lowres);
                     if (s->interlaced && s->bottom_field)
                         ptr += s->linesize[c] >> 1;
@@ -1459,35 +1503,36 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
     int len, nb_components, i, h, v, predictor, point_transform;
     int vmax, hmax, index, id;
     const int block_size= s->lossless ? 1 : 8;
+    int ilv;
 
     /* XXX: verify len field validity */
     len = get_bits(&s->gb, 16);
     nb_components = get_bits(&s->gb, 8);
     if (len != 6+2*nb_components)
     {
-	dprintf("decode_sos: invalid len (%d)\n", len);
-	return -1;
+        dprintf("decode_sos: invalid len (%d)\n", len);
+        return -1;
     }
     /* XXX: only interleaved scan accepted */
-    if (nb_components != s->nb_components)
+    if ((nb_components != s->nb_components) && !s->ls)
     {
-	dprintf("decode_sos: components(%d) mismatch\n", nb_components);
+        dprintf("decode_sos: components(%d) mismatch\n", nb_components);
         return -1;
     }
     vmax = 0;
     hmax = 0;
     for(i=0;i<nb_components;i++) {
         id = get_bits(&s->gb, 8) - 1;
-	dprintf("component: %d\n", id);
+        dprintf("component: %d\n", id);
         /* find component index */
         for(index=0;index<s->nb_components;index++)
             if (id == s->component_id[index])
                 break;
         if (index == s->nb_components)
-	{
-	    dprintf("decode_sos: index(%d) out of components\n", index);
+        {
+            dprintf("decode_sos: index(%d) out of components\n", index);
             return -1;
-	}
+        }
 
         s->comp_index[i] = index;
 
@@ -1498,42 +1543,42 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
         s->dc_index[i] = get_bits(&s->gb, 4);
         s->ac_index[i] = get_bits(&s->gb, 4);
 
-	if (s->dc_index[i] <  0 || s->ac_index[i] < 0 ||
-	    s->dc_index[i] >= 4 || s->ac_index[i] >= 4)
-	    goto out_of_range;
+        if (s->dc_index[i] <  0 || s->ac_index[i] < 0 ||
+            s->dc_index[i] >= 4 || s->ac_index[i] >= 4)
+            goto out_of_range;
 #if 0 //buggy
-	switch(s->start_code)
-	{
-	    case SOF0:
-		if (dc_index[i] > 1 || ac_index[i] > 1)
-		    goto out_of_range;
-		break;
-	    case SOF1:
-	    case SOF2:
-		if (dc_index[i] > 3 || ac_index[i] > 3)
-		    goto out_of_range;
-		break;
-	    case SOF3:
-		if (dc_index[i] > 3 || ac_index[i] != 0)
-		    goto out_of_range;
-		break;	
-	}
+        switch(s->start_code)
+        {
+            case SOF0:
+                if (dc_index[i] > 1 || ac_index[i] > 1)
+                    goto out_of_range;
+                break;
+            case SOF1:
+            case SOF2:
+                if (dc_index[i] > 3 || ac_index[i] > 3)
+                    goto out_of_range;
+                break;
+            case SOF3:
+                if (dc_index[i] > 3 || ac_index[i] != 0)
+                    goto out_of_range;
+                break;
+        }
 #endif
     }
 
-    predictor= get_bits(&s->gb, 8); /* lossless predictor or start of spectral (Ss) */
-    skip_bits(&s->gb, 8); /* Se */
+    predictor= get_bits(&s->gb, 8); /* JPEG Ss / lossless JPEG predictor /JPEG-LS NEAR */
+    ilv= get_bits(&s->gb, 8);    /* JPEG Se / JPEG-LS ILV */
     skip_bits(&s->gb, 4); /* Ah */
     point_transform= get_bits(&s->gb, 4); /* Al */
 
-    for(i=0;i<nb_components;i++) 
+    for(i=0;i<nb_components;i++)
         s->last_dc[i] = 1024;
 
     if (nb_components > 1) {
         /* interleaved stream */
         s->mb_width  = (s->width  + s->h_max * block_size - 1) / (s->h_max * block_size);
         s->mb_height = (s->height + s->v_max * block_size - 1) / (s->v_max * block_size);
-    } else {
+    } else if(!s->ls) { /* skip this for JPEG-LS */
         h = s->h_max / s->h_scount[s->comp_index[0]];
         v = s->v_max / s->v_scount[s->comp_index[0]];
         s->mb_width  = (s->width  + h * block_size - 1) / (h * block_size);
@@ -1544,13 +1589,22 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
     }
 
     if(s->avctx->debug & FF_DEBUG_PICT_INFO)
-        av_log(s->avctx, AV_LOG_DEBUG, "%s %s p:%d >>:%d\n", s->lossless ? "lossless" : "sequencial DCT", s->rgb ? "RGB" : "", predictor, point_transform);
-    
+        av_log(s->avctx, AV_LOG_DEBUG, "%s %s p:%d >>:%d ilv:%d bits:%d %s\n", s->lossless ? "lossless" : "sequencial DCT", s->rgb ? "RGB" : "",
+               predictor, point_transform, ilv, s->bits,
+               s->pegasus_rct ? "PRCT" : (s->rct ? "RCT" : ""));
+
+
     /* mjpeg-b can have padding bytes between sos and image data, skip them */
     for (i = s->mjpb_skiptosod; i > 0; i--)
         skip_bits(&s->gb, 8);
 
     if(s->lossless){
+        if(s->ls){
+//            for(){
+//            reset_ls_coding_parameters(s, 0);
+
+            ls_decode_picture(s, predictor, point_transform, ilv);
+        }else{
             if(s->rgb){
                 if(ljpeg_decode_rgb_scan(s, predictor, point_transform) < 0)
                     return -1;
@@ -1558,6 +1612,7 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
                 if(ljpeg_decode_yuv_scan(s, predictor, point_transform) < 0)
                     return -1;
             }
+        }
     }else{
         if(mjpeg_decode_scan(s) < 0)
             return -1;
@@ -1572,7 +1627,7 @@ static int mjpeg_decode_sos(MJpegDecodeContext *s)
 static int mjpeg_decode_dri(MJpegDecodeContext *s)
 {
     if (get_bits(&s->gb, 16) != 4)
-	return -1;
+        return -1;
     s->restart_interval = get_bits(&s->gb, 16);
     s->restart_count = 0;
     dprintf("restart interval: %d\n", s->restart_interval);
@@ -1586,7 +1641,7 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
 
     len = get_bits(&s->gb, 16);
     if (len < 5)
-	return -1;
+        return -1;
     if(8*len + get_bits_count(&s->gb) > s->gb.size_in_bits)
         return -1;
 
@@ -1595,43 +1650,43 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
     len -= 6;
 
     if(s->avctx->debug & FF_DEBUG_STARTCODE){
-        av_log(s->avctx, AV_LOG_DEBUG, "APPx %8X\n", id); 
+        av_log(s->avctx, AV_LOG_DEBUG, "APPx %8X\n", id);
     }
-    
+
     /* buggy AVID, it puts EOI only at every 10th frame */
     /* also this fourcc is used by non-avid files too, it holds some
        informations, but it's always present in AVID creates files */
     if (id == ff_get_fourcc("AVI1"))
     {
-	/* structure:
-	    4bytes	AVI1
-	    1bytes	polarity
-	    1bytes	always zero
-	    4bytes	field_size
-	    4bytes	field_size_less_padding
-	*/
-    	s->buggy_avid = 1;
-//	if (s->first_picture)
-//	    printf("mjpeg: workarounding buggy AVID\n");
-	s->interlace_polarity = get_bits(&s->gb, 8);
+        /* structure:
+            4bytes      AVI1
+            1bytes      polarity
+            1bytes      always zero
+            4bytes      field_size
+            4bytes      field_size_less_padding
+        */
+            s->buggy_avid = 1;
+//        if (s->first_picture)
+//            printf("mjpeg: workarounding buggy AVID\n");
+        s->interlace_polarity = get_bits(&s->gb, 8);
 #if 0
-	skip_bits(&s->gb, 8);
-	skip_bits(&s->gb, 32);
-	skip_bits(&s->gb, 32);
-	len -= 10;
+        skip_bits(&s->gb, 8);
+        skip_bits(&s->gb, 32);
+        skip_bits(&s->gb, 32);
+        len -= 10;
 #endif
-//	if (s->interlace_polarity)
-//	    printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity);
-	goto out;
+//        if (s->interlace_polarity)
+//            printf("mjpeg: interlace polarity: %d\n", s->interlace_polarity);
+        goto out;
     }
-    
+
 //    len -= 2;
-    
+
     if (id == ff_get_fourcc("JFIF"))
     {
-	int t_w, t_h, v1, v2;
-	skip_bits(&s->gb, 8); /* the trailing zero-byte */
-	v1= get_bits(&s->gb, 8);
+        int t_w, t_h, v1, v2;
+        skip_bits(&s->gb, 8); /* the trailing zero-byte */
+        v1= get_bits(&s->gb, 8);
         v2= get_bits(&s->gb, 8);
         skip_bits(&s->gb, 8);
 
@@ -1645,37 +1700,37 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
                 s->avctx->sample_aspect_ratio.den
             );
 
-	t_w = get_bits(&s->gb, 8);
-	t_h = get_bits(&s->gb, 8);
-	if (t_w && t_h)
-	{
-	    /* skip thumbnail */
-	    if (len-10-(t_w*t_h*3) > 0)
-		len -= t_w*t_h*3;
-	}
-	len -= 10;
-	goto out;
-    }
-    
+        t_w = get_bits(&s->gb, 8);
+        t_h = get_bits(&s->gb, 8);
+        if (t_w && t_h)
+        {
+            /* skip thumbnail */
+            if (len-10-(t_w*t_h*3) > 0)
+                len -= t_w*t_h*3;
+        }
+        len -= 10;
+        goto out;
+    }
+
     if (id == ff_get_fourcc("Adob") && (get_bits(&s->gb, 8) == 'e'))
     {
         if (s->avctx->debug & FF_DEBUG_PICT_INFO)
             av_log(s->avctx, AV_LOG_INFO, "mjpeg: Adobe header found\n");
-	skip_bits(&s->gb, 16); /* version */
-	skip_bits(&s->gb, 16); /* flags0 */
-	skip_bits(&s->gb, 16); /* flags1 */
-	skip_bits(&s->gb, 8); /* transform */
-	len -= 7;
-	goto out;
+        skip_bits(&s->gb, 16); /* version */
+        skip_bits(&s->gb, 16); /* flags0 */
+        skip_bits(&s->gb, 16); /* flags1 */
+        skip_bits(&s->gb, 8);  /* transform */
+        len -= 7;
+        goto out;
     }
 
     if (id == ff_get_fourcc("LJIF")){
         if (s->avctx->debug & FF_DEBUG_PICT_INFO)
             av_log(s->avctx, AV_LOG_INFO, "Pegasus lossless jpeg header found\n");
-	skip_bits(&s->gb, 16); /* version ? */
-	skip_bits(&s->gb, 16); /* unknwon always 0? */
-	skip_bits(&s->gb, 16); /* unknwon always 0? */
-	skip_bits(&s->gb, 16); /* unknwon always 0? */
+        skip_bits(&s->gb, 16); /* version ? */
+        skip_bits(&s->gb, 16); /* unknwon always 0? */
+        skip_bits(&s->gb, 16); /* unknwon always 0? */
+        skip_bits(&s->gb, 16); /* unknwon always 0? */
         switch( get_bits(&s->gb, 8)){
         case 1:
             s->rgb= 1;
@@ -1691,36 +1746,36 @@ static int mjpeg_decode_app(MJpegDecodeContext *s)
         len -= 9;
         goto out;
     }
-    
+
     /* Apple MJPEG-A */
     if ((s->start_code == APP1) && (len > (0x28 - 8)))
     {
-	id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
-	id = be2me_32(id);
-	len -= 4;
-	if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */
-	{
+        id = (get_bits(&s->gb, 16) << 16) | get_bits(&s->gb, 16);
+        id = be2me_32(id);
+        len -= 4;
+        if (id == ff_get_fourcc("mjpg")) /* Apple MJPEG-A */
+        {
 #if 0
-	    skip_bits(&s->gb, 32); /* field size */
-	    skip_bits(&s->gb, 32); /* pad field size */
-	    skip_bits(&s->gb, 32); /* next off */
-	    skip_bits(&s->gb, 32); /* quant off */
-	    skip_bits(&s->gb, 32); /* huff off */
-	    skip_bits(&s->gb, 32); /* image off */
-	    skip_bits(&s->gb, 32); /* scan off */
-	    skip_bits(&s->gb, 32); /* data off */
+            skip_bits(&s->gb, 32); /* field size */
+            skip_bits(&s->gb, 32); /* pad field size */
+            skip_bits(&s->gb, 32); /* next off */
+            skip_bits(&s->gb, 32); /* quant off */
+            skip_bits(&s->gb, 32); /* huff off */
+            skip_bits(&s->gb, 32); /* image off */
+            skip_bits(&s->gb, 32); /* scan off */
+            skip_bits(&s->gb, 32); /* data off */
 #endif
             if (s->avctx->debug & FF_DEBUG_PICT_INFO)
-		av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n");
-	}
+                av_log(s->avctx, AV_LOG_INFO, "mjpeg: Apple MJPEG-A header found\n");
+        }
     }
 
 out:
     /* slow but needed for extreme adobe jpegs */
     if (len < 0)
-	av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n");
+        av_log(s->avctx, AV_LOG_ERROR, "mjpeg: error, decode_app parser read over the end\n");
     while(--len > 0)
-	skip_bits(&s->gb, 8);
+        skip_bits(&s->gb, 8);
 
     return 0;
 }
@@ -1729,32 +1784,32 @@ static int mjpeg_decode_com(MJpegDecodeContext *s)
 {
     int len = get_bits(&s->gb, 16);
     if (len >= 2 && 8*len - 16 + get_bits_count(&s->gb) <= s->gb.size_in_bits) {
-	uint8_t *cbuf = av_malloc(len - 1);
-	if (cbuf) {
-	    int i;
-	    for (i = 0; i < len - 2; i++)
-		cbuf[i] = get_bits(&s->gb, 8);
-	    if (i > 0 && cbuf[i-1] == '\n')
-		cbuf[i-1] = 0;
-	    else
-		cbuf[i] = 0;
+        uint8_t *cbuf = av_malloc(len - 1);
+        if (cbuf) {
+            int i;
+            for (i = 0; i < len - 2; i++)
+                cbuf[i] = get_bits(&s->gb, 8);
+            if (i > 0 && cbuf[i-1] == '\n')
+                cbuf[i-1] = 0;
+            else
+                cbuf[i] = 0;
 
             if(s->avctx->debug & FF_DEBUG_PICT_INFO)
                 av_log(s->avctx, AV_LOG_INFO, "mjpeg comment: '%s'\n", cbuf);
 
-	    /* buggy avid, it puts EOI only at every 10th frame */
-	    if (!strcmp(cbuf, "AVID"))
-	    {
-		s->buggy_avid = 1;
-		//	if (s->first_picture)
-		//	    printf("mjpeg: workarounding buggy AVID\n");
-	    }
+            /* buggy avid, it puts EOI only at every 10th frame */
+            if (!strcmp(cbuf, "AVID"))
+            {
+                s->buggy_avid = 1;
+                //        if (s->first_picture)
+                //            printf("mjpeg: workarounding buggy AVID\n");
+            }
             else if(!strcmp(cbuf, "CS=ITU601")){
                 s->cs_itu601= 1;
             }
 
-	    av_free(cbuf);
-	}
+            av_free(cbuf);
+        }
     }
 
     return 0;
@@ -1797,13 +1852,13 @@ static int find_marker(uint8_t **pbuf_ptr, uint8_t *buf_end)
     buf_ptr = *pbuf_ptr;
     while (buf_ptr < buf_end) {
         v = *buf_ptr++;
-	v2 = *buf_ptr;
+        v2 = *buf_ptr;
         if ((v == 0xff) && (v2 >= 0xc0) && (v2 <= 0xfe) && buf_ptr < buf_end) {
-	    val = *buf_ptr++;
-	    goto found;
+            val = *buf_ptr++;
+            goto found;
         }
 #ifdef DEBUG
-	skipped++;
+        skipped++;
 #endif
     }
     val = -1;
@@ -1815,7 +1870,7 @@ found:
     return val;
 }
 
-static int mjpeg_decode_frame(AVCodecContext *avctx, 
+static int mjpeg_decode_frame(AVCodecContext *avctx,
                               void *data, int *data_size,
                               uint8_t *buf, int buf_size)
 {
@@ -1829,72 +1884,112 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
     while (buf_ptr < buf_end) {
         /* find start next marker */
         start_code = find_marker(&buf_ptr, buf_end);
-	{
-	    /* EOF */
+        {
+            /* EOF */
             if (start_code < 0) {
-		goto the_end;
+                goto the_end;
             } else {
                 dprintf("marker=%x avail_size_in_buf=%d\n", start_code, buf_end - buf_ptr);
-		
-		if ((buf_end - buf_ptr) > s->buffer_size)
-		{
-		    av_free(s->buffer);
-		    s->buffer_size = buf_end-buf_ptr;
+
+                if ((buf_end - buf_ptr) > s->buffer_size)
+                {
+                    av_free(s->buffer);
+                    s->buffer_size = buf_end-buf_ptr;
                     s->buffer = av_malloc(s->buffer_size + FF_INPUT_BUFFER_PADDING_SIZE);
-		    dprintf("buffer too small, expanding to %d bytes\n",
-			s->buffer_size);
-		}
-		
-		/* unescape buffer of SOS */
-		if (start_code == SOS)
-		{
-		    uint8_t *src = buf_ptr;
-		    uint8_t *dst = s->buffer;
-
-		    while (src<buf_end)
-		    {
-			uint8_t x = *(src++);
-
-			*(dst++) = x;
-			if (x == 0xff)
-			{
+                    dprintf("buffer too small, expanding to %d bytes\n",
+                        s->buffer_size);
+                }
+
+                /* unescape buffer of SOS, use special treatment for JPEG-LS */
+                if (start_code == SOS && !s->ls)
+                {
+                    uint8_t *src = buf_ptr;
+                    uint8_t *dst = s->buffer;
+
+                    while (src<buf_end)
+                    {
+                        uint8_t x = *(src++);
+
+                        *(dst++) = x;
+                        if (x == 0xff)
+                        {
                             while(src<buf_end && x == 0xff)
                                 x = *(src++);
 
-			    if (x >= 0xd0 && x <= 0xd7)
-				*(dst++) = x;
-			    else if (x)
-				break;
-			}
-		    }
-		    init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8);
-		    
-		    dprintf("escaping removed %d bytes\n",
-			(buf_end - buf_ptr) - (dst - s->buffer));
-		}
-		else
-		    init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8);
-		
-		s->start_code = start_code;
+                            if (x >= 0xd0 && x <= 0xd7)
+                                *(dst++) = x;
+                            else if (x)
+                                break;
+                        }
+                    }
+                    init_get_bits(&s->gb, s->buffer, (dst - s->buffer)*8);
+
+                    dprintf("escaping removed %d bytes\n",
+                        (buf_end - buf_ptr) - (dst - s->buffer));
+                }
+                else if(start_code == SOS && s->ls){
+                    uint8_t *src = buf_ptr;
+                    uint8_t *dst = s->buffer;
+                    int bit_count = 0;
+                    int t = 0, b = 0;
+                    PutBitContext pb;
+
+                    s->cur_scan++;
+
+                    /* find marker */
+                    while (src + t < buf_end){
+                        uint8_t x = src[t++];
+                        if (x == 0xff){
+                            while((src + t < buf_end) && x == 0xff)
+                                x = src[t++];
+                            if (x & 0x80) {
+                                t -= 2;
+                                break;
+                            }
+                        }
+                    }
+                    bit_count = t * 8;
+
+                    init_put_bits(&pb, dst, t);
+
+                    /* unescape bitstream */
+                    while(b < t){
+                        uint8_t x = src[b++];
+                        put_bits(&pb, 8, x);
+                        if(x == 0xFF){
+                            x = src[b++];
+                            put_bits(&pb, 7, x);
+                            bit_count--;
+                        }
+                    }
+                    flush_put_bits(&pb);
+
+                    init_get_bits(&s->gb, dst, bit_count);
+                }
+                else
+                    init_get_bits(&s->gb, buf_ptr, (buf_end - buf_ptr)*8);
+
+                s->start_code = start_code;
                 if(s->avctx->debug & FF_DEBUG_STARTCODE){
                     av_log(s->avctx, AV_LOG_DEBUG, "startcode: %X\n", start_code);
                 }
 
-		/* process markers */
-		if (start_code >= 0xd0 && start_code <= 0xd7) {
-		    dprintf("restart marker: %d\n", start_code&0x0f);
-		    /* APP fields */
-		} else if (start_code >= APP0 && start_code <= APP15) {
-		    mjpeg_decode_app(s);
-		    /* Comment */
-		} else if (start_code == COM){
-		    mjpeg_decode_com(s);
-		}
+                /* process markers */
+                if (start_code >= 0xd0 && start_code <= 0xd7) {
+                    dprintf("restart marker: %d\n", start_code&0x0f);
+                    /* APP fields */
+                } else if (start_code >= APP0 && start_code <= APP15) {
+                    mjpeg_decode_app(s);
+                    /* Comment */
+                } else if (start_code == COM){
+                    mjpeg_decode_com(s);
+                }
 
                 switch(start_code) {
                 case SOI:
-		    s->restart_interval = 0;
-		    s->restart_count = 0;
+                    s->restart_interval = 0;
+
+                    s->restart_count = 0;
                     /* nothing to do on SOI */
                     break;
                 case DQT:
@@ -1908,19 +2003,29 @@ static int mjpeg_decode_frame(AVCodecContext *avctx,
                     break;
                 case SOF0:
                     s->lossless=0;
-                    if (mjpeg_decode_sof(s) < 0) 
-			return -1;
+                    if (mjpeg_decode_sof(s) < 0)
+                        return -1;
                     break;
                 case SOF3:
                     s->lossless=1;
-                    if (mjpeg_decode_sof(s) < 0) 
-			return -1;
+                    if (mjpeg_decode_sof(s) < 0)
+                        return -1;
                     break;
-		case EOI:
-		    if ((s->buggy_avid && !s->interlaced) || s->restart_interval) 
+                case SOF48:
+                    s->lossless=1;
+                    s->ls=1;
+                    if (mjpeg_decode_sof(s) < 0)
+                        return -1;
+                    break;
+                case LSE:
+                    if (decode_lse(s) < 0)
+                        return -1;
+                    break;
+                case EOI:
+                    if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
                         break;
 eoi_parser:
-		    {
+                    {
                         if (s->interlaced) {
                             s->bottom_field ^= 1;
                             /* if not bottom field, do not output image yet */
@@ -1931,7 +2036,7 @@ eoi_parser:
                         *data_size = sizeof(AVFrame);
 
                         if(!s->lossless){
-                            picture->quality= FFMAX(FFMAX(s->qscale[0], s->qscale[1]), s->qscale[2]); 
+                            picture->quality= FFMAX(FFMAX(s->qscale[0], s->qscale[1]), s->qscale[2]);
                             picture->qstride= 0;
                             picture->qscale_table= s->qscale_table;
                             memset(picture->qscale_table, picture->quality, (s->width+15)/16);
@@ -1939,44 +2044,44 @@ eoi_parser:
                                 av_log(s->avctx, AV_LOG_DEBUG, "QP: %d\n", picture->quality);
                             picture->quality*= FF_QP2LAMBDA;
                         }
-                        
+
                         goto the_end;
                     }
-		    break;
+                    break;
                 case SOS:
                     mjpeg_decode_sos(s);
-		    /* buggy avid puts EOI every 10-20th frame */
-		    /* if restart period is over process EOI */
-		    if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
-			goto eoi_parser;
+                    /* buggy avid puts EOI every 10-20th frame */
+                    /* if restart period is over process EOI */
+                    if ((s->buggy_avid && !s->interlaced) || s->restart_interval)
+                        goto eoi_parser;
+                    break;
+                case DRI:
+                    mjpeg_decode_dri(s);
+                    break;
+                case SOF1:
+                case SOF2:
+                case SOF5:
+                case SOF6:
+                case SOF7:
+                case SOF9:
+                case SOF10:
+                case SOF11:
+                case SOF13:
+                case SOF14:
+                case SOF15:
+                case JPG:
+                    av_log(s->avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code);
                     break;
-		case DRI:
-		    mjpeg_decode_dri(s);
-		    break;
-		case SOF1:
-		case SOF2:
-		case SOF5:
-		case SOF6:
-		case SOF7:
-		case SOF9:
-		case SOF10:
-		case SOF11:
-		case SOF13:
-		case SOF14:
-		case SOF15:
-		case JPG:
-		    av_log(s->avctx, AV_LOG_ERROR, "mjpeg: unsupported coding type (%x)\n", start_code);
-		    break;
-//		default:
-//		    printf("mjpeg: unsupported marker (%x)\n", start_code);
-//		    break;
+//                default:
+//                    printf("mjpeg: unsupported marker (%x)\n", start_code);
+//                    break;
                 }
 
 not_the_end:
-		/* eof process start code */
-		buf_ptr += (get_bits_count(&s->gb)+7)/8;
-		dprintf("marker parser used %d bytes (%d bits)\n",
-		    (get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb));
+                /* eof process start code */
+                buf_ptr += (get_bits_count(&s->gb)+7)/8;
+                dprintf("marker parser used %d bytes (%d bits)\n",
+                    (get_bits_count(&s->gb)+7)/8, get_bits_count(&s->gb));
             }
         }
     }
@@ -1986,7 +2091,7 @@ the_end:
     return buf_ptr - buf;
 }
 
-static int mjpegb_decode_frame(AVCodecContext *avctx, 
+static int mjpegb_decode_frame(AVCodecContext *avctx,
                               void *data, int *data_size,
                               uint8_t *buf, int buf_size)
 {
@@ -1999,7 +2104,7 @@ static int mjpegb_decode_frame(AVCodecContext *avctx,
 
     buf_ptr = buf;
     buf_end = buf + buf_size;
-    
+
 read_header:
     /* reset on every SOI */
     s->restart_interval = 0;
@@ -2009,11 +2114,11 @@ read_header:
     init_get_bits(&hgb, buf_ptr, /*buf_size*/(buf_end - buf_ptr)*8);
 
     skip_bits(&hgb, 32); /* reserved zeros */
-    
+
     if (get_bits_long(&hgb, 32) != be2me_32(ff_get_fourcc("mjpg")))
     {
-	dprintf("not mjpeg-b (bad fourcc)\n");
-	return 0;
+        dprintf("not mjpeg-b (bad fourcc)\n");
+        return 0;
     }
 
     field_size = get_bits_long(&hgb, 32); /* field size */
@@ -2022,34 +2127,34 @@ read_header:
     second_field_offs = get_bits_long(&hgb, 32);
     dprintf("second field offs: 0x%x\n", second_field_offs);
     if (second_field_offs)
-	s->interlaced = 1;
+        s->interlaced = 1;
 
     dqt_offs = get_bits_long(&hgb, 32);
     dprintf("dqt offs: 0x%x\n", dqt_offs);
     if (dqt_offs)
     {
-	init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8);
-	s->start_code = DQT;
-	mjpeg_decode_dqt(s);
+        init_get_bits(&s->gb, buf+dqt_offs, (buf_end - (buf+dqt_offs))*8);
+        s->start_code = DQT;
+        mjpeg_decode_dqt(s);
     }
-    
+
     dht_offs = get_bits_long(&hgb, 32);
     dprintf("dht offs: 0x%x\n", dht_offs);
     if (dht_offs)
     {
-	init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8);
-	s->start_code = DHT;
-	mjpeg_decode_dht(s);
+        init_get_bits(&s->gb, buf+dht_offs, (buf_end - (buf+dht_offs))*8);
+        s->start_code = DHT;
+        mjpeg_decode_dht(s);
     }
 
     sof_offs = get_bits_long(&hgb, 32);
     dprintf("sof offs: 0x%x\n", sof_offs);
     if (sof_offs)
     {
-	init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8);
-	s->start_code = SOF0;
-	if (mjpeg_decode_sof(s) < 0)
-	    return -1;
+        init_get_bits(&s->gb, buf+sof_offs, (buf_end - (buf+sof_offs))*8);
+        s->start_code = SOF0;
+        if (mjpeg_decode_sof(s) < 0)
+            return -1;
     }
 
     sos_offs = get_bits_long(&hgb, 32);
@@ -2058,31 +2163,31 @@ read_header:
     dprintf("sod offs: 0x%x\n", sod_offs);
     if (sos_offs)
     {
-//	init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8);
-	init_get_bits(&s->gb, buf+sos_offs, field_size*8);
-	s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
-	s->start_code = SOS;
-	mjpeg_decode_sos(s);
+//        init_get_bits(&s->gb, buf+sos_offs, (buf_end - (buf+sos_offs))*8);
+        init_get_bits(&s->gb, buf+sos_offs, field_size*8);
+        s->mjpb_skiptosod = (sod_offs - sos_offs - show_bits(&s->gb, 16));
+        s->start_code = SOS;
+        mjpeg_decode_sos(s);
     }
 
     if (s->interlaced) {
         s->bottom_field ^= 1;
         /* if not bottom field, do not output image yet */
         if (s->bottom_field && second_field_offs)
-	{
-	    buf_ptr = buf + second_field_offs;
-	    second_field_offs = 0;
-	    goto read_header;
-    	}
+        {
+            buf_ptr = buf + second_field_offs;
+            second_field_offs = 0;
+            goto read_header;
+            }
     }
 
     //XXX FIXME factorize, this looks very similar to the EOI code
 
     *picture= s->picture;
     *data_size = sizeof(AVFrame);
-    
+
     if(!s->lossless){
-        picture->quality= FFMAX(FFMAX(s->qscale[0], s->qscale[1]), s->qscale[2]); 
+        picture->quality= FFMAX(FFMAX(s->qscale[0], s->qscale[1]), s->qscale[2]);
         picture->qstride= 0;
         picture->qscale_table= s->qscale_table;
         memset(picture->qscale_table, picture->quality, (s->width+15)/16);
@@ -2096,7 +2201,7 @@ read_header:
 
 #include "sp5x.h"
 
-static int sp5x_decode_frame(AVCodecContext *avctx, 
+static int sp5x_decode_frame(AVCodecContext *avctx,
                               void *data, int *data_size,
                               uint8_t *buf, int buf_size)
 {
@@ -2108,7 +2213,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
     int i = 0, j = 0;
 
     if (!avctx->width || !avctx->height)
-	return -1;
+        return -1;
 
     buf_ptr = buf;
     buf_end = buf + buf_size;
@@ -2116,7 +2221,7 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
 #if 1
     recoded = av_mallocz(buf_size + 1024);
     if (!recoded)
-	return -1;
+        return -1;
 
     /* SOI */
     recoded[j++] = 0xFF;
@@ -2142,9 +2247,9 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
 
     for (i = 14; i < buf_size && j < buf_size+1024-2; i++)
     {
-	recoded[j++] = buf[i];
-	if (buf[i] == 0xff)
-	    recoded[j++] = 0;
+        recoded[j++] = buf[i];
+        if (buf[i] == 0xff)
+            recoded[j++] = 0;
     }
 
     /* EOI */
@@ -2175,42 +2280,42 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
     s->quant_index[2] = 1;
     s->h_max = 2;
     s->v_max = 2;
-    
+
     s->qscale_table = av_mallocz((s->width+15)/16);
     avctx->pix_fmt = s->cs_itu601 ? PIX_FMT_YUV420P : PIX_FMT_YUVJ420;
     s->interlaced = 0;
-    
+
     s->picture.reference = 0;
     if (avctx->get_buffer(avctx, &s->picture) < 0)
     {
         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
-	return -1;
+        return -1;
     }
 
     s->picture.pict_type = I_TYPE;
     s->picture.key_frame = 1;
 
     for (i = 0; i < 3; i++)
-	s->linesize[i] = s->picture.linesize[i] << s->interlaced;
+        s->linesize[i] = s->picture.linesize[i] << s->interlaced;
 
     /* DQT */
     for (i = 0; i < 64; i++)
     {
-	j = s->scantable.permutated[i];
-	s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i];
+        j = s->scantable.permutated[i];
+        s->quant_matrixes[0][j] = sp5x_quant_table[(qscale * 2) + i];
     }
     s->qscale[0] = FFMAX(
-	s->quant_matrixes[0][s->scantable.permutated[1]],
-	s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1;
+        s->quant_matrixes[0][s->scantable.permutated[1]],
+        s->quant_matrixes[0][s->scantable.permutated[8]]) >> 1;
 
     for (i = 0; i < 64; i++)
     {
-	j = s->scantable.permutated[i];
-	s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i];
+        j = s->scantable.permutated[i];
+        s->quant_matrixes[1][j] = sp5x_quant_table[(qscale * 2) + 1 + i];
     }
     s->qscale[1] = FFMAX(
-	s->quant_matrixes[1][s->scantable.permutated[1]],
-	s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1;
+        s->quant_matrixes[1][s->scantable.permutated[1]],
+        s->quant_matrixes[1][s->scantable.permutated[8]]) >> 1;
 
     /* DHT */
 
@@ -2235,15 +2340,15 @@ static int sp5x_decode_frame(AVCodecContext *avctx,
     s->v_scount[2] = s->v_count[2];
     s->dc_index[2] = 1;
     s->ac_index[2] = 1;
-    
+
     for (i = 0; i < 3; i++)
-	s->last_dc[i] = 1024;
+        s->last_dc[i] = 1024;
 
     s->mb_width = (s->width * s->h_max * 8 -1) / (s->h_max * 8);
     s->mb_height = (s->height * s->v_max * 8 -1) / (s->v_max * 8);
 
     init_get_bits(&s->gb, buf+14, (buf_size-14)*8);
-    
+
     return mjpeg_decode_scan(s);
 #endif
 
@@ -2257,7 +2362,7 @@ static int mjpeg_decode_end(AVCodecContext *avctx)
 
     av_free(s->buffer);
     av_free(s->qscale_table);
-    
+
     for(i=0;i<2;i++) {
         for(j=0;j<4;j++)
             free_vlc(&s->vlcs[i][j]);
diff --git a/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c b/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c
index ab467de8c..c52490592 100644
--- a/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c
+++ b/src/libffmpeg/libavcodec/mlib/dsputil_mlib.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "../dsputil.h"
@@ -63,7 +63,7 @@ static void add_pixels_clamped_mlib(const DCTELEM *block, uint8_t *pixels, int l
 /* put block, width 16 pixel, height 8/16 */
 
 static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
+                               int stride, int height)
 {
   switch (height) {
     case 8:
@@ -80,7 +80,7 @@ static void put_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
 }
 
 static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
-				  int stride, int height)
+                                  int stride, int height)
 {
   switch (height) {
     case 8:
@@ -97,7 +97,7 @@ static void put_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
 }
 
 static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
-				  int stride, int height)
+                                  int stride, int height)
 {
   switch (height) {
     case 8:
@@ -114,7 +114,7 @@ static void put_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
 }
 
 static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
-				  int stride, int height)
+                                  int stride, int height)
 {
   switch (height) {
     case 8:
@@ -133,7 +133,7 @@ static void put_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
 /* put block, width 8 pixel, height 4/8/16 */
 
 static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
+                               int stride, int height)
 {
   switch (height) {
     case 4:
@@ -154,7 +154,7 @@ static void put_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
 }
 
 static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
-				  int stride, int height)
+                                  int stride, int height)
 {
   switch (height) {
     case 4:
@@ -175,7 +175,7 @@ static void put_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
 }
 
 static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
-				  int stride, int height)
+                                  int stride, int height)
 {
   switch (height) {
     case 4:
@@ -196,7 +196,7 @@ static void put_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
 }
 
 static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
-				  int stride, int height)
+                                  int stride, int height)
 {
   switch (height) {
     case 4:
@@ -219,7 +219,7 @@ static void put_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
 /* average block, width 16 pixel, height 8/16 */
 
 static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
+                               int stride, int height)
 {
   switch (height) {
     case 8:
@@ -236,7 +236,7 @@ static void avg_pixels16_mlib (uint8_t * dest, const uint8_t * ref,
 }
 
 static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
-				  int stride, int height)
+                                  int stride, int height)
 {
   switch (height) {
     case 8:
@@ -253,7 +253,7 @@ static void avg_pixels16_x2_mlib (uint8_t * dest, const uint8_t * ref,
 }
 
 static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
-				  int stride, int height)
+                                  int stride, int height)
 {
   switch (height) {
     case 8:
@@ -270,7 +270,7 @@ static void avg_pixels16_y2_mlib (uint8_t * dest, const uint8_t * ref,
 }
 
 static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
-				  int stride, int height)
+                                  int stride, int height)
 {
   switch (height) {
     case 8:
@@ -289,7 +289,7 @@ static void avg_pixels16_xy2_mlib(uint8_t * dest, const uint8_t * ref,
 /* average block, width 8 pixel, height 4/8/16 */
 
 static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
-			       int stride, int height)
+                               int stride, int height)
 {
   switch (height) {
     case 4:
@@ -310,7 +310,7 @@ static void avg_pixels8_mlib (uint8_t * dest, const uint8_t * ref,
 }
 
 static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
-				  int stride, int height)
+                                  int stride, int height)
 {
   switch (height) {
     case 4:
@@ -331,7 +331,7 @@ static void avg_pixels8_x2_mlib (uint8_t * dest, const uint8_t * ref,
 }
 
 static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
-				  int stride, int height)
+                                  int stride, int height)
 {
   switch (height) {
     case 4:
@@ -352,7 +352,7 @@ static void avg_pixels8_y2_mlib (uint8_t * dest, const uint8_t * ref,
 }
 
 static void avg_pixels8_xy2_mlib(uint8_t * dest, const uint8_t * ref,
-				  int stride, int height)
+                                  int stride, int height)
 {
   switch (height) {
     case 4:
@@ -387,7 +387,7 @@ static void ff_idct_put_mlib(uint8_t *dest, int line_size, DCTELEM *data)
     uint8_t *cm = cropTbl + MAX_NEG_CROP;
 
     mlib_VideoIDCT8x8_S16_S16 (data, data);
-    
+
     for(i=0;i<8;i++) {
         dest[0] = cm[data[0]];
         dest[1] = cm[data[1]];
@@ -455,7 +455,7 @@ void MPV_common_init_mlib(MpegEncContext *s)
 {
   if (xine_mm_accel() & MM_ACCEL_MLIB) {
     if(s->avctx->dct_algo==FF_DCT_AUTO || s->avctx->dct_algo==FF_DCT_MLIB){
-	s->dsp.fdct = ff_fdct_mlib;
+        s->dsp.fdct = ff_fdct_mlib;
     }
 
     if(s->avctx->idct_algo==FF_IDCT_AUTO || s->avctx->idct_algo==FF_IDCT_MLIB){
diff --git a/src/libffmpeg/libavcodec/motion_est.c b/src/libffmpeg/libavcodec/motion_est.c
index c1779491f..991be55d0 100644
--- a/src/libffmpeg/libavcodec/motion_est.c
+++ b/src/libffmpeg/libavcodec/motion_est.c
@@ -1,8 +1,8 @@
 /*
- * Motion estimation 
+ * Motion estimation
  * Copyright (c) 2000,2001 Fabrice Bellard.
  * Copyright (c) 2002-2004 Michael Niedermayer
- * 
+ *
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -16,19 +16,19 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * new Motion Estimation (X1/EPZS) by Michael Niedermayer <michaelni@gmx.at>
  */
 
 /* motion estimation only needed for encoders */
 #ifdef CONFIG_ENCODERS
- 
+
 /**
  * @file motion_est.c
  * Motion estimation.
  */
- 
+
 #include <stdlib.h>
 #include <stdio.h>
 #include <limits.h>
@@ -48,7 +48,7 @@
 #define P_MV1 P[9]
 
 static inline int sad_hpel_motion_search(MpegEncContext * s,
-				  int *mx_ptr, int *my_ptr, int dmin,
+                                  int *mx_ptr, int *my_ptr, int dmin,
                                   int src_index, int ref_index,
                                   int size, int h);
 
@@ -72,7 +72,7 @@ typedef struct Minima{
 static int minima_cmp(const void *a, const void *b){
     const Minima *da = (const Minima *) a;
     const Minima *db = (const Minima *) b;
-    
+
     return da->height - db->height;
 }
 
@@ -100,7 +100,7 @@ static inline void init_ref(MotionEstContext *c, uint8_t *src[3], uint8_t *ref[3
 
 static int get_flags(MotionEstContext *c, int direct, int chroma){
     return   ((c->avctx->flags&CODEC_FLAG_QPEL) ? FLAG_QPEL : 0)
-           + (direct ? FLAG_DIRECT : 0) 
+           + (direct ? FLAG_DIRECT : 0)
            + (chroma ? FLAG_CHROMA : 0);
 }
 
@@ -133,7 +133,7 @@ static always_inline int cmp(MpegEncContext *s, const int x, const int y, const
                     int by = hy ? fy - c->co_located_mv[i][1] : c->co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(qpel+4));
                     int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                     int bxy= (bx&mask) + ((by&mask)<<(qpel+1));
-        
+
                     uint8_t *dst= c->temp + 8*(i&1) + 8*stride*(i>>1);
                     if(qpel){
                         c->qpel_put[1][fxy](dst, ref[0] + (fx>>2) + (fy>>2)*stride, stride);
@@ -150,7 +150,7 @@ static always_inline int cmp(MpegEncContext *s, const int x, const int y, const
                 int by = hy ? fy - c->co_located_mv[0][1] : (c->co_located_mv[0][1]*(time_pb - time_pp)/time_pp);
                 int fxy= (fx&mask) + ((fy&mask)<<(qpel+1));
                 int bxy= (bx&mask) + ((by&mask)<<(qpel+1));
-                
+
                 if(qpel){
                     c->qpel_put[1][fxy](c->temp               , ref[0] + (fx>>2) + (fy>>2)*stride               , stride);
                     c->qpel_put[1][fxy](c->temp + 8           , ref[0] + (fx>>2) + (fy>>2)*stride + 8           , stride);
@@ -160,7 +160,7 @@ static always_inline int cmp(MpegEncContext *s, const int x, const int y, const
                     c->qpel_avg[1][bxy](c->temp + 8           , ref[8] + (bx>>2) + (by>>2)*stride + 8           , stride);
                     c->qpel_avg[1][bxy](c->temp     + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride     + 8*stride, stride);
                     c->qpel_avg[1][bxy](c->temp + 8 + 8*stride, ref[8] + (bx>>2) + (by>>2)*stride + 8 + 8*stride, stride);
-                }else{            
+                }else{
                     assert((fx>>1) + 16*s->mb_x >= -16);
                     assert((fy>>1) + 16*s->mb_y >= -16);
                     assert((fx>>1) + 16*s->mb_x <= s->width);
@@ -195,9 +195,9 @@ static always_inline int cmp(MpegEncContext *s, const int x, const int y, const
                 if(chroma)
                     uvdxy= dxy | (x&1) | (2*(y&1));
             }
-            d = cmp_func(s, c->temp, src[0], stride, h); 
+            d = cmp_func(s, c->temp, src[0], stride, h);
         }else{
-            d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h); 
+            d = cmp_func(s, src[0], ref[0] + x + y*stride, stride, h);
             if(chroma)
                 uvdxy= (x&1) + 2*(y&1);
         }
@@ -205,8 +205,8 @@ static always_inline int cmp(MpegEncContext *s, const int x, const int y, const
             uint8_t * const uvtemp= c->temp + 16*stride;
             c->hpel_put[size+1][uvdxy](uvtemp  , ref[1] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
             c->hpel_put[size+1][uvdxy](uvtemp+8, ref[2] + (x>>1) + (y>>1)*uvstride, uvstride, h>>1);
-            d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1); 
-            d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1); 
+            d += chroma_cmp_func(s, uvtemp  , src[1], uvstride, h>>1);
+            d += chroma_cmp_func(s, uvtemp+8, src[2], uvstride, h>>1);
         }
     }
 #if 0
@@ -237,7 +237,7 @@ void ff_init_me(MpegEncContext *s){
     ff_set_cmp(&s->dsp, s->dsp.me_cmp, c->avctx->me_cmp);
     ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, c->avctx->me_sub_cmp);
     ff_set_cmp(&s->dsp, s->dsp.mb_cmp, c->avctx->mb_cmp);
-    
+
     c->flags    = get_flags(c, 0, c->avctx->me_cmp    &FF_CMP_CHROMA);
     c->sub_flags= get_flags(c, 0, c->avctx->me_sub_cmp&FF_CMP_CHROMA);
     c->mb_flags = get_flags(c, 0, c->avctx->mb_cmp    &FF_CMP_CHROMA);
@@ -251,8 +251,8 @@ void ff_init_me(MpegEncContext *s){
     }else{
         if(c->avctx->me_sub_cmp&FF_CMP_CHROMA)
             c->sub_motion_search= hpel_motion_search;
-        else if(   c->avctx->me_sub_cmp == FF_CMP_SAD 
-                && c->avctx->    me_cmp == FF_CMP_SAD 
+        else if(   c->avctx->me_sub_cmp == FF_CMP_SAD
+                && c->avctx->    me_cmp == FF_CMP_SAD
                 && c->avctx->    mb_cmp == FF_CMP_SAD)
             c->sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
         else
@@ -263,7 +263,7 @@ void ff_init_me(MpegEncContext *s){
     else               c->hpel_put= s->dsp.put_pixels_tab;
 
     if(s->linesize){
-        c->stride  = s->linesize; 
+        c->stride  = s->linesize;
         c->uvstride= s->uvlinesize;
     }else{
         c->stride  = 16*s->mb_width + 32;
@@ -288,7 +288,7 @@ void ff_init_me(MpegEncContext *s){
 
     c->temp= c->scratchpad;
 }
-      
+
 #if 0
 static int pix_dev(uint8_t * pix, int line_size, int mean)
 {
@@ -296,25 +296,25 @@ static int pix_dev(uint8_t * pix, int line_size, int mean)
 
     s = 0;
     for (i = 0; i < 16; i++) {
-	for (j = 0; j < 16; j += 8) {
-	    s += ABS(pix[0]-mean);
-	    s += ABS(pix[1]-mean);
-	    s += ABS(pix[2]-mean);
-	    s += ABS(pix[3]-mean);
-	    s += ABS(pix[4]-mean);
-	    s += ABS(pix[5]-mean);
-	    s += ABS(pix[6]-mean);
-	    s += ABS(pix[7]-mean);
-	    pix += 8;
-	}
-	pix += line_size - 16;
+        for (j = 0; j < 16; j += 8) {
+            s += ABS(pix[0]-mean);
+            s += ABS(pix[1]-mean);
+            s += ABS(pix[2]-mean);
+            s += ABS(pix[3]-mean);
+            s += ABS(pix[4]-mean);
+            s += ABS(pix[5]-mean);
+            s += ABS(pix[6]-mean);
+            s += ABS(pix[7]-mean);
+            pix += 8;
+        }
+        pix += line_size - 16;
     }
     return s;
 }
 #endif
 
 static inline void no_motion_search(MpegEncContext * s,
-				    int *mx_ptr, int *my_ptr)
+                                    int *mx_ptr, int *my_ptr)
 {
     *mx_ptr = 16 * s->mb_x;
     *my_ptr = 16 * s->mb_y;
@@ -331,35 +331,35 @@ static int full_motion_search(MpegEncContext * s,
 
     xx = 16 * s->mb_x;
     yy = 16 * s->mb_y;
-    x1 = xx - range + 1;	/* we loose one pixel to avoid boundary pb with half pixel pred */
+    x1 = xx - range + 1;        /* we loose one pixel to avoid boundary pb with half pixel pred */
     if (x1 < xmin)
-	x1 = xmin;
+        x1 = xmin;
     x2 = xx + range - 1;
     if (x2 > xmax)
-	x2 = xmax;
+        x2 = xmax;
     y1 = yy - range + 1;
     if (y1 < ymin)
-	y1 = ymin;
+        y1 = ymin;
     y2 = yy + range - 1;
     if (y2 > ymax)
-	y2 = ymax;
+        y2 = ymax;
     pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
     dmin = 0x7fffffff;
     mx = 0;
     my = 0;
     for (y = y1; y <= y2; y++) {
-	for (x = x1; x <= x2; x++) {
-	    d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
-			     s->linesize, 16);
-	    if (d < dmin ||
-		(d == dmin &&
-		 (abs(x - xx) + abs(y - yy)) <
-		 (abs(mx - xx) + abs(my - yy)))) {
-		dmin = d;
-		mx = x;
-		my = y;
-	    }
-	}
+        for (x = x1; x <= x2; x++) {
+            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
+                             s->linesize, 16);
+            if (d < dmin ||
+                (d == dmin &&
+                 (abs(x - xx) + abs(y - yy)) <
+                 (abs(mx - xx) + abs(my - yy)))) {
+                dmin = d;
+                mx = x;
+                my = y;
+            }
+        }
     }
 
     *mx_ptr = mx;
@@ -367,8 +367,8 @@ static int full_motion_search(MpegEncContext * s,
 
 #if 0
     if (*mx_ptr < -(2 * range) || *mx_ptr >= (2 * range) ||
-	*my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
-	fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
+        *my_ptr < -(2 * range) || *my_ptr >= (2 * range)) {
+        fprintf(stderr, "error %d %d\n", *mx_ptr, *my_ptr);
     }
 #endif
     return dmin;
@@ -389,22 +389,22 @@ static int log_motion_search(MpegEncContext * s,
     /* Left limit */
     x1 = xx - range;
     if (x1 < xmin)
-	x1 = xmin;
+        x1 = xmin;
 
     /* Right limit */
     x2 = xx + range;
     if (x2 > xmax)
-	x2 = xmax;
+        x2 = xmax;
 
     /* Upper limit */
     y1 = yy - range;
     if (y1 < ymin)
-	y1 = ymin;
+        y1 = ymin;
 
     /* Lower limit */
     y2 = yy + range;
     if (y2 > ymax)
-	y2 = ymax;
+        y2 = ymax;
 
     pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
     dmin = 0x7fffffff;
@@ -412,34 +412,34 @@ static int log_motion_search(MpegEncContext * s,
     my = 0;
 
     do {
-	for (y = y1; y <= y2; y += range) {
-	    for (x = x1; x <= x2; x += range) {
-		d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
-		if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
-		    dmin = d;
-		    mx = x;
-		    my = y;
-		}
-	    }
-	}
-
-	range = range >> 1;
-
-	x1 = mx - range;
-	if (x1 < xmin)
-	    x1 = xmin;
-
-	x2 = mx + range;
-	if (x2 > xmax)
-	    x2 = xmax;
-
-	y1 = my - range;
-	if (y1 < ymin)
-	    y1 = ymin;
-
-	y2 = my + range;
-	if (y2 > ymax)
-	    y2 = ymax;
+        for (y = y1; y <= y2; y += range) {
+            for (x = x1; x <= x2; x += range) {
+                d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
+                if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
+                    dmin = d;
+                    mx = x;
+                    my = y;
+                }
+            }
+        }
+
+        range = range >> 1;
+
+        x1 = mx - range;
+        if (x1 < xmin)
+            x1 = xmin;
+
+        x2 = mx + range;
+        if (x2 > xmax)
+            x2 = xmax;
+
+        y1 = my - range;
+        if (y1 < ymin)
+            y1 = ymin;
+
+        y2 = my + range;
+        if (y2 > ymax)
+            y2 = ymax;
 
     } while (range >= 1);
 
@@ -465,22 +465,22 @@ static int phods_motion_search(MpegEncContext * s,
     /* Left limit */
     x1 = xx - range;
     if (x1 < xmin)
-	x1 = xmin;
+        x1 = xmin;
 
     /* Right limit */
     x2 = xx + range;
     if (x2 > xmax)
-	x2 = xmax;
+        x2 = xmax;
 
     /* Upper limit */
     y1 = yy - range;
     if (y1 < ymin)
-	y1 = ymin;
+        y1 = ymin;
 
     /* Lower limit */
     y2 = yy + range;
     if (y2 > ymax)
-	y2 = ymax;
+        y2 = ymax;
 
     pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
     mx = 0;
@@ -492,43 +492,43 @@ static int phods_motion_search(MpegEncContext * s,
         dminx = 0x7fffffff;
         dminy = 0x7fffffff;
 
-	lastx = x;
-	for (x = x1; x <= x2; x += range) {
-	    d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
-	    if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
-		dminx = d;
-		mx = x;
-	    }
-	}
-
-	x = lastx;
-	for (y = y1; y <= y2; y += range) {
-	    d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
-	    if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
-		dminy = d;
-		my = y;
-	    }
-	}
-
-	range = range >> 1;
-
-	x = mx;
-	y = my;
-	x1 = mx - range;
-	if (x1 < xmin)
-	    x1 = xmin;
-
-	x2 = mx + range;
-	if (x2 > xmax)
-	    x2 = xmax;
-
-	y1 = my - range;
-	if (y1 < ymin)
-	    y1 = ymin;
-
-	y2 = my + range;
-	if (y2 > ymax)
-	    y2 = ymax;
+        lastx = x;
+        for (x = x1; x <= x2; x += range) {
+            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
+            if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
+                dminx = d;
+                mx = x;
+            }
+        }
+
+        x = lastx;
+        for (y = y1; y <= y2; y += range) {
+            d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
+            if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
+                dminy = d;
+                my = y;
+            }
+        }
+
+        range = range >> 1;
+
+        x = mx;
+        y = my;
+        x1 = mx - range;
+        if (x1 < xmin)
+            x1 = xmin;
+
+        x2 = mx + range;
+        if (x2 > xmax)
+            x2 = xmax;
+
+        y1 = my - range;
+        if (y1 < ymin)
+            y1 = ymin;
+
+        y2 = my + range;
+        if (y2 > ymax)
+            y2 = ymax;
 
     } while (range >= 1);
 
@@ -553,7 +553,7 @@ static int phods_motion_search(MpegEncContext * s,
 }
 
 static inline int sad_hpel_motion_search(MpegEncContext * s,
-				  int *mx_ptr, int *my_ptr, int dmin,
+                                  int *mx_ptr, int *my_ptr, int dmin,
                                   int src_index, int ref_index,
                                   int size, int h)
 {
@@ -564,7 +564,7 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
     int stride= c->stride;
     const int flags= c->sub_flags;
     LOAD_COMMON
-    
+
     assert(flags == 0);
 
     if(c->skip){
@@ -574,19 +574,19 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
         return dmin;
     }
 //    printf("N");
-        
+
     pix = c->src[src_index][0];
 
     mx = *mx_ptr;
     my = *my_ptr;
     ptr = c->ref[ref_index][0] + (my * stride) + mx;
-    
+
     dminh = dmin;
 
-    if (mx > xmin && mx < xmax && 
+    if (mx > xmin && mx < xmax &&
         my > ymin && my < ymax) {
         int dx=0, dy=0;
-        int d, pen_x, pen_y; 
+        int d, pen_x, pen_y;
         const int index= (my<<ME_MAP_SHIFT) + mx;
         const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)];
         const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)];
@@ -595,7 +595,7 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
         mx<<=1;
         my<<=1;
 
-        
+
         pen_x= pred_x + mx;
         pen_y= pred_y + my;
 
@@ -663,7 +663,7 @@ static inline int sad_hpel_motion_search(MpegEncContext * s,
 static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4)
 {
     const int xy= s->mb_x + s->mb_y*s->mb_stride;
-    
+
     s->p_mv_table[xy][0] = mx;
     s->p_mv_table[xy][1] = my;
 
@@ -703,7 +703,7 @@ static inline void get_limits(MpegEncContext *s, int x, int y)
         // Search range of H261 is different from other codec standards
         c->xmin = (x > 15) ? - 15 : 0;
         c->ymin = (y > 15) ? - 15 : 0;
-        c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;              
+        c->xmax = (x < s->mb_width * 16 - 16) ? 15 : 0;
         c->ymax = (y < s->mb_height * 16 - 16) ? 15 : 0;
     } else {
         c->xmin = - x;
@@ -737,7 +737,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
     uint8_t *mv_penalty= c->current_mv_penalty;
 
     init_mv4_ref(c);
-    
+
     for(block=0; block<4; block++){
         int mx4, my4;
         int pred_x4, pred_y4;
@@ -764,7 +764,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
             if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
             if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
             if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
-    
+
             P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
             P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
 
@@ -777,7 +777,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
         dmin4 = epzs_motion_search4(s, &mx4, &my4, P, block, block, s->p_mv_table, (1<<16)>>shift);
 
         dmin4= c->sub_motion_search(s, &mx4, &my4, dmin4, block, block, size, h);
-        
+
         if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
             int dxy;
             const int offset= ((block&1) + (block>>1)*stride)*8;
@@ -810,20 +810,20 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
             mx4_sum+= mx4;
             my4_sum+= my4;
         }
-            
+
         s->current_picture.motion_val[0][ s->block_index[block] ][0]= mx4;
         s->current_picture.motion_val[0][ s->block_index[block] ][1]= my4;
 
         if(mx4 != mx || my4 != my) same=0;
     }
-    
+
     if(same)
         return INT_MAX;
-    
+
     if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
         dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, c->scratchpad, stride, 16);
     }
-    
+
     if(c->avctx->mb_cmp&FF_CMP_CHROMA){
         int dxy;
         int mx, my;
@@ -832,9 +832,9 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
         mx= ff_h263_round_chroma(mx4_sum);
         my= ff_h263_round_chroma(my4_sum);
         dxy = ((my & 1) << 1) | (mx & 1);
-        
+
         offset= (s->mb_x*8 + (mx>>1)) + (s->mb_y*8 + (my>>1))*s->uvlinesize;
-       
+
         if(s->no_rounding){
             s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad    , s->last_picture.data[1] + offset, s->uvlinesize, 8);
             s->dsp.put_no_rnd_pixels_tab[1][dxy](c->scratchpad+8  , s->last_picture.data[2] + offset, s->uvlinesize, 8);
@@ -846,7 +846,7 @@ static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
         dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad  , s->uvlinesize, 8);
         dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, c->scratchpad+8, s->uvlinesize, 8);
     }
-    
+
     c->pred_x= mx;
     c->pred_y= my;
 
@@ -873,7 +873,7 @@ static inline void init_interlaced_ref(MpegEncContext *s, int ref_index){
     }
 }
 
-static int interlaced_search(MpegEncContext *s, int ref_index, 
+static int interlaced_search(MpegEncContext *s, int ref_index,
                              int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int mx, int my, int user_field_select)
 {
     MotionEstContext * const c= &s->me;
@@ -887,13 +887,13 @@ static int interlaced_search(MpegEncContext *s, int ref_index,
     int dmin_sum= 0;
     const int mot_stride= s->mb_stride;
     const int xy= s->mb_x + s->mb_y*mot_stride;
-    
+
     c->ymin>>=1;
     c->ymax>>=1;
     c->stride<<=1;
     c->uvstride<<=1;
     init_interlaced_ref(s, ref_index);
-    
+
     for(block=0; block<2; block++){
         int field_select;
         int best_dmin= INT_MAX;
@@ -902,19 +902,19 @@ static int interlaced_search(MpegEncContext *s, int ref_index,
         for(field_select=0; field_select<2; field_select++){
             int dmin, mx_i, my_i;
             int16_t (*mv_table)[2]= mv_tables[block][field_select];
-            
+
             if(user_field_select){
                 if(field_select_tables[block][xy] != field_select)
                     continue;
             }
-            
+
             P_LEFT[0] = mv_table[xy - 1][0];
             P_LEFT[1] = mv_table[xy - 1][1];
             if(P_LEFT[0]       > (c->xmax<<1)) P_LEFT[0]       = (c->xmax<<1);
-            
+
             c->pred_x= P_LEFT[0];
             c->pred_y= P_LEFT[1];
-            
+
             if(!s->first_slice_line){
                 P_TOP[0]      = mv_table[xy - mot_stride][0];
                 P_TOP[1]      = mv_table[xy - mot_stride][1];
@@ -924,20 +924,20 @@ static int interlaced_search(MpegEncContext *s, int ref_index,
                 if(P_TOPRIGHT[0] < (c->xmin<<1)) P_TOPRIGHT[0]= (c->xmin<<1);
                 if(P_TOPRIGHT[0] > (c->xmax<<1)) P_TOPRIGHT[0]= (c->xmax<<1);
                 if(P_TOPRIGHT[1] > (c->ymax<<1)) P_TOPRIGHT[1]= (c->ymax<<1);
-    
+
                 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
             }
             P_MV1[0]= mx; //FIXME not correct if block != field_select
             P_MV1[1]= my / 2;
-            
+
             dmin = epzs_motion_search2(s, &mx_i, &my_i, P, block, field_select+ref_index, mv_table, (1<<16)>>1);
 
             dmin= c->sub_motion_search(s, &mx_i, &my_i, dmin, block, field_select+ref_index, size, h);
-            
+
             mv_table[xy][0]= mx_i;
             mv_table[xy][1]= my_i;
-            
+
             if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
                 int dxy;
 
@@ -954,9 +954,9 @@ static int interlaced_search(MpegEncContext *s, int ref_index,
                 dmin+= (mv_penalty[mx_i-c->pred_x] + mv_penalty[my_i-c->pred_y] + 1)*c->mb_penalty_factor;
             }else
                 dmin+= c->mb_penalty_factor; //field_select bits
-                
+
             dmin += field_select != block; //slightly prefer same field
-            
+
             if(dmin < best_dmin){
                 best_dmin= dmin;
                 best_field= field_select;
@@ -967,14 +967,14 @@ static int interlaced_search(MpegEncContext *s, int ref_index,
 
             if(mv_table[xy][0] != mx) same=0; //FIXME check if these checks work and are any good at all
             if(mv_table[xy][1]&1) same=0;
-            if(mv_table[xy][1]*2 != my) same=0; 
+            if(mv_table[xy][1]*2 != my) same=0;
             if(best_field != block) same=0;
         }
 
         field_select_tables[block][xy]= best_field;
         dmin_sum += best_dmin;
     }
-    
+
     c->ymin<<=1;
     c->ymax<<=1;
     c->stride>>=1;
@@ -982,7 +982,7 @@ static int interlaced_search(MpegEncContext *s, int ref_index,
 
     if(same)
         return INT_MAX;
-    
+
     switch(c->avctx->mb_cmp&0xFF){
     /*case FF_CMP_SSE:
         return dmin_sum+ 32*s->qscale*s->qscale;*/
@@ -996,7 +996,7 @@ static int interlaced_search(MpegEncContext *s, int ref_index,
 static void clip_input_mv(MpegEncContext * s, int16_t *mv, int interlaced){
     int ymax= s->me.ymax>>interlaced;
     int ymin= s->me.ymin>>interlaced;
-    
+
     if(mv[0] < s->me.xmin) mv[0] = s->me.xmin;
     if(mv[0] > s->me.xmax) mv[0] = s->me.xmax;
     if(mv[1] <       ymin) mv[1] =       ymin;
@@ -1016,13 +1016,13 @@ static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int
     int d=0;
     me_cmp_func cmpf= s->dsp.sse[0];
     me_cmp_func chroma_cmpf= s->dsp.sse[1];
-    
+
     if(p_type && USES_LIST(mb_type, 1)){
         av_log(c->avctx, AV_LOG_ERROR, "backward motion vector in P frame\n");
         return INT_MAX/2;
     }
     assert(IS_INTRA(mb_type) || USES_LIST(mb_type,0) || USES_LIST(mb_type,1));
-    
+
     for(i=0; i<4; i++){
         int xy= s->block_index[i];
         clip_input_mv(s, p->motion_val[0][xy], !!IS_INTERLACED(mb_type));
@@ -1034,7 +1034,7 @@ static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int
         s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_INTRA;
         c->stride<<=1;
         c->uvstride<<=1;
-        
+
         if(!(s->flags & CODEC_FLAG_INTERLACED_ME)){
             av_log(c->avctx, AV_LOG_ERROR, "Interlaced macroblock selected but interlaced motion estimation disabled\n");
             return INT_MAX/2;
@@ -1061,10 +1061,10 @@ static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int
                 s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_FORWARD_I;
             }
 
-            x= p->motion_val[0][xy ][0]; 
+            x= p->motion_val[0][xy ][0];
             y= p->motion_val[0][xy ][1];
             d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0, 0, cmpf, chroma_cmpf, flags);
-            x= p->motion_val[0][xy2][0]; 
+            x= p->motion_val[0][xy2][0];
             y= p->motion_val[0][xy2][1];
             d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1, 1, cmpf, chroma_cmpf, flags);
         }
@@ -1085,10 +1085,10 @@ static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int
                 s->mb_type[mb_xy]= CANDIDATE_MB_TYPE_BACKWARD_I;
             }
 
-            x= p->motion_val[1][xy ][0]; 
+            x= p->motion_val[1][xy ][0];
             y= p->motion_val[1][xy ][1];
             d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select0+2, 0, cmpf, chroma_cmpf, flags);
-            x= p->motion_val[1][xy2][0]; 
+            x= p->motion_val[1][xy2][0];
             y= p->motion_val[1][xy2][1];
             d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 8, field_select1+2, 1, cmpf, chroma_cmpf, flags);
             //FIXME bidir scores
@@ -1105,7 +1105,7 @@ static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int
         init_mv4_ref(c);
         for(i=0; i<4; i++){
             xy= s->block_index[i];
-            x= p->motion_val[0][xy][0]; 
+            x= p->motion_val[0][xy][0];
             y= p->motion_val[0][xy][1];
             d+= cmp(s, x>>shift, y>>shift, x&mask, y&mask, 1, 8, i, i, cmpf, chroma_cmpf, flags);
         }
@@ -1123,14 +1123,14 @@ static inline int check_input_motion(MpegEncContext * s, int mb_x, int mb_y, int
                 *(uint32_t*)s->b_forw_mv_table[mb_xy]= *(uint32_t*)p->motion_val[0][xy];
                 s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_FORWARD;
             }
-            x= p->motion_val[0][xy][0]; 
+            x= p->motion_val[0][xy][0];
             y= p->motion_val[0][xy][1];
             d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 0, 0, cmpf, chroma_cmpf, flags);
         }else if(USES_LIST(mb_type, 1)){
             *(uint32_t*)s->b_back_mv_table[mb_xy]= *(uint32_t*)p->motion_val[1][xy];
             s->mb_type[mb_xy]=CANDIDATE_MB_TYPE_BACKWARD;
-           
-            x= p->motion_val[1][xy][0]; 
+
+            x= p->motion_val[1][xy][0];
             y= p->motion_val[1][xy][1];
             d = cmp(s, x>>shift, y>>shift, x&mask, y&mask, 0, 16, 2, 0, cmpf, chroma_cmpf, flags);
         }else
@@ -1149,7 +1149,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
     const int shift= 1+s->quarter_sample;
     int mb_type=0;
     Picture * const pic= &s->current_picture;
-    
+
     init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);
 
     assert(s->quarter_sample==0 || s->quarter_sample==1);
@@ -1167,50 +1167,50 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
     /* intra / predictive decision */
     pix = c->src[0][0];
     sum = s->dsp.pix_sum(pix, s->linesize);
-    varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
+    varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500;
 
     pic->mb_mean[s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
-    pic->mb_var [s->mb_stride * mb_y + mb_x] = varc;
-    c->mb_var_sum_temp += varc;
+    pic->mb_var [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
+    c->mb_var_sum_temp += (varc+128)>>8;
 
     if(c->avctx->me_threshold){
-        vard= (check_input_motion(s, mb_x, mb_y, 1)+128)>>8;
-        
-        if(vard<c->avctx->me_threshold){
-            pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = vard;
-            c->mc_mb_var_sum_temp += vard;
-            if (vard <= 64 || vard < varc) { //FIXME
+        vard= check_input_motion(s, mb_x, mb_y, 1);
+
+        if((vard+128)>>8 < c->avctx->me_threshold){
+            pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
+            c->mc_mb_var_sum_temp += (vard+128)>>8;
+            if (vard <= 64<<8 || vard < varc) { //FIXME
                 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
             }else{
-                c->scene_change_score+= s->qscale;
+                c->scene_change_score+= s->qscale * s->avctx->scenechange_factor;
             }
             return;
         }
-        if(vard<c->avctx->mb_threshold)
+        if((vard+128)>>8 < c->avctx->mb_threshold)
             mb_type= s->mb_type[mb_x + mb_y*s->mb_stride];
     }
 
     switch(s->me_method) {
     case ME_ZERO:
     default:
-	no_motion_search(s, &mx, &my);
+        no_motion_search(s, &mx, &my);
         mx-= mb_x*16;
         my-= mb_y*16;
         dmin = 0;
         break;
 #if 0
     case ME_FULL:
-	dmin = full_motion_search(s, &mx, &my, range, ref_picture);
+        dmin = full_motion_search(s, &mx, &my, range, ref_picture);
         mx-= mb_x*16;
         my-= mb_y*16;
         break;
     case ME_LOG:
-	dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
+        dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
         mx-= mb_x*16;
         my-= mb_y*16;
         break;
     case ME_PHODS:
-	dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
+        dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
         mx-= mb_x*16;
         my-= mb_y*16;
         break;
@@ -1234,7 +1234,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
                 if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
                 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
                 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
-        
+
                 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
 
@@ -1251,29 +1251,29 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
             }
 
         }
-        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);       
+        dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);
 
         break;
     }
 
     /* At this point (mx,my) are full-pell and the relative displacement */
     ppix = c->ref[0][0] + (my * s->linesize) + mx;
-        
-    vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16)+128)>>8;
 
-    pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = vard;
-//    pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin; 
-    c->mc_mb_var_sum_temp += vard;
-    
+    vard = s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16);
+
+    pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
+//    pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
+    c->mc_mb_var_sum_temp += (vard+128)>>8;
+
 #if 0
     printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n",
-	   varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
+           varc, s->avg_mb_var, sum, vard, mx - xx, my - yy);
 #endif
     if(mb_type){
-        if (vard <= 64 || vard < varc)
+        if (vard <= 64<<8 || vard < varc)
             c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
         else
-            c->scene_change_score+= s->qscale;
+            c->scene_change_score+= s->qscale * s->avctx->scenechange_factor;
 
         if(mb_type == CANDIDATE_MB_TYPE_INTER){
             c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
@@ -1291,14 +1291,14 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
             interlaced_search(s, 0, s->p_field_mv_table, s->p_field_select_table, mx, my, 1);
         }
     }else if(c->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
-        if (vard <= 64 || vard < varc)
+        if (vard <= 64<<8 || vard < varc)
             c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
         else
-            c->scene_change_score+= s->qscale;
+            c->scene_change_score+= s->qscale * s->avctx->scenechange_factor;
 
-        if (vard*2 + 200 > varc)
+        if (vard*2 + 200*256 > varc)
             mb_type|= CANDIDATE_MB_TYPE_INTRA;
-        if (varc*2 + 200 > vard){
+        if (varc*2 + 200*256 > vard){
             mb_type|= CANDIDATE_MB_TYPE_INTER;
             c->sub_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
             if(s->flags&CODEC_FLAG_MV0)
@@ -1309,7 +1309,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
             my <<=shift;
         }
         if((s->flags&CODEC_FLAG_4MV)
-           && !c->skip && varc>50 && vard>10){
+           && !c->skip && varc>50<<8 && vard>10<<8){
             if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
                 mb_type|=CANDIDATE_MB_TYPE_INTER4V;
 
@@ -1330,7 +1330,7 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
             dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);
 
         if((s->flags&CODEC_FLAG_4MV)
-           && !c->skip && varc>50 && vard>10){
+           && !c->skip && varc>50<<8 && vard>10<<8){
             int dmin4= h263_mv4_search(s, mx, my, shift);
             if(dmin4 < dmin){
                 mb_type= CANDIDATE_MB_TYPE_INTER4V;
@@ -1345,17 +1345,17 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
                 dmin= dmin_i;
             }
         }
-                
-//        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin; 
+
+//        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
         set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);
 
         /* get intra luma score */
         if((c->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
-            intra_score= (varc<<8) - 500; //FIXME dont scale it down so we dont have to fix it
+            intra_score= varc - 500;
         }else{
             int mean= (sum+128)>>8;
             mean*= 0x01010101;
-            
+
             for(i=0; i<16; i++){
                 *(uint32_t*)(&c->scratchpad[i*s->linesize+ 0]) = mean;
                 *(uint32_t*)(&c->scratchpad[i*s->linesize+ 4]) = mean;
@@ -1371,36 +1371,36 @@ void ff_estimate_p_frame_motion(MpegEncContext * s,
             for(i=1; i<3; i++){
                 uint8_t *dest_c;
                 int mean;
-                
+
                 if(s->out_format == FMT_H263){
                     mean= (s->dc_val[i][mb_x + mb_y*s->b8_stride] + 4)>>3; //FIXME not exact but simple ;)
                 }else{
                     mean= (s->last_dc[i] + 4)>>3;
                 }
                 dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
-                
+
                 mean*= 0x01010101;
                 for(i=0; i<8; i++){
                     *(uint32_t*)(&c->scratchpad[i*s->uvlinesize+ 0]) = mean;
                     *(uint32_t*)(&c->scratchpad[i*s->uvlinesize+ 4]) = mean;
                 }
-                
+
                 intra_score+= s->dsp.mb_cmp[1](s, c->scratchpad, dest_c, s->uvlinesize);
-            }                
+            }
         }
 #endif
         intra_score += c->mb_penalty_factor*16;
-        
+
         if(intra_score < dmin){
             mb_type= CANDIDATE_MB_TYPE_INTRA;
             s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
         }else
             s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0;
-        
-        if (vard <= 64 || vard < varc) { //FIXME
+
+        if (vard <= 64<<8 || vard < varc) { //FIXME
             c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
         }else{
-            c->scene_change_score+= s->qscale;
+            c->scene_change_score+= s->qscale * s->avctx->scenechange_factor;
         }
     }
 
@@ -1416,7 +1416,7 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
     const int shift= 1+s->quarter_sample;
     const int xy= mb_x + mb_y*s->mb_stride;
     init_ref(c, s->new_picture.data, s->last_picture.data, NULL, 16*mb_x, 16*mb_y, 0);
-    
+
     assert(s->quarter_sample==0 || s->quarter_sample==1);
 
     c->pre_penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp);
@@ -1435,7 +1435,7 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
         c->pred_x= P_LEFT[0];
         c->pred_y= P_LEFT[1];
         P_TOP[0]= P_TOPRIGHT[0]= P_MEDIAN[0]=
-        P_TOP[1]= P_TOPRIGHT[1]= P_MEDIAN[1]= 0; //FIXME 
+        P_TOP[1]= P_TOPRIGHT[1]= P_MEDIAN[1]= 0; //FIXME
     } else {
         P_TOP[0]      = s->p_mv_table[xy + s->mb_stride    ][0];
         P_TOP[1]      = s->p_mv_table[xy + s->mb_stride    ][1];
@@ -1444,7 +1444,7 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
         if(P_TOP[1]      < (c->ymin<<shift)) P_TOP[1]     = (c->ymin<<shift);
         if(P_TOPRIGHT[0] > (c->xmax<<shift)) P_TOPRIGHT[0]= (c->xmax<<shift);
         if(P_TOPRIGHT[1] < (c->ymin<<shift)) P_TOPRIGHT[1]= (c->ymin<<shift);
-    
+
         P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
         P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
 
@@ -1452,11 +1452,11 @@ int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
         c->pred_y = P_MEDIAN[1];
     }
 
-    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);       
+    dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16);
 
     s->p_mv_table[xy][0] = mx<<shift;
     s->p_mv_table[xy][1] = my<<shift;
-    
+
     return dmin;
 }
 
@@ -1471,7 +1471,7 @@ static int ff_estimate_motion_b(MpegEncContext * s,
     const int mot_xy = mb_y*mot_stride + mb_x;
     uint8_t * const mv_penalty= c->mv_penalty[f_code] + MAX_MV;
     int mv_scale;
-        
+
     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
     c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
@@ -1482,24 +1482,24 @@ static int ff_estimate_motion_b(MpegEncContext * s,
     switch(s->me_method) {
     case ME_ZERO:
     default:
-	no_motion_search(s, &mx, &my);
+        no_motion_search(s, &mx, &my);
         dmin = 0;
         mx-= mb_x*16;
         my-= mb_y*16;
         break;
 #if 0
     case ME_FULL:
-	dmin = full_motion_search(s, &mx, &my, range, ref_picture);
+        dmin = full_motion_search(s, &mx, &my, range, ref_picture);
         mx-= mb_x*16;
         my-= mb_y*16;
         break;
     case ME_LOG:
-	dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
+        dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
         mx-= mb_x*16;
         my-= mb_y*16;
         break;
     case ME_PHODS:
-	dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
+        dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
         mx-= mb_x*16;
         my-= mb_y*16;
         break;
@@ -1521,27 +1521,27 @@ static int ff_estimate_motion_b(MpegEncContext * s,
                 if(P_TOP[1] > (c->ymax<<shift)) P_TOP[1]= (c->ymax<<shift);
                 if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
                 if(P_TOPRIGHT[1] > (c->ymax<<shift)) P_TOPRIGHT[1]= (c->ymax<<shift);
-        
+
                 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
                 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
             }
             c->pred_x= P_LEFT[0];
             c->pred_y= P_LEFT[1];
         }
-        
+
         if(mv_table == s->b_forw_mv_table){
             mv_scale= (s->pb_time<<16) / (s->pp_time<<shift);
         }else{
             mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift);
         }
-        
+
         dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16);
- 
+
         break;
     }
-    
+
     dmin= c->sub_motion_search(s, &mx, &my, dmin, 0, ref_index, 0, 16);
-                                   
+
     if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
         dmin= ff_get_mb_score(s, mx, my, 0, ref_index, 0, 16, 1);
 
@@ -1564,7 +1564,8 @@ static inline int check_bidir_mv(MpegEncContext * s,
     //FIXME better f_code prediction (max mv & distance)
     //FIXME pointers
     MotionEstContext * const c= &s->me;
-    uint8_t * const mv_penalty= c->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
+    uint8_t * const mv_penalty_f= c->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
+    uint8_t * const mv_penalty_b= c->mv_penalty[s->b_code] + MAX_MV; // b_code counterpart, used for the backward vector penalty
     int stride= c->stride;
     uint8_t *dest_y = c->scratchpad;
     uint8_t *ptr;
@@ -1586,7 +1587,7 @@ static inline int check_bidir_mv(MpegEncContext * s,
         dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
         src_x = motion_bx >> 2;
         src_y = motion_by >> 2;
-    
+
         ptr = ref2_data[0] + (src_y * stride) + src_x;
         s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y    , ptr    , stride);
     }else{
@@ -1600,25 +1601,26 @@ static inline int check_bidir_mv(MpegEncContext * s,
         dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
         src_x = motion_bx >> 1;
         src_y = motion_by >> 1;
-    
+
         ptr = ref2_data[0] + (src_y * stride) + src_x;
         s->dsp.avg_pixels_tab[size][dxy](dest_y    , ptr    , stride, h);
     }
 
-    fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*c->mb_penalty_factor
-           +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*c->mb_penalty_factor
+    fbmin = (mv_penalty_f[motion_fx-pred_fx] + mv_penalty_f[motion_fy-pred_fy])*c->mb_penalty_factor
+           +(mv_penalty_b[motion_bx-pred_bx] + mv_penalty_b[motion_by-pred_by])*c->mb_penalty_factor
            + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic
-           
+
     if(c->avctx->mb_cmp&FF_CMP_CHROMA){
     }
     //FIXME CHROMA !!!
-           
+
     return fbmin;
 }
 
 /* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
 static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
 {
+    MotionEstContext * const c= &s->me;
     const int mot_stride = s->mb_stride;
     const int xy = mb_y *mot_stride + mb_x;
     int fbmin;
@@ -1630,16 +1632,87 @@ static inline int bidir_refine(MpegEncContext * s, int mb_x, int mb_y)
     int motion_fy= s->b_bidir_forw_mv_table[xy][1]= s->b_forw_mv_table[xy][1];
     int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
     int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
+    const int flags= c->sub_flags;
+    const int qpel= flags&FLAG_QPEL;
+    const int shift= 1+qpel;
+    const int xmin= c->xmin<<shift;
+    const int ymin= c->ymin<<shift;
+    const int xmax= c->xmax<<shift;
+    const int ymax= c->ymax<<shift;
+    uint8_t map[8][8][8][8];
+
+    memset(map,0,sizeof(map));
+#define BIDIR_MAP(fx,fy,bx,by) \
+    map[(motion_fx+fx)&7][(motion_fy+fy)&7][(motion_bx+bx)&7][(motion_by+by)&7]
+    BIDIR_MAP(0,0,0,0) = 1;
 
-    //FIXME do refinement and add flag
-    
     fbmin= check_bidir_mv(s, motion_fx, motion_fy,
                           motion_bx, motion_by,
                           pred_fx, pred_fy,
                           pred_bx, pred_by,
                           0, 16);
 
-   return fbmin;
+    if(s->avctx->bidir_refine){
+        int score, end;
+#define CHECK_BIDIR(fx,fy,bx,by)\
+    if( !BIDIR_MAP(fx,fy,bx,by)\
+       &&(fx<=0 || motion_fx+fx<=xmax) && (fy<=0 || motion_fy+fy<=ymax) && (bx<=0 || motion_bx+bx<=xmax) && (by<=0 || motion_by+by<=ymax)\
+       &&(fx>=0 || motion_fx+fx>=xmin) && (fy>=0 || motion_fy+fy>=ymin) && (bx>=0 || motion_bx+bx>=xmin) && (by>=0 || motion_by+by>=ymin)){\
+        BIDIR_MAP(fx,fy,bx,by) = 1;\
+        score= check_bidir_mv(s, motion_fx+fx, motion_fy+fy, motion_bx+bx, motion_by+by, pred_fx, pred_fy, pred_bx, pred_by, 0, 16);\
+        if(score < fbmin){\
+            fbmin= score;\
+            motion_fx+=fx;\
+            motion_fy+=fy;\
+            motion_bx+=bx;\
+            motion_by+=by;\
+            end=0;\
+        }\
+    }
+#define CHECK_BIDIR2(a,b,c,d)\
+CHECK_BIDIR(a,b,c,d)\
+CHECK_BIDIR(-a,-b,-c,-d)
+
+#define CHECK_BIDIRR(a,b,c,d)\
+CHECK_BIDIR2(a,b,c,d)\
+CHECK_BIDIR2(b,c,d,a)\
+CHECK_BIDIR2(c,d,a,b)\
+CHECK_BIDIR2(d,a,b,c)
+
+        do{
+            end=1;
+
+            CHECK_BIDIRR( 0, 0, 0, 1)
+            if(s->avctx->bidir_refine > 1){
+                CHECK_BIDIRR( 0, 0, 1, 1)
+                CHECK_BIDIR2( 0, 1, 0, 1)
+                CHECK_BIDIR2( 1, 0, 1, 0)
+                CHECK_BIDIRR( 0, 0,-1, 1)
+                CHECK_BIDIR2( 0,-1, 0, 1)
+                CHECK_BIDIR2(-1, 0, 1, 0)
+                if(s->avctx->bidir_refine > 2){
+                    CHECK_BIDIRR( 0, 1, 1, 1)
+                    CHECK_BIDIRR( 0,-1, 1, 1)
+                    CHECK_BIDIRR( 0, 1,-1, 1)
+                    CHECK_BIDIRR( 0, 1, 1,-1)
+                    if(s->avctx->bidir_refine > 3){
+                        CHECK_BIDIR2( 1, 1, 1, 1)
+                        CHECK_BIDIRR( 1, 1, 1,-1)
+                        CHECK_BIDIR2( 1, 1,-1,-1)
+                        CHECK_BIDIR2( 1,-1,-1, 1)
+                        CHECK_BIDIR2( 1,-1, 1,-1)
+                    }
+                }
+            }
+        }while(!end);
+    }
+
+    s->b_bidir_forw_mv_table[xy][0]= motion_fx;
+    s->b_bidir_forw_mv_table[xy][1]= motion_fy;
+    s->b_bidir_back_mv_table[xy][0]= motion_bx;
+    s->b_bidir_back_mv_table[xy][1]= motion_by;
+
+    return fbmin;
 }
 
 static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
@@ -1654,7 +1727,7 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
     const int time_pb= s->pb_time;
     int mx, my, xmin, xmax, ymin, ymax;
     int16_t (*mv_table)[2]= s->b_direct_mv_table;
-    
+
     c->current_mv_penalty= c->mv_penalty[1] + MAX_MV;
     ymin= xmin=(-32)>>shift;
     ymax= xmax=   31>>shift;
@@ -1668,7 +1741,7 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
     for(i=0; i<4; i++){
         int index= s->block_index[i];
         int min, max;
-    
+
         c->co_located_mv[i][0]= s->next_picture.motion_val[0][index][0];
         c->co_located_mv[i][1]= s->next_picture.motion_val[0][index][1];
         c->direct_basis_mv[i][0]= c->co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
@@ -1689,19 +1762,19 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
         min+= 16*mb_y - 1;
         ymax= FFMIN(ymax, s->height - max);
         ymin= FFMAX(ymin, - 16      - min);
-        
+
         if(s->mv_type == MV_TYPE_16X16) break;
     }
-    
+
     assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);
-    
+
     if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
         s->b_direct_mv_table[mot_xy][0]= 0;
         s->b_direct_mv_table[mot_xy][1]= 0;
 
         return 256*256*256*64;
     }
-    
+
     c->xmin= xmin;
     c->ymin= ymin;
     c->xmax= xmax;
@@ -1720,20 +1793,20 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y)
         P_TOP[1]      = clip(mv_table[mot_xy - mot_stride             ][1], ymin<<shift, ymax<<shift);
         P_TOPRIGHT[0] = clip(mv_table[mot_xy - mot_stride + 1         ][0], xmin<<shift, xmax<<shift);
         P_TOPRIGHT[1] = clip(mv_table[mot_xy - mot_stride + 1         ][1], ymin<<shift, ymax<<shift);
-    
+
         P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
         P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
     }
- 
+
     dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16);
-    if(c->sub_flags&FLAG_QPEL) 
+    if(c->sub_flags&FLAG_QPEL)
         dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
     else
         dmin = hpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16);
-    
+
     if(c->avctx->me_sub_cmp != c->avctx->mb_cmp && !c->skip)
         dmin= ff_get_mb_score(s, mx, my, 0, 0, 0, 16, 1);
-    
+
     get_limits(s, 16*mb_x, 16*mb_y); //restore c->?min/max, maybe not needed
 
     s->b_direct_mv_table[mot_xy][0]= mx;
@@ -1755,29 +1828,29 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
     init_ref(c, s->new_picture.data, s->last_picture.data, s->next_picture.data, 16*mb_x, 16*mb_y, 2);
 
     get_limits(s, 16*mb_x, 16*mb_y);
-    
+
     c->skip=0;
     if(c->avctx->me_threshold){
-        int vard= (check_input_motion(s, mb_x, mb_y, 0)+128)>>8;
-        
-        if(vard<c->avctx->me_threshold){
+        int vard= check_input_motion(s, mb_x, mb_y, 0);
+
+        if((vard+128)>>8 < c->avctx->me_threshold){
 //            pix = c->src[0][0];
 //            sum = s->dsp.pix_sum(pix, s->linesize);
-//            varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
-        
-//            pic->mb_var   [s->mb_stride * mb_y + mb_x] = varc;
-             s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] = vard;
+//            varc = s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500;
+
+//            pic->mb_var   [s->mb_stride * mb_y + mb_x] = (varc+128)>>8;
+             s->current_picture.mc_mb_var[s->mb_stride * mb_y + mb_x] = (vard+128)>>8;
 /*            pic->mb_mean  [s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
-            c->mb_var_sum_temp    += varc;*/
-            c->mc_mb_var_sum_temp += vard;
-/*            if (vard <= 64 || vard < varc) {
+            c->mb_var_sum_temp    += (varc+128)>>8;*/
+            c->mc_mb_var_sum_temp += (vard+128)>>8;
+/*            if (vard <= 64<<8 || vard < varc) {
                 c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
             }else{
-                c->scene_change_score+= s->qscale;
+                c->scene_change_score+= s->qscale * s->avctx->scenechange_factor;
             }*/
             return;
         }
-        if(vard<c->avctx->mb_threshold){
+        if((vard+128)>>8 < c->avctx->mb_threshold){
             type= s->mb_type[mb_y*s->mb_stride + mb_x];
             if(type == CANDIDATE_MB_TYPE_DIRECT){
                 direct_search(s, mb_x, mb_y);
@@ -1815,7 +1888,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
 //FIXME penalty stuff for non mpeg4
     c->skip=0;
     fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, 0, s->f_code) + 3*penalty_factor;
-    
+
     c->skip=0;
     bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, 2, s->b_code) + 2*penalty_factor;
 //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
@@ -1823,7 +1896,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
     c->skip=0;
     fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor;
 //printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin);
-    
+
     if(s->flags & CODEC_FLAG_INTERLACED_ME){
 //FIXME mb type penalty
         c->skip=0;
@@ -1841,14 +1914,14 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
     {
         int score= fmin;
         type = CANDIDATE_MB_TYPE_FORWARD;
-        
+
         if (dmin <= score){
             score = dmin;
             type = CANDIDATE_MB_TYPE_DIRECT;
         }
         if(bmin<score){
             score=bmin;
-            type= CANDIDATE_MB_TYPE_BACKWARD; 
+            type= CANDIDATE_MB_TYPE_BACKWARD;
         }
         if(fbmin<score){
             score=fbmin;
@@ -1862,7 +1935,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
             score=bimin;
             type= CANDIDATE_MB_TYPE_BACKWARD_I;
         }
-        
+
         score= ((unsigned)(score*score + 128*256))>>16;
         c->mc_mb_var_sum_temp += score;
         s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
@@ -1879,7 +1952,7 @@ void ff_estimate_b_frame_motion(MpegEncContext * s,
         }
          //FIXME something smarter
         if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
-#if 0        
+#if 0
         if(s->out_format == FMT_MPEG1)
             type |= CANDIDATE_MB_TYPE_INTRA;
 #endif
@@ -1898,7 +1971,7 @@ int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
         int best_fcode=-1;
         int best_score=-10000000;
 
-        if(s->msmpeg4_version) 
+        if(s->msmpeg4_version)
             range= FFMIN(range, 16);
         else if(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL)
             range= FFMIN(range, 256);
@@ -1915,11 +1988,11 @@ int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
                     int fcode= FFMAX(fcode_tab[mx + MAX_MV],
                                      fcode_tab[my + MAX_MV]);
                     int j;
-                    
-                        if(mx >= range || mx < -range || 
+
+                        if(mx >= range || mx < -range ||
                            my >= range || my < -range)
                             continue;
-                    
+
                     for(j=0; j<fcode && j<8; j++){
                         if(s->pict_type==B_TYPE || s->current_picture.mc_mb_var[xy] < s->current_picture.mb_var[xy])
                             score[j]-= 170;
@@ -1928,7 +2001,7 @@ int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
                 xy++;
             }
         }
-        
+
         for(i=1; i<8; i++){
             if(score[i] > best_score){
                 best_score= score[i];
@@ -1959,9 +2032,9 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
 
     assert(range <= 16 || !s->msmpeg4_version);
     assert(range <=256 || !(s->codec_id == CODEC_ID_MPEG2VIDEO && s->avctx->strict_std_compliance >= FF_COMPLIANCE_NORMAL));
-    
+
     if(c->avctx->me_range && range > c->avctx->me_range) range= c->avctx->me_range;
-    
+
 //printf("%d no:%d %d//\n", clip, noclip, f_code);
     if(s->flags&CODEC_FLAG_4MV){
         const int wrap= s->b8_stride;
@@ -1999,7 +2072,7 @@ void ff_fix_long_p_mvs(MpegEncContext * s)
  *
  * @param truncate 1 for truncation, 0 for using intra
  */
-void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select, 
+void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
                      int16_t (*mv_table)[2], int f_code, int type, int truncate)
 {
     MotionEstContext * const c= &s->me;
diff --git a/src/libffmpeg/libavcodec/motion_est_template.c b/src/libffmpeg/libavcodec/motion_est_template.c
index d8101ec33..23ead283c 100644
--- a/src/libffmpeg/libavcodec/motion_est_template.c
+++ b/src/libffmpeg/libavcodec/motion_est_template.c
@@ -1,5 +1,5 @@
 /*
- * Motion estimation 
+ * Motion estimation
  * Copyright (c) 2002-2004 Michael Niedermayer
  *
  * This library is free software; you can redistribute it and/or
@@ -14,10 +14,10 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file motion_est_template.c
  * Motion estimation template.
@@ -45,8 +45,8 @@
 
 #if 0
 static int hpel_motion_search)(MpegEncContext * s,
-				  int *mx_ptr, int *my_ptr, int dmin,
-                                  uint8_t *ref_data[3], 
+                                  int *mx_ptr, int *my_ptr, int dmin,
+                                  uint8_t *ref_data[3],
                                   int size)
 {
     const int xx = 16 * s->mb_x + 8*(n&1);
@@ -54,9 +54,9 @@ static int hpel_motion_search)(MpegEncContext * s,
     const int mx = *mx_ptr;
     const int my = *my_ptr;
     const int penalty_factor= c->sub_penalty_factor;
-    
+
     LOAD_COMMON
-    
+
  //   INIT;
  //FIXME factorize
     me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub;
@@ -78,25 +78,25 @@ static int hpel_motion_search)(MpegEncContext * s,
         *my_ptr = 0;
         return dmin;
     }
-        
+
     if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
         CMP_HPEL(dmin, 0, 0, mx, my, size);
         if(mx || my)
             dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
     }
-        
-    if (mx > xmin && mx < xmax && 
+
+    if (mx > xmin && mx < xmax &&
         my > ymin && my < ymax) {
         int bx=2*mx, by=2*my;
         int d= dmin;
-        
+
         CHECK_HALF_MV(1, 1, mx-1, my-1)
-        CHECK_HALF_MV(0, 1, mx  , my-1)        
+        CHECK_HALF_MV(0, 1, mx  , my-1)
         CHECK_HALF_MV(1, 1, mx  , my-1)
         CHECK_HALF_MV(1, 0, mx-1, my  )
         CHECK_HALF_MV(1, 0, mx  , my  )
         CHECK_HALF_MV(1, 1, mx-1, my  )
-        CHECK_HALF_MV(0, 1, mx  , my  )        
+        CHECK_HALF_MV(0, 1, mx  , my  )
         CHECK_HALF_MV(1, 1, mx  , my  )
 
         assert(bx >= xmin*2 || bx <= xmax*2 || by >= ymin*2 || by <= ymax*2);
@@ -113,20 +113,20 @@ static int hpel_motion_search)(MpegEncContext * s,
 
 #else
 static int hpel_motion_search(MpegEncContext * s,
-				  int *mx_ptr, int *my_ptr, int dmin,
+                                  int *mx_ptr, int *my_ptr, int dmin,
                                   int src_index, int ref_index,
                                   int size, int h)
 {
     MotionEstContext * const c= &s->me;
     const int mx = *mx_ptr;
-    const int my = *my_ptr;   
+    const int my = *my_ptr;
     const int penalty_factor= c->sub_penalty_factor;
     me_cmp_func cmp_sub, chroma_cmp_sub;
     int bx=2*mx, by=2*my;
 
     LOAD_COMMON
     int flags= c->sub_flags;
-    
+
  //FIXME factorize
 
     cmp_sub= s->dsp.me_sub_cmp[size];
@@ -137,18 +137,18 @@ static int hpel_motion_search(MpegEncContext * s,
         *my_ptr = 0;
         return dmin;
     }
-        
+
     if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
         dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
         if(mx || my || size>0)
             dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
     }
-        
-    if (mx > xmin && mx < xmax && 
+
+    if (mx > xmin && mx < xmax &&
         my > ymin && my < ymax) {
         int d= dmin;
         const int index= (my<<ME_MAP_SHIFT) + mx;
-        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] 
+        const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
                      + (mv_penalty[bx   - pred_x] + mv_penalty[by-2 - pred_y])*c->penalty_factor;
         const int l= score_map[(index- 1               )&(ME_MAP_SIZE-1)]
                      + (mv_penalty[bx-2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
@@ -156,7 +156,7 @@ static int hpel_motion_search(MpegEncContext * s,
                      + (mv_penalty[bx+2 - pred_x] + mv_penalty[by   - pred_y])*c->penalty_factor;
         const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)]
                      + (mv_penalty[bx   - pred_x] + mv_penalty[by+2 - pred_y])*c->penalty_factor;
-    
+
 #if 1
         int key;
         int map_generation= c->map_generation;
@@ -171,7 +171,7 @@ static int hpel_motion_search(MpegEncContext * s,
         assert(map[(index+1)&(ME_MAP_SIZE-1)] == key);
         key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation;
         assert(map[(index-1)&(ME_MAP_SIZE-1)] == key);
-#endif                
+#endif
         if(t<=b){
             CHECK_HALF_MV(0, 1, mx  ,my-1)
             if(l<=r){
@@ -216,7 +216,7 @@ static int hpel_motion_search(MpegEncContext * s,
 
     *mx_ptr = bx;
     *my_ptr = by;
-    
+
     return dmin;
 }
 #endif
@@ -244,12 +244,12 @@ int inline ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
     int d;
 
     LOAD_COMMON
-    
+
  //FIXME factorize
 
     cmp_sub= s->dsp.mb_cmp[size];
     chroma_cmp_sub= s->dsp.mb_cmp[size+1];
-    
+
 //    assert(!c->skip);
 //    assert(c->avctx->me_sub_cmp != c->avctx->mb_cmp);
 
@@ -257,7 +257,7 @@ int inline ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
     //FIXME check cbp before adding penalty for (0,0) vector
     if(add_rate && (mx || my || size>0))
         d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
-        
+
     return d;
 }
 
@@ -271,13 +271,13 @@ int inline ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
 }
 
 static int qpel_motion_search(MpegEncContext * s,
-				  int *mx_ptr, int *my_ptr, int dmin,
-                                  int src_index, int ref_index,                                  
+                                  int *mx_ptr, int *my_ptr, int dmin,
+                                  int src_index, int ref_index,
                                   int size, int h)
 {
     MotionEstContext * const c= &s->me;
     const int mx = *mx_ptr;
-    const int my = *my_ptr;   
+    const int my = *my_ptr;
     const int penalty_factor= c->sub_penalty_factor;
     const int map_generation= c->map_generation;
     const int subpel_quality= c->avctx->me_subpel_quality;
@@ -287,7 +287,7 @@ static int qpel_motion_search(MpegEncContext * s,
 
     LOAD_COMMON
     int flags= c->sub_flags;
-    
+
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
  //FIXME factorize
@@ -300,14 +300,14 @@ static int qpel_motion_search(MpegEncContext * s,
         *my_ptr = 0;
         return dmin;
     }
-        
+
     if(c->avctx->me_cmp != c->avctx->me_sub_cmp){
         dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
         if(mx || my || size>0)
             dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
     }
-        
-    if (mx > xmin && mx < xmax && 
+
+    if (mx > xmin && mx < xmax &&
         my > ymin && my < ymax) {
         int bx=4*mx, by=4*my;
         int d= dmin;
@@ -320,10 +320,10 @@ static int qpel_motion_search(MpegEncContext * s,
         const int c= score_map[(index                    )&(ME_MAP_SIZE-1)];
         int best[8];
         int best_pos[8][2];
-        
+
         memset(best, 64, sizeof(int)*8);
 #if 1
-        if(s->me.dia_size>=2){        
+        if(s->me.dia_size>=2){
             const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
             const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
             const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)];
@@ -337,14 +337,14 @@ static int qpel_motion_search(MpegEncContext * s,
                     const int64_t b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b;
                     int score= (ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2 + 512)>>10;
                     int i;
-                    
+
                     if((nx&3)==0 && (ny&3)==0) continue;
-                    
+
                     score += (mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
-                    
+
 //                    if(nx&1) score-=1024*c->penalty_factor;
 //                    if(ny&1) score-=1024*c->penalty_factor;
-                    
+
                     for(i=0; i<8; i++){
                         if(score < best[i]){
                             memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
@@ -361,37 +361,37 @@ static int qpel_motion_search(MpegEncContext * s,
             int tl;
             //FIXME this could overflow (unlikely though)
             const int cx = 4*(r - l);
-            const int cx2= r + l - 2*c; 
+            const int cx2= r + l - 2*c;
             const int cy = 4*(b - t);
             const int cy2= b + t - 2*c;
             int cxy;
-              
+
             if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
                 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
             }else{
                 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
             }
-            
-            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c; 
-           
+
+            cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
+
             assert(16*cx2 + 4*cx + 32*c == 32*r);
             assert(16*cx2 - 4*cx + 32*c == 32*l);
             assert(16*cy2 + 4*cy + 32*c == 32*b);
             assert(16*cy2 - 4*cy + 32*c == 32*t);
             assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl);
-            
+
             for(ny= -3; ny <= 3; ny++){
                 for(nx= -3; nx <= 3; nx++){
                     //FIXME this could overflow (unlikely though)
                     int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor
                     int i;
-                    
+
                     if((nx&3)==0 && (ny&3)==0) continue;
-                
+
                     score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor;
 //                    if(nx&1) score-=32*c->penalty_factor;
   //                  if(ny&1) score-=32*c->penalty_factor;
-                    
+
                     for(i=0; i<8; i++){
                         if(score < best[i]){
                             memmove(&best[i+1], &best[i], sizeof(int)*(7-i));
@@ -403,7 +403,7 @@ static int qpel_motion_search(MpegEncContext * s,
                         }
                     }
                 }
-            }            
+            }
         }
         for(i=0; i<subpel_quality; i++){
             nx= best_pos[i][0];
@@ -421,7 +421,7 @@ static int qpel_motion_search(MpegEncContext * s,
 
 //            nx= FFMAX(4*mx - bx, bx - 4*mx);
 //            ny= FFMAX(4*my - by, by - 4*my);
-            
+
             static int stats[7][7], count;
             count++;
             stats[4*mx - bx + 3][4*my - by + 3]++;
@@ -437,17 +437,17 @@ static int qpel_motion_search(MpegEncContext * s,
 #else
 
         CHECK_QUARTER_MV(2, 2, mx-1, my-1)
-        CHECK_QUARTER_MV(0, 2, mx  , my-1)        
+        CHECK_QUARTER_MV(0, 2, mx  , my-1)
         CHECK_QUARTER_MV(2, 2, mx  , my-1)
         CHECK_QUARTER_MV(2, 0, mx  , my  )
         CHECK_QUARTER_MV(2, 2, mx  , my  )
         CHECK_QUARTER_MV(0, 2, mx  , my  )
         CHECK_QUARTER_MV(2, 2, mx-1, my  )
         CHECK_QUARTER_MV(2, 0, mx-1, my  )
-        
+
         nx= bx;
         ny= by;
-        
+
         for(i=0; i<8; i++){
             int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1};
             int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1};
@@ -563,7 +563,7 @@ static always_inline int small_diamond_search(MpegEncContext * s, int *best, int
     LOAD_COMMON
     LOAD_COMMON2
     int map_generation= c->map_generation;
-    
+
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
 
@@ -605,7 +605,7 @@ static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
     LOAD_COMMON
     LOAD_COMMON2
     int map_generation= c->map_generation;
-    
+
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
 
@@ -613,7 +613,7 @@ static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
         int dir;
         const int x= best[0];
         const int y= best[1];
-        
+
         if(dia_size&(dia_size-1)) continue;
 
         if(   x + dia_size > xmax
@@ -621,7 +621,7 @@ static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
            || y + dia_size > ymax
            || y - dia_size < ymin)
            continue;
-        
+
         for(dir= 0; dir<dia_size; dir+=2){
             int d;
 
@@ -653,7 +653,7 @@ if(256*256*256*64 % (stats[0]+1)==0){
 }
 #endif
     }
-    return dmin;    
+    return dmin;
 }
 
 #define SAB_CHECK_MV(ax,ay)\
@@ -698,19 +698,19 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
     LOAD_COMMON
     LOAD_COMMON2
     int map_generation= c->map_generation;
-    
+
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
-    
+
     for(j=i=0; i<ME_MAP_SIZE; i++){
         uint32_t key= map[i];
 
         key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
-        
+
         if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue;
-        
+
         assert(j<MAX_SAB_SIZE); //max j = number of predictors
-        
+
         minima[j].height= score_map[i];
         minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS;
         minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1);
@@ -719,25 +719,25 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
         minima[j].checked=0;
         if(minima[j].x || minima[j].y)
             minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor;
-        
+
         j++;
     }
-    
+
     qsort(minima, j, sizeof(Minima), minima_cmp);
-    
+
     for(; j<minima_count; j++){
         minima[j].height=256*256*256*64;
         minima[j].checked=0;
         minima[j].x= minima[j].y=0;
     }
-    
+
     for(i=0; i<minima_count; i++){
         const int x= minima[i].x;
         const int y= minima[i].y;
         int d;
-        
+
         if(minima[i].checked) continue;
-        
+
         if(   x >= xmax || x <= xmin
            || y >= ymax || y <= ymin)
            continue;
@@ -746,14 +746,14 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
         SAB_CHECK_MV(x+1, y)
         SAB_CHECK_MV(x  , y-1)
         SAB_CHECK_MV(x  , y+1)
-        
+
         minima[i].checked= 1;
     }
-    
+
     best[0]= minima[0].x;
     best[1]= minima[0].y;
     dmin= minima[0].height;
-    
+
     if(   best[0] < xmax && best[0] > xmin
        && best[1] < ymax && best[1] > ymin){
         int d;
@@ -763,7 +763,7 @@ static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
         CHECK_MV(best[0], best[1]-1)
         CHECK_MV(best[0], best[1]+1)
     }
-    return dmin;    
+    return dmin;
 }
 
 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
@@ -776,7 +776,7 @@ static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
     LOAD_COMMON
     LOAD_COMMON2
     int map_generation= c->map_generation;
-    
+
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
 
@@ -840,7 +840,7 @@ if(256*256*256*64 % (stats[0]+1)==0){
 }
 #endif
     }
-    return dmin;    
+    return dmin;
 }
 
 static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
@@ -858,7 +858,7 @@ static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
 }
 
 static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
-                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2], 
+                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
                              int ref_mv_scale, int flags, int size, int h)
 {
     MotionEstContext * const c= &s->me;
@@ -869,10 +869,10 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
     const int ref_mv_stride= s->mb_stride; //pass as arg  FIXME
     const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
     me_cmp_func cmpf, chroma_cmpf;
-    
+
     LOAD_COMMON
     LOAD_COMMON2
-    
+
     if(c->pre_pass){
         penalty_factor= c->pre_penalty_factor;
         cmpf= s->dsp.me_pre_cmp[size];
@@ -882,7 +882,7 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
         cmpf= s->dsp.me_cmp[size];
         chroma_cmpf= s->dsp.me_cmp[size+1];
     }
-    
+
     map_generation= update_map_generation(c);
 
     assert(cmpf);
@@ -893,7 +893,7 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
     /* first line */
     if (s->first_slice_line) {
         CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
-        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
     }else{
         if(dmin<h*h && ( P_LEFT[0]    |P_LEFT[1]
@@ -906,7 +906,7 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
         }
         CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
         if(dmin>h*h*2){
-            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                             (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
             CHECK_MV(P_LEFT[0]    >>shift, P_LEFT[1]    >>shift)
             CHECK_MV(P_TOP[0]     >>shift, P_TOP[1]     >>shift)
@@ -915,16 +915,16 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
     }
     if(dmin>h*h*4){
         if(c->pre_pass){
-            CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16, 
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16,
                             (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16)
             if(!s->first_slice_line)
-                CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, 
+                CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
                                 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
         }else{
-            CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, 
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
                             (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
             if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
-                CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, 
+                CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
                                 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
         }
     }
@@ -955,7 +955,7 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
 
 //check(best[0],best[1],0, b1)
     *mx_ptr= best[0];
-    *my_ptr= best[1];    
+    *my_ptr= best[1];
 
 //    printf("%d %d %d \n", best[0], best[1], dmin);
     return dmin;
@@ -963,7 +963,7 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx
 
 //this function is dedicated to the braindamaged gcc
 inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
-                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2], 
+                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
                              int ref_mv_scale, int size, int h)
 {
     MotionEstContext * const c= &s->me;
@@ -979,12 +979,12 @@ inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
 
 static int epzs_motion_search4(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int P[10][2],
-                             int src_index, int ref_index, int16_t (*last_mv)[2], 
+                             int src_index, int ref_index, int16_t (*last_mv)[2],
                              int ref_mv_scale)
 {
     MotionEstContext * const c= &s->me;
     int best[2]={0, 0};
-    int d, dmin; 
+    int d, dmin;
     int map_generation;
     const int penalty_factor= c->penalty_factor;
     const int size=1;
@@ -995,18 +995,18 @@ static int epzs_motion_search4(MpegEncContext * s,
     LOAD_COMMON
     int flags= c->flags;
     LOAD_COMMON2
-    
+
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
 
     map_generation= update_map_generation(c);
 
     dmin = 1000000;
-//printf("%d %d %d %d //",xmin, ymin, xmax, ymax); 
+//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
     /* first line */
     if (s->first_slice_line) {
-	CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
-        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 
+        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
         CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
     }else{
@@ -1017,22 +1017,22 @@ static int epzs_motion_search4(MpegEncContext * s,
             CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
             CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
             CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
-            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                             (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
         }
     }
     if(dmin>64*4){
-        CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, 
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
                         (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
         if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
-            CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, 
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
                             (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
     }
 
     dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
 
     *mx_ptr= best[0];
-    *my_ptr= best[1];    
+    *my_ptr= best[1];
 
 //    printf("%d %d %d \n", best[0], best[1], dmin);
     return dmin;
@@ -1041,12 +1041,12 @@ static int epzs_motion_search4(MpegEncContext * s,
 //try to merge with above FIXME (needs PSNR test)
 static int epzs_motion_search2(MpegEncContext * s,
                              int *mx_ptr, int *my_ptr, int P[10][2],
-                             int src_index, int ref_index, int16_t (*last_mv)[2], 
+                             int src_index, int ref_index, int16_t (*last_mv)[2],
                              int ref_mv_scale)
 {
     MotionEstContext * const c= &s->me;
     int best[2]={0, 0};
-    int d, dmin; 
+    int d, dmin;
     int map_generation;
     const int penalty_factor= c->penalty_factor;
     const int size=0; //FIXME pass as arg
@@ -1057,18 +1057,18 @@ static int epzs_motion_search2(MpegEncContext * s,
     LOAD_COMMON
     int flags= c->flags;
     LOAD_COMMON2
-    
+
     cmpf= s->dsp.me_cmp[size];
     chroma_cmpf= s->dsp.me_cmp[size+1];
 
     map_generation= update_map_generation(c);
 
     dmin = 1000000;
-//printf("%d %d %d %d //",xmin, ymin, xmax, ymax); 
+//printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
     /* first line */
     if (s->first_slice_line) {
-	CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
-        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 
+        CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                         (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
         CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift)
     }else{
@@ -1079,22 +1079,22 @@ static int epzs_motion_search2(MpegEncContext * s,
             CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
             CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift)
             CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
-            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, 
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16,
                             (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16)
         }
     }
     if(dmin>64*4){
-        CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, 
+        CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16,
                         (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16)
         if(s->mb_y+1<s->end_mb_y)  //FIXME replace at least with last_slice_line
-            CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, 
+            CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
                             (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
     }
 
     dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
 
     *mx_ptr= best[0];
-    *my_ptr= best[1];    
+    *my_ptr= best[1];
 
 //    printf("%d %d %d \n", best[0], best[1], dmin);
     return dmin;
diff --git a/src/libffmpeg/libavcodec/mpeg12.c b/src/libffmpeg/libavcodec/mpeg12.c
index 8b2b75d4e..ddecae85d 100644
--- a/src/libffmpeg/libavcodec/mpeg12.c
+++ b/src/libffmpeg/libavcodec/mpeg12.c
@@ -1,7 +1,7 @@
 /*
  * MPEG1 codec / MPEG2 decoder
  * Copyright (c) 2000,2001 Fabrice Bellard.
- * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> 
+ * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
@@ -15,14 +15,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file mpeg12.c
  * MPEG1/2 codec
  */
- 
+
 //#define DEBUG
 #include "avcodec.h"
 #include "dsputil.h"
@@ -42,14 +42,14 @@
 
 
 /* Start codes. */
-#define SEQ_END_CODE		0x000001b7
-#define SEQ_START_CODE		0x000001b3
-#define GOP_START_CODE		0x000001b8
-#define PICTURE_START_CODE	0x00000100
-#define SLICE_MIN_START_CODE	0x00000101
-#define SLICE_MAX_START_CODE	0x000001af
-#define EXT_START_CODE		0x000001b5
-#define USER_START_CODE		0x000001b2
+#define SEQ_END_CODE            0x000001b7
+#define SEQ_START_CODE          0x000001b3
+#define GOP_START_CODE          0x000001b8
+#define PICTURE_START_CODE      0x00000100
+#define SLICE_MIN_START_CODE    0x00000101
+#define SLICE_MAX_START_CODE    0x000001af
+#define EXT_START_CODE          0x000001b5
+#define USER_START_CODE         0x000001b2
 
 #define DC_VLC_BITS 9
 #define MV_VLC_BITS 9
@@ -60,23 +60,23 @@
 #define TEX_VLC_BITS 9
 
 #ifdef CONFIG_ENCODERS
-static void mpeg1_encode_block(MpegEncContext *s, 
-                         DCTELEM *block, 
+static void mpeg1_encode_block(MpegEncContext *s,
+                         DCTELEM *block,
                          int component);
 static void mpeg1_encode_motion(MpegEncContext *s, int val, int f_or_b_code);    // RAL: f_code parameter added
 #endif //CONFIG_ENCODERS
-static inline int mpeg1_decode_block_inter(MpegEncContext *s, 
-                              DCTELEM *block, 
+static inline int mpeg1_decode_block_inter(MpegEncContext *s,
+                              DCTELEM *block,
                               int n);
-static inline int mpeg1_decode_block_intra(MpegEncContext *s, 
-                              DCTELEM *block, 
+static inline int mpeg1_decode_block_intra(MpegEncContext *s,
+                              DCTELEM *block,
                               int n);
 static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *block, int n);
-static inline int mpeg2_decode_block_non_intra(MpegEncContext *s, 
-                                        DCTELEM *block, 
+static inline int mpeg2_decode_block_non_intra(MpegEncContext *s,
+                                        DCTELEM *block,
                                         int n);
-static inline int mpeg2_decode_block_intra(MpegEncContext *s, 
-                                    DCTELEM *block, 
+static inline int mpeg2_decode_block_intra(MpegEncContext *s,
+                                    DCTELEM *block,
                                     int n);
 static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s, DCTELEM *block, int n);
 static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s, DCTELEM *block, int n);
@@ -96,7 +96,7 @@ const enum PixelFormat pixfmt_yuv_444[]= {PIX_FMT_YUV444P,-1};
 const enum PixelFormat pixfmt_xvmc_mpg2_420[] = {
                                            PIX_FMT_XVMC_MPEG2_IDCT,
                                            PIX_FMT_XVMC_MPEG2_MC,
-					   -1};
+                                           -1};
 #ifdef CONFIG_ENCODERS
 static uint8_t (*mv_penalty)[MAX_MV*2+1]= NULL;
 static uint8_t fcode_tab[MAX_MV*2+1];
@@ -115,12 +115,12 @@ static int8_t mpeg1_max_level[2][64];
 static void init_2d_vlc_rl(RLTable *rl, int use_static)
 {
     int i;
-    
-    init_vlc(&rl->vlc, TEX_VLC_BITS, rl->n + 2, 
+
+    init_vlc(&rl->vlc, TEX_VLC_BITS, rl->n + 2,
              &rl->table_vlc[0][1], 4, 2,
              &rl->table_vlc[0][0], 4, 2, use_static);
 
-    if(use_static)    
+    if(use_static)
         rl->rl_vlc[0]= av_mallocz_static(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
     else
         rl->rl_vlc[0]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
@@ -129,7 +129,7 @@ static void init_2d_vlc_rl(RLTable *rl, int use_static)
         int code= rl->vlc.table[i][0];
         int len = rl->vlc.table[i][1];
         int level, run;
-    
+
         if(len==0){ // illegal code
             run= 65;
             level= MAX_LEVEL;
@@ -163,7 +163,7 @@ static void init_uni_ac_vlc(RLTable *rl, uint32_t *uni_ac_vlc_bits, uint8_t *uni
         int run;
         for(run=0; run<64; run++){
             int len, bits, code;
-            
+
             int alevel= ABS(level);
             int sign= (level>>31)&1;
 
@@ -173,7 +173,7 @@ static void init_uni_ac_vlc(RLTable *rl, uint32_t *uni_ac_vlc_bits, uint8_t *uni
                 code= rl->index_run[0][run] + alevel - 1;
 
             if (code < 111 /* rl->n */) {
-	    	/* store the vlc & sign at once */
+                /* store the vlc & sign at once */
                 len=   mpeg1_vlc[code][1]+1;
                 bits= (mpeg1_vlc[code][0]<<1) + sign;
             } else {
@@ -239,7 +239,7 @@ static int encode_init(AVCodecContext *avctx)
             av_log(avctx, AV_LOG_INFO, "MPEG1/2 does not support %d/%d fps, there may be AV sync issues\n", avctx->time_base.den, avctx->time_base.num);
         }
     }
-    
+
     return 0;
 }
 
@@ -260,36 +260,36 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
         float best_aspect_error= 1E10;
         float aspect_ratio= av_q2d(s->avctx->sample_aspect_ratio);
         int constraint_parameter_flag;
-        
+
         if(aspect_ratio==0.0) aspect_ratio= 1.0; //pixel aspect 1:1 (VGA)
-        
+
         if (s->current_picture.key_frame) {
             AVRational framerate= frame_rate_tab[s->frame_rate_index];
 
             /* mpeg1 header repeated every gop */
             put_header(s, SEQ_START_CODE);
- 
+
             put_bits(&s->pb, 12, s->width);
             put_bits(&s->pb, 12, s->height);
-            
+
             for(i=1; i<15; i++){
                 float error= aspect_ratio;
                 if(s->codec_id == CODEC_ID_MPEG1VIDEO || i <=1)
                     error-= 1.0/mpeg1_aspect[i];
                 else
                     error-= av_q2d(mpeg2_aspect[i])*s->height/s->width;
-             
+
                 error= ABS(error);
-                
+
                 if(error < best_aspect_error){
                     best_aspect_error= error;
                     s->aspect_ratio_info= i;
                 }
             }
-            
+
             put_bits(&s->pb, 4, s->aspect_ratio_info);
             put_bits(&s->pb, 4, s->frame_rate_index);
-            
+
             if(s->avctx->rc_max_rate){
                 v = (s->avctx->rc_max_rate + 399) / 400;
                 if (v > 0x3ffff && s->codec_id == CODEC_ID_MPEG1VIDEO)
@@ -309,8 +309,8 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
             put_bits(&s->pb, 1, 1); /* marker */
             put_bits(&s->pb, 10, vbv_buffer_size & 0x3FF);
 
-            constraint_parameter_flag= 
-                s->width <= 768 && s->height <= 576 && 
+            constraint_parameter_flag=
+                s->width <= 768 && s->height <= 576 &&
                 s->mb_width * s->mb_height <= 396 &&
                 s->mb_width * s->mb_height * framerate.num <= framerate.den*396*25 &&
                 framerate.num <= framerate.den*30 &&
@@ -318,9 +318,9 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
                 vbv_buffer_size <= 20 &&
                 v <= 1856000/400 &&
                 s->codec_id == CODEC_ID_MPEG1VIDEO;
-                
+
             put_bits(&s->pb, 1, constraint_parameter_flag);
-            
+
             ff_write_quant_matrix(&s->pb, s->avctx->intra_matrix);
             ff_write_quant_matrix(&s->pb, s->avctx->inter_matrix);
 
@@ -328,7 +328,7 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
                 put_header(s, EXT_START_CODE);
                 put_bits(&s->pb, 4, 1); //seq ext
                 put_bits(&s->pb, 1, 0); //esc
-                
+
                 if(s->avctx->profile == FF_PROFILE_UNKNOWN){
                     put_bits(&s->pb, 3, 4); //profile
                 }else{
@@ -352,7 +352,7 @@ static void mpeg1_encode_sequence_header(MpegEncContext *s)
                 put_bits(&s->pb, 2, 0); // frame_rate_ext_n
                 put_bits(&s->pb, 5, 0); // frame_rate_ext_d
             }
-            
+
             put_header(s, GOP_START_CODE);
             put_bits(&s->pb, 1, 0); /* do drop frame */
             /* time code : we must convert from the real frame rate to a
@@ -376,7 +376,7 @@ static inline void encode_mb_skip_run(MpegEncContext *s, int run){
         put_bits(&s->pb, 11, 0x008);
         run -= 33;
     }
-    put_bits(&s->pb, mbAddrIncrTable[run][1], 
+    put_bits(&s->pb, mbAddrIncrTable[run][1],
              mbAddrIncrTable[run][0]);
 }
 #endif //CONFIG_ENCODERS
@@ -413,13 +413,13 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number)
     /* temporal reference */
 
     // RAL: s->picture_number instead of s->fake_picture_number
-    put_bits(&s->pb, 10, (s->picture_number - 
-                          s->gop_picture_number) & 0x3ff); 
+    put_bits(&s->pb, 10, (s->picture_number -
+                          s->gop_picture_number) & 0x3ff);
     put_bits(&s->pb, 3, s->pict_type);
 
     s->vbv_delay_ptr= s->pb.buf + put_bits_count(&s->pb)/8;
     put_bits(&s->pb, 16, 0xFFFF); /* vbv_delay */
-    
+
     // RAL: Forward f_code also needed for B frames
     if (s->pict_type == P_TYPE || s->pict_type == B_TYPE) {
         put_bits(&s->pb, 1, 0); /* half pel coordinates */
@@ -428,7 +428,7 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number)
         else
             put_bits(&s->pb, 3, 7); /* forward_f_code */
     }
-    
+
     // RAL: Backward f_code necessary for B frames
     if (s->pict_type == B_TYPE) {
         put_bits(&s->pb, 1, 0); /* half pel coordinates */
@@ -457,7 +457,7 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number)
             put_bits(&s->pb, 8, 255);
         }
         put_bits(&s->pb, 2, s->intra_dc_precision);
-        
+
         assert(s->picture_structure == PICT_FRAME);
         put_bits(&s->pb, 2, s->picture_structure);
         if (s->progressive_sequence) {
@@ -468,7 +468,7 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number)
         /* XXX: optimize the generation of this flag with entropy
            measures */
         s->frame_pred_frame_dct = s->progressive_sequence;
-        
+
         put_bits(&s->pb, 1, s->frame_pred_frame_dct);
         put_bits(&s->pb, 1, s->concealment_motion_vectors);
         put_bits(&s->pb, 1, s->q_scale_type);
@@ -488,17 +488,17 @@ void mpeg1_encode_picture_header(MpegEncContext *s, int picture_number)
             put_bits(&s->pb, 8, svcd_scan_offset_placeholder[i]);
         }
     }
-    
+
     s->mb_y=0;
     ff_mpeg1_encode_slice_header(s);
 }
 
-static inline void put_mb_modes(MpegEncContext *s, int n, int bits, 
+static inline void put_mb_modes(MpegEncContext *s, int n, int bits,
                                 int has_mv, int field_motion)
 {
     put_bits(&s->pb, n, bits);
     if (!s->frame_pred_frame_dct) {
-        if (has_mv) 
+        if (has_mv)
             put_bits(&s->pb, 2, 2 - field_motion); /* motion_type: frame/field */
         put_bits(&s->pb, 1, s->interlaced_dct);
     }
@@ -519,9 +519,9 @@ void mpeg1_encode_mb(MpegEncContext *s,
         if (s->block_last_index[i] >= 0)
             cbp |= 1 << (5 - i);
     }
-    
+
     if (cbp == 0 && !first_mb && s->mv_type == MV_TYPE_16X16 &&
-        (mb_x != s->mb_width - 1 || (mb_y != s->mb_height - 1 && s->codec_id == CODEC_ID_MPEG1VIDEO)) && 
+        (mb_x != s->mb_width - 1 || (mb_y != s->mb_height - 1 && s->codec_id == CODEC_ID_MPEG1VIDEO)) &&
         ((s->pict_type == P_TYPE && (motion_x | motion_y) == 0) ||
         (s->pict_type == B_TYPE && s->mv_dir == s->last_mv_dir && (((s->mv_dir & MV_DIR_FORWARD) ? ((s->mv[0][0][0] - s->last_mv[0][0][0])|(s->mv[0][0][1] - s->last_mv[0][0][1])) : 0) |
         ((s->mv_dir & MV_DIR_BACKWARD) ? ((s->mv[1][0][0] - s->last_mv[1][0][0])|(s->mv[1][0][1] - s->last_mv[1][0][1])) : 0)) == 0))) {
@@ -531,7 +531,7 @@ void mpeg1_encode_mb(MpegEncContext *s,
         s->misc_bits++;
         s->last_bits++;
         if(s->pict_type == P_TYPE){
-            s->last_mv[0][1][0]= s->last_mv[0][0][0]= 
+            s->last_mv[0][1][0]= s->last_mv[0][0][0]=
             s->last_mv[0][1][1]= s->last_mv[0][0][1]= 0;
         }
     } else {
@@ -541,7 +541,7 @@ void mpeg1_encode_mb(MpegEncContext *s,
         }else{
             encode_mb_skip_run(s, s->mb_skip_run);
         }
-        
+
         if (s->pict_type == I_TYPE) {
             if(s->dquant && cbp){
                 put_mb_modes(s, 2, 1, 0, 0); /* macroblock_type : macroblock_quant = 1 */
@@ -563,7 +563,7 @@ void mpeg1_encode_mb(MpegEncContext *s,
             s->misc_bits+= get_bits_diff(s);
             s->i_count++;
             memset(s->last_mv, 0, sizeof(s->last_mv));
-        } else if (s->pict_type == P_TYPE) { 
+        } else if (s->pict_type == P_TYPE) {
             if(s->mv_type == MV_TYPE_16X16){
                 if (cbp != 0) {
                     if ((motion_x|motion_y) == 0) {
@@ -626,7 +626,7 @@ void mpeg1_encode_mb(MpegEncContext *s,
             if(cbp)
                 put_bits(&s->pb, mbPatTable[cbp][1], mbPatTable[cbp][0]);
             s->f_count++;
-        } else{  
+        } else{
             static const int mb_type_len[4]={0,3,4,2}; //bak,for,bi
 
             if(s->mv_type == MV_TYPE_16X16){
@@ -648,15 +648,15 @@ void mpeg1_encode_mb(MpegEncContext *s,
                 }
                 s->misc_bits += get_bits_diff(s);
                 if (s->mv_dir&MV_DIR_FORWARD){
-                    mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code); 
-                    mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code); 
+                    mpeg1_encode_motion(s, s->mv[0][0][0] - s->last_mv[0][0][0], s->f_code);
+                    mpeg1_encode_motion(s, s->mv[0][0][1] - s->last_mv[0][0][1], s->f_code);
                     s->last_mv[0][0][0]=s->last_mv[0][1][0]= s->mv[0][0][0];
                     s->last_mv[0][0][1]=s->last_mv[0][1][1]= s->mv[0][0][1];
                     s->f_count++;
                 }
                 if (s->mv_dir&MV_DIR_BACKWARD){
-                    mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code); 
-                    mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code); 
+                    mpeg1_encode_motion(s, s->mv[1][0][0] - s->last_mv[1][0][0], s->b_code);
+                    mpeg1_encode_motion(s, s->mv[1][0][1] - s->last_mv[1][0][1], s->b_code);
                     s->last_mv[1][0][0]=s->last_mv[1][1][0]= s->mv[1][0][0];
                     s->last_mv[1][0][1]=s->last_mv[1][1][1]= s->mv[1][0][1];
                     s->b_count++;
@@ -727,8 +727,8 @@ static void mpeg1_encode_motion(MpegEncContext *s, int val, int f_or_b_code)
         /* zero vector */
         code = 0;
         put_bits(&s->pb,
-                 mbMotionVectorTable[0][1], 
-                 mbMotionVectorTable[0][0]); 
+                 mbMotionVectorTable[0][1],
+                 mbMotionVectorTable[0][0]);
     } else {
         bit_size = f_or_b_code - 1;
         range = 1 << bit_size;
@@ -752,8 +752,8 @@ static void mpeg1_encode_motion(MpegEncContext *s, int val, int f_or_b_code)
         assert(code > 0 && code <= 16);
 
         put_bits(&s->pb,
-                 mbMotionVectorTable[code][1], 
-                 mbMotionVectorTable[code][0]); 
+                 mbMotionVectorTable[code][1],
+                 mbMotionVectorTable[code][0]);
 
         put_bits(&s->pb, 1, sign);
         if (bit_size > 0) {
@@ -771,38 +771,38 @@ void ff_mpeg1_encode_init(MpegEncContext *s)
     if(!done){
         int f_code;
         int mv;
-	int i;
+        int i;
 
         done=1;
         init_rl(&rl_mpeg1, 1);
 
-	for(i=0; i<64; i++)
-	{
-		mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i];
-		mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i];
-	}
-        
+        for(i=0; i<64; i++)
+        {
+                mpeg1_max_level[0][i]= rl_mpeg1.max_level[0][i];
+                mpeg1_index_run[0][i]= rl_mpeg1.index_run[0][i];
+        }
+
         init_uni_ac_vlc(&rl_mpeg1, uni_mpeg1_ac_vlc_bits, uni_mpeg1_ac_vlc_len);
 
-	/* build unified dc encoding tables */
-	for(i=-255; i<256; i++)
-	{
-		int adiff, index;
-		int bits, code;
-		int diff=i;
-
-		adiff = ABS(diff);
-		if(diff<0) diff--;
-		index = av_log2(2*adiff);
-
-		bits= vlc_dc_lum_bits[index] + index;
-		code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1));
-		mpeg1_lum_dc_uni[i+255]= bits + (code<<8);
-		
-		bits= vlc_dc_chroma_bits[index] + index;
-		code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1));
-		mpeg1_chr_dc_uni[i+255]= bits + (code<<8);
-	}
+        /* build unified dc encoding tables */
+        for(i=-255; i<256; i++)
+        {
+                int adiff, index;
+                int bits, code;
+                int diff=i;
+
+                adiff = ABS(diff);
+                if(diff<0) diff--;
+                index = av_log2(2*adiff);
+
+                bits= vlc_dc_lum_bits[index] + index;
+                code= (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1));
+                mpeg1_lum_dc_uni[i+255]= bits + (code<<8);
+
+                bits= vlc_dc_chroma_bits[index] + index;
+                code= (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1));
+                mpeg1_chr_dc_uni[i+255]= bits + (code<<8);
+        }
 
         mv_penalty= av_mallocz( sizeof(uint8_t)*(MAX_FCODE+1)*(2*MAX_MV+1) );
 
@@ -818,7 +818,7 @@ void ff_mpeg1_encode_init(MpegEncContext *s)
                     range = 1 << bit_size;
 
                     val=mv;
-                    if (val < 0) 
+                    if (val < 0)
                         val = -val;
                     val--;
                     code = (val >> bit_size) + 1;
@@ -832,7 +832,7 @@ void ff_mpeg1_encode_init(MpegEncContext *s)
                 mv_penalty[f_code][mv+MAX_MV]= len;
             }
         }
-        
+
 
         for(f_code=MAX_FCODE; f_code>0; f_code--){
             for(mv=-(8<<f_code); mv<(8<<f_code); mv++){
@@ -868,32 +868,32 @@ static inline void encode_dc(MpegEncContext *s, int diff, int component)
         }
         if (component == 0) {
             put_bits(
-                &s->pb, 
+                &s->pb,
                 vlc_dc_lum_bits[index] + index,
                 (vlc_dc_lum_code[index]<<index) + (diff & ((1 << index) - 1)));
         }else{
             put_bits(
-                &s->pb, 
+                &s->pb,
                 vlc_dc_chroma_bits[index] + index,
                 (vlc_dc_chroma_code[index]<<index) + (diff & ((1 << index) - 1)));
         }
   }else{
     if (component == 0) {
         put_bits(
-	    &s->pb, 
-	    mpeg1_lum_dc_uni[diff+255]&0xFF,
-	    mpeg1_lum_dc_uni[diff+255]>>8);
+            &s->pb,
+            mpeg1_lum_dc_uni[diff+255]&0xFF,
+            mpeg1_lum_dc_uni[diff+255]>>8);
     } else {
         put_bits(
-            &s->pb, 
-	    mpeg1_chr_dc_uni[diff+255]&0xFF,
-	    mpeg1_chr_dc_uni[diff+255]>>8);
+            &s->pb,
+            mpeg1_chr_dc_uni[diff+255]&0xFF,
+            mpeg1_chr_dc_uni[diff+255]>>8);
     }
   }
 }
 
-static void mpeg1_encode_block(MpegEncContext *s, 
-                               DCTELEM *block, 
+static void mpeg1_encode_block(MpegEncContext *s,
+                               DCTELEM *block,
                                int n)
 {
     int alevel, level, last_non_zero, dc, diff, i, j, run, last_index, sign;
@@ -941,11 +941,11 @@ static void mpeg1_encode_block(MpegEncContext *s,
 #if 0
         if (level != 0)
             dprintf("level[%d]=%d\n", i, level);
-#endif            
+#endif
         /* encode using VLC */
         if (level != 0) {
             run = i - last_non_zero - 1;
-            
+
             alevel= level;
             MASK_ABS(sign, alevel)
             sign&=1;
@@ -953,10 +953,10 @@ static void mpeg1_encode_block(MpegEncContext *s,
 //            code = get_rl_index(rl, 0, run, alevel);
             if (alevel <= mpeg1_max_level[0][run]){
                 code= mpeg1_index_run[0][run] + alevel - 1;
-	    	/* store the vlc & sign at once */
+                /* store the vlc & sign at once */
                 put_bits(&s->pb, mpeg1_vlc[code][1]+1, (mpeg1_vlc[code][0]<<1) + sign);
             } else {
-		/* escape seems to be pretty rare <5% so i dont optimize it */
+                /* escape seems to be pretty rare <5% so i dont optimize it */
                 put_bits(&s->pb, mpeg1_vlc[111/*rl->n*/][1], mpeg1_vlc[111/*rl->n*/][0]);
                 /* escape: only clip in this case */
                 put_bits(&s->pb, 6, run);
@@ -1000,26 +1000,26 @@ static void init_vlcs(void)
     if (!done) {
         done = 1;
 
-        init_vlc(&dc_lum_vlc, DC_VLC_BITS, 12, 
+        init_vlc(&dc_lum_vlc, DC_VLC_BITS, 12,
                  vlc_dc_lum_bits, 1, 1,
                  vlc_dc_lum_code, 2, 2, 1);
-        init_vlc(&dc_chroma_vlc,  DC_VLC_BITS, 12, 
+        init_vlc(&dc_chroma_vlc,  DC_VLC_BITS, 12,
                  vlc_dc_chroma_bits, 1, 1,
                  vlc_dc_chroma_code, 2, 2, 1);
-        init_vlc(&mv_vlc, MV_VLC_BITS, 17, 
+        init_vlc(&mv_vlc, MV_VLC_BITS, 17,
                  &mbMotionVectorTable[0][1], 2, 1,
                  &mbMotionVectorTable[0][0], 2, 1, 1);
-        init_vlc(&mbincr_vlc, MBINCR_VLC_BITS, 36, 
+        init_vlc(&mbincr_vlc, MBINCR_VLC_BITS, 36,
                  &mbAddrIncrTable[0][1], 2, 1,
                  &mbAddrIncrTable[0][0], 2, 1, 1);
         init_vlc(&mb_pat_vlc, MB_PAT_VLC_BITS, 64,
                  &mbPatTable[0][1], 2, 1,
                  &mbPatTable[0][0], 2, 1, 1);
-        
-        init_vlc(&mb_ptype_vlc, MB_PTYPE_VLC_BITS, 7, 
+
+        init_vlc(&mb_ptype_vlc, MB_PTYPE_VLC_BITS, 7,
                  &table_mb_ptype[0][1], 2, 1,
                  &table_mb_ptype[0][0], 2, 1, 1);
-        init_vlc(&mb_btype_vlc, MB_BTYPE_VLC_BITS, 11, 
+        init_vlc(&mb_btype_vlc, MB_BTYPE_VLC_BITS, 11,
                  &table_mb_btype[0][1], 2, 1,
                  &table_mb_btype[0][0], 2, 1, 1);
         init_rl(&rl_mpeg1, 1);
@@ -1032,7 +1032,7 @@ static void init_vlcs(void)
 
 static inline int get_dmv(MpegEncContext *s)
 {
-    if(get_bits1(&s->gb)) 
+    if(get_bits1(&s->gb))
         return 1 - (get_bits1(&s->gb) << 1);
     else
         return 0;
@@ -1069,7 +1069,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
             av_log(s->avctx, AV_LOG_ERROR, "skipped MB in I frame at %d %d\n", s->mb_x, s->mb_y);
             return -1;
         }
-    
+
         /* skip mb */
         s->mb_intra = 0;
         for(i=0;i<12;i++)
@@ -1089,25 +1089,25 @@ static int mpeg_decode_mb(MpegEncContext *s,
             s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ]= MB_TYPE_SKIP | MB_TYPE_L0 | MB_TYPE_16x16;
         } else {
             int mb_type;
-            
+
             if(s->mb_x)
                 mb_type= s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride - 1];
             else
-                mb_type= s->current_picture.mb_type[ s->mb_width + (s->mb_y-1)*s->mb_stride - 1]; // FIXME not sure if this is allowed in mpeg at all, 
+                mb_type= s->current_picture.mb_type[ s->mb_width + (s->mb_y-1)*s->mb_stride - 1]; // FIXME not sure if this is allowed in mpeg at all,
             if(IS_INTRA(mb_type))
                 return -1;
-            
+
             /* if B type, reuse previous vectors and directions */
             s->mv[0][0][0] = s->last_mv[0][0][0];
             s->mv[0][0][1] = s->last_mv[0][0][1];
             s->mv[1][0][0] = s->last_mv[1][0][0];
             s->mv[1][0][1] = s->last_mv[1][0][1];
 
-            s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ]= 
+            s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride ]=
                 mb_type | MB_TYPE_SKIP;
 //            assert(s->current_picture.mb_type[ s->mb_x + s->mb_y*s->mb_stride - 1]&(MB_TYPE_16x16|MB_TYPE_16x8));
 
-            if((s->mv[0][0][0]|s->mv[0][0][1]|s->mv[1][0][0]|s->mv[1][0][1])==0) 
+            if((s->mv[0][0][0]|s->mv[0][0][1]|s->mv[1][0][0]|s->mv[1][0][1])==0)
                 s->mb_skipped = 1;
         }
 
@@ -1148,11 +1148,11 @@ static int mpeg_decode_mb(MpegEncContext *s,
 //    motion_type = 0; /* avoid warning */
     if (IS_INTRA(mb_type)) {
         s->dsp.clear_blocks(s->block[0]);
-    
+
         if(!s->chroma_y_shift){
             s->dsp.clear_blocks(s->block[6]);
         }
-    
+
         /* compute dct type */
         if (s->picture_structure == PICT_FRAME && //FIXME add a interlaced_dct coded var?
             !s->frame_pred_frame_dct) {
@@ -1161,15 +1161,15 @@ static int mpeg_decode_mb(MpegEncContext *s,
 
         if (IS_QUANT(mb_type))
             s->qscale = get_qscale(s);
-        
+
         if (s->concealment_motion_vectors) {
             /* just parse them */
-            if (s->picture_structure != PICT_FRAME) 
+            if (s->picture_structure != PICT_FRAME)
                 skip_bits1(&s->gb); /* field select */
-            
-            s->mv[0][0][0]= s->last_mv[0][0][0]= s->last_mv[0][1][0] = 
+
+            s->mv[0][0][0]= s->last_mv[0][0][0]= s->last_mv[0][1][0] =
                 mpeg_decode_motion(s, s->mpeg_f_code[0][0], s->last_mv[0][0][0]);
-            s->mv[0][0][1]= s->last_mv[0][0][1]= s->last_mv[0][1][1] = 
+            s->mv[0][0][1]= s->last_mv[0][0][1]= s->last_mv[0][1][1] =
                 mpeg_decode_motion(s, s->mpeg_f_code[0][1], s->last_mv[0][0][1]);
 
             skip_bits1(&s->gb); /* marker */
@@ -1234,7 +1234,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
             assert(mb_type & MB_TYPE_L0L1);
 //FIXME decide if MBs in field pictures are MB_TYPE_INTERLACED
             /* get additionnal motion vector type */
-            if (s->frame_pred_frame_dct) 
+            if (s->frame_pred_frame_dct)
                 motion_type = MT_FRAME;
             else{
                 motion_type = get_bits(&s->gb, 2);
@@ -1259,11 +1259,11 @@ static int mpeg_decode_mb(MpegEncContext *s,
                     case MT_FRAME: /* or MT_16X8 */
                         if (s->picture_structure == PICT_FRAME) {
                             /* MT_FRAME */
-                            mb_type |= MB_TYPE_16x16; 
+                            mb_type |= MB_TYPE_16x16;
                             s->mv_type = MV_TYPE_16X16;
-                            s->mv[i][0][0]= s->last_mv[i][0][0]= s->last_mv[i][1][0] = 
+                            s->mv[i][0][0]= s->last_mv[i][0][0]= s->last_mv[i][1][0] =
                                 mpeg_decode_motion(s, s->mpeg_f_code[i][0], s->last_mv[i][0][0]);
-                            s->mv[i][0][1]= s->last_mv[i][0][1]= s->last_mv[i][1][1] = 
+                            s->mv[i][0][1]= s->last_mv[i][0][1]= s->last_mv[i][1][1] =
                                 mpeg_decode_motion(s, s->mpeg_f_code[i][1], s->last_mv[i][0][1]);
                             /* full_pel: only for mpeg1 */
                             if (s->full_pel[i]){
@@ -1272,7 +1272,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
                             }
                         } else {
                             /* MT_16X8 */
-                            mb_type |= MB_TYPE_16x8 | MB_TYPE_INTERLACED; 
+                            mb_type |= MB_TYPE_16x8 | MB_TYPE_INTERLACED;
                             s->mv_type = MV_TYPE_16X8;
                             for(j=0;j<2;j++) {
                                 s->field_select[i][j] = get_bits1(&s->gb);
@@ -1288,7 +1288,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
                     case MT_FIELD:
                         s->mv_type = MV_TYPE_FIELD;
                         if (s->picture_structure == PICT_FRAME) {
-                            mb_type |= MB_TYPE_16x8 | MB_TYPE_INTERLACED; 
+                            mb_type |= MB_TYPE_16x8 | MB_TYPE_INTERLACED;
                             for(j=0;j<2;j++) {
                                 s->field_select[i][j] = get_bits1(&s->gb);
                                 val = mpeg_decode_motion(s, s->mpeg_f_code[i][0],
@@ -1303,7 +1303,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
                                 dprintf("fmy=%d\n", val);
                             }
                         } else {
-                            mb_type |= MB_TYPE_16x16 | MB_TYPE_INTERLACED; 
+                            mb_type |= MB_TYPE_16x16 | MB_TYPE_INTERLACED;
                             s->field_select[i][0] = get_bits1(&s->gb);
                             for(k=0;k<2;k++) {
                                 val = mpeg_decode_motion(s, s->mpeg_f_code[i][k],
@@ -1318,12 +1318,12 @@ static int mpeg_decode_mb(MpegEncContext *s,
                         {
                             int dmx, dmy, mx, my, m;
 
-                            mx = mpeg_decode_motion(s, s->mpeg_f_code[i][0], 
+                            mx = mpeg_decode_motion(s, s->mpeg_f_code[i][0],
                                                     s->last_mv[i][0][0]);
                             s->last_mv[i][0][0] = mx;
                             s->last_mv[i][1][0] = mx;
                             dmx = get_dmv(s);
-                            my = mpeg_decode_motion(s, s->mpeg_f_code[i][1], 
+                            my = mpeg_decode_motion(s, s->mpeg_f_code[i][1],
                                                     s->last_mv[i][0][1] >> 1);
                             dmy = get_dmv(s);
                             s->mv_type = MV_TYPE_DMV;
@@ -1338,7 +1338,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
                             s->mv[i][1][1] = my;//not used
 
                             if (s->picture_structure == PICT_FRAME) {
-                                mb_type |= MB_TYPE_16x16 | MB_TYPE_INTERLACED; 
+                                mb_type |= MB_TYPE_16x16 | MB_TYPE_INTERLACED;
 
                                 //m = 1 + 2 * s->top_field_first;
                                 m = s->top_field_first ? 1 : 3;
@@ -1356,7 +1356,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
                                 s->mv[i][2][1] = ((my + (my > 0)) >> 1) + dmy;
                                 if(s->picture_structure == PICT_TOP_FIELD)
                                     s->mv[i][2][1]--;
-                                else 
+                                else
                                     s->mv[i][2][1]++;
                             }
                         }
@@ -1368,11 +1368,11 @@ static int mpeg_decode_mb(MpegEncContext *s,
                 }
             }
         }
-        
+
         s->mb_intra = 0;
         if (HAS_CBP(mb_type)) {
             s->dsp.clear_blocks(s->block[0]);
-        
+
             if(!s->chroma_y_shift){
                 s->dsp.clear_blocks(s->block[6]);
             }
@@ -1383,8 +1383,8 @@ static int mpeg_decode_mb(MpegEncContext *s,
                 return -1;
             }
             if(mb_block_count > 6){
-	         cbp<<= mb_block_count-6;
-		 cbp |= get_bits(&s->gb, mb_block_count-6);
+                 cbp<<= mb_block_count-6;
+                 cbp |= get_bits(&s->gb, mb_block_count-6);
             }
 
 #ifdef HAVE_XVMC
@@ -1394,7 +1394,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
                 if(s->swap_uv){
                     exchange_uv(s);
                 }
-            }    
+            }
 #endif
 
             if (s->codec_id == CODEC_ID_MPEG2VIDEO) {
@@ -1409,7 +1409,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
                     }
                 }else{
                     cbp<<= 12-mb_block_count;
-    
+
                     for(i=0;i<mb_block_count;i++) {
                         if ( cbp & (1<<11) ) {
                             if (mpeg2_decode_block_non_intra(s, s->pblocks[i], i) < 0)
@@ -1443,7 +1443,7 @@ static int mpeg_decode_mb(MpegEncContext *s,
                 }
             }
         }else{
-            for(i=0;i<6;i++)
+            for(i=0;i<12;i++)
                 s->block_last_index[i] = -1;
         }
     }
@@ -1477,7 +1477,7 @@ static int mpeg_decode_motion(MpegEncContext *s, int fcode, int pred)
     if (sign)
         val = -val;
     val += pred;
-    
+
     /* modulo decoding */
     l= INT_BIT - 5 - shift;
     val = (val<<l)>>l;
@@ -1505,8 +1505,8 @@ static inline int decode_dc(GetBitContext *gb, int component)
     return diff;
 }
 
-static inline int mpeg1_decode_block_intra(MpegEncContext *s, 
-                               DCTELEM *block, 
+static inline int mpeg1_decode_block_intra(MpegEncContext *s,
+                               DCTELEM *block,
                                int n)
 {
     int level, dc, diff, i, j, run;
@@ -1528,12 +1528,12 @@ static inline int mpeg1_decode_block_intra(MpegEncContext *s,
     dprintf("dc=%d diff=%d\n", dc, diff);
     i = 0;
     {
-        OPEN_READER(re, &s->gb);    
+        OPEN_READER(re, &s->gb);
         /* now quantify & encode AC coefs */
         for(;;) {
             UPDATE_CACHE(re, &s->gb);
             GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
-            
+
             if(level == 127){
                 break;
             } else if(level != 0) {
@@ -1578,8 +1578,8 @@ static inline int mpeg1_decode_block_intra(MpegEncContext *s,
    return 0;
 }
 
-static inline int mpeg1_decode_block_inter(MpegEncContext *s, 
-                               DCTELEM *block, 
+static inline int mpeg1_decode_block_inter(MpegEncContext *s,
+                               DCTELEM *block,
                                int n)
 {
     int level, i, j, run;
@@ -1608,7 +1608,7 @@ static inline int mpeg1_decode_block_inter(MpegEncContext *s,
         /* now quantify & encode AC coefs */
         for(;;) {
             GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
-            
+
             if(level != 0) {
                 i += run;
                 j = scantable[i];
@@ -1683,7 +1683,7 @@ static inline int mpeg1_fast_decode_block_inter(MpegEncContext *s, DCTELEM *bloc
         /* now quantify & encode AC coefs */
         for(;;) {
             GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
-            
+
             if(level != 0) {
                 i += run;
                 j = scantable[i];
@@ -1728,8 +1728,8 @@ end:
 }
 
 
-static inline int mpeg2_decode_block_non_intra(MpegEncContext *s, 
-                               DCTELEM *block, 
+static inline int mpeg2_decode_block_non_intra(MpegEncContext *s,
+                               DCTELEM *block,
                                int n)
 {
     int level, i, j, run;
@@ -1766,7 +1766,7 @@ static inline int mpeg2_decode_block_non_intra(MpegEncContext *s,
         /* now quantify & encode AC coefs */
         for(;;) {
             GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
-            
+
             if(level != 0) {
                 i += run;
                 j = scantable[i];
@@ -1792,7 +1792,7 @@ static inline int mpeg2_decode_block_non_intra(MpegEncContext *s,
                 av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
                 return -1;
             }
-            
+
             mismatch ^= level;
             block[j] = level;
             if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
@@ -1804,13 +1804,13 @@ end:
         CLOSE_READER(re, &s->gb);
     }
     block[63] ^= (mismatch & 1);
-    
+
     s->block_last_index[n] = i;
     return 0;
 }
 
-static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s, 
-                               DCTELEM *block, 
+static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
+                               DCTELEM *block,
                                int n)
 {
     int level, i, j, run;
@@ -1836,7 +1836,7 @@ static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
     /* now quantify & encode AC coefs */
     for(;;) {
         GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
-        
+
         if(level != 0) {
             i += run;
             j = scantable[i];
@@ -1858,22 +1858,22 @@ static inline int mpeg2_fast_decode_block_non_intra(MpegEncContext *s,
                 level= ((level*2+1)*qscale)>>1;
             }
         }
-        
+
         block[j] = level;
         if(((int32_t)GET_CACHE(re, &s->gb)) <= (int32_t)0xBFFFFFFF)
             break;
         UPDATE_CACHE(re, &s->gb);
     }
 end:
-    LAST_SKIP_BITS(re, &s->gb, 2);    
+    LAST_SKIP_BITS(re, &s->gb, 2);
     CLOSE_READER(re, &s->gb);
     s->block_last_index[n] = i;
     return 0;
 }
 
 
-static inline int mpeg2_decode_block_intra(MpegEncContext *s, 
-                               DCTELEM *block, 
+static inline int mpeg2_decode_block_intra(MpegEncContext *s,
+                               DCTELEM *block,
                                int n)
 {
     int level, dc, diff, i, j, run;
@@ -1887,7 +1887,7 @@ static inline int mpeg2_decode_block_intra(MpegEncContext *s,
     /* DC coef */
     if (n < 4){
         quant_matrix = s->intra_matrix;
-        component = 0; 
+        component = 0;
     }else{
         quant_matrix = s->chroma_intra_matrix;
         component = (n&1) + 1;
@@ -1908,12 +1908,12 @@ static inline int mpeg2_decode_block_intra(MpegEncContext *s,
         rl = &rl_mpeg1;
 
     {
-        OPEN_READER(re, &s->gb);    
+        OPEN_READER(re, &s->gb);
         /* now quantify & encode AC coefs */
         for(;;) {
             UPDATE_CACHE(re, &s->gb);
             GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
-            
+
             if(level == 127){
                 break;
             } else if(level != 0) {
@@ -1940,20 +1940,20 @@ static inline int mpeg2_decode_block_intra(MpegEncContext *s,
                 av_log(s->avctx, AV_LOG_ERROR, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
                 return -1;
             }
-            
+
             mismatch^= level;
             block[j] = level;
         }
         CLOSE_READER(re, &s->gb);
     }
     block[63]^= mismatch&1;
-    
+
     s->block_last_index[n] = i;
     return 0;
 }
 
-static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s, 
-                               DCTELEM *block, 
+static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s,
+                               DCTELEM *block,
                                int n)
 {
     int level, dc, diff, j, run;
@@ -1966,7 +1966,7 @@ static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s,
     /* DC coef */
     if (n < 4){
         quant_matrix = s->intra_matrix;
-        component = 0; 
+        component = 0;
     }else{
         quant_matrix = s->chroma_intra_matrix;
         component = (n&1) + 1;
@@ -1984,12 +1984,12 @@ static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s,
         rl = &rl_mpeg1;
 
     {
-        OPEN_READER(re, &s->gb);    
+        OPEN_READER(re, &s->gb);
         /* now quantify & encode AC coefs */
         for(;;) {
             UPDATE_CACHE(re, &s->gb);
             GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2, 0);
-            
+
             if(level == 127){
                 break;
             } else if(level != 0) {
@@ -2012,12 +2012,12 @@ static inline int mpeg2_fast_decode_block_intra(MpegEncContext *s,
                     level= (level*qscale*quant_matrix[j])>>4;
                 }
             }
-            
+
             block[j] = level;
         }
         CLOSE_READER(re, &s->gb);
     }
-    
+
     s->block_last_index[n] = scantable - s->intra_scantable.permutated;
     return 0;
 }
@@ -2039,15 +2039,15 @@ static int mpeg_decode_init(AVCodecContext *avctx)
     Mpeg1Context *s = avctx->priv_data;
     MpegEncContext *s2 = &s->mpeg_enc_ctx;
     int i;
-    
+
     //we need some parmutation to store
     //matrixes, until MPV_common_init()
-    //set the real permutatuon 
+    //set the real permutatuon
     for(i=0;i<64;i++)
        s2->dsp.idct_permutation[i]=i;
 
     MPV_decode_defaults(s2);
-    
+
     s->mpeg_enc_ctx.avctx= avctx;
     s->mpeg_enc_ctx.flags= avctx->flags;
     s->mpeg_enc_ctx.flags2= avctx->flags2;
@@ -2061,16 +2061,16 @@ static int mpeg_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-static void quant_matrix_rebuild(uint16_t *matrix, const uint8_t *old_perm, 
+static void quant_matrix_rebuild(uint16_t *matrix, const uint8_t *old_perm,
                                      const uint8_t *new_perm){
     uint16_t temp_matrix[64];
     int i;
 
     memcpy(temp_matrix,matrix,64*sizeof(uint16_t));
-    
+
     for(i=0;i<64;i++){
         matrix[new_perm[i]] = temp_matrix[old_perm[i]];
-    }      
+    }
 }
 
 //Call this function when we know all parameters
@@ -2081,13 +2081,13 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
     uint8_t old_permutation[64];
 
     if (
-    	(s1->mpeg_enc_ctx_allocated == 0)|| 
+        (s1->mpeg_enc_ctx_allocated == 0)||
         avctx->coded_width  != s->width ||
         avctx->coded_height != s->height||
         s1->save_aspect_info != s->aspect_ratio_info||
         0)
     {
-    
+
         if (s1->mpeg_enc_ctx_allocated) {
             ParseContext pc= s->parse_context;
             s->parse_context.buffer=0;
@@ -2095,8 +2095,8 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
             s->parse_context= pc;
         }
 
-	if( (s->width == 0 )||(s->height == 0))
-	    return -2;
+        if( (s->width == 0 )||(s->height == 0))
+            return -2;
 
         avcodec_set_dimensions(avctx, s->width, s->height);
         avctx->bit_rate = s->bit_rate;
@@ -2117,28 +2117,28 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
         }else{//mpeg2
         //mpeg2 fps
             av_reduce(
-                &s->avctx->time_base.den, 
-                &s->avctx->time_base.num, 
+                &s->avctx->time_base.den,
+                &s->avctx->time_base.num,
                 frame_rate_tab[s->frame_rate_index].num * s1->frame_rate_ext.num,
                 frame_rate_tab[s->frame_rate_index].den * s1->frame_rate_ext.den,
                 1<<30);
         //mpeg2 aspect
             if(s->aspect_ratio_info > 1){
                 if( (s1->pan_scan.width == 0 )||(s1->pan_scan.height == 0) ){
-                    s->avctx->sample_aspect_ratio= 
+                    s->avctx->sample_aspect_ratio=
                         av_div_q(
-                         mpeg2_aspect[s->aspect_ratio_info], 
+                         mpeg2_aspect[s->aspect_ratio_info],
                          (AVRational){s->width, s->height}
                          );
                 }else{
-                    s->avctx->sample_aspect_ratio= 
+                    s->avctx->sample_aspect_ratio=
                         av_div_q(
-                         mpeg2_aspect[s->aspect_ratio_info], 
+                         mpeg2_aspect[s->aspect_ratio_info],
                          (AVRational){s1->pan_scan.width, s1->pan_scan.height}
                         );
-        	}
+                }
             }else{
-                s->avctx->sample_aspect_ratio= 
+                s->avctx->sample_aspect_ratio=
                     mpeg2_aspect[s->aspect_ratio_info];
             }
         }//mpeg2
@@ -2161,7 +2161,7 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
             if( avctx->idct_algo == FF_IDCT_AUTO )
                 avctx->idct_algo = FF_IDCT_SIMPLE;
 
-        //quantization matrixes may need reordering 
+        //quantization matrixes may need reordering
         //if dct permutation is changed
         memcpy(old_permutation,s->dsp.idct_permutation,64*sizeof(uint8_t));
 
@@ -2178,41 +2178,14 @@ static int mpeg_decode_postinit(AVCodecContext *avctx){
     return 0;
 }
 
-/* return the 8 bit start code value and update the search
-   state. Return -1 if no start code found */
-static int find_start_code(const uint8_t **pbuf_ptr, const uint8_t *buf_end)
-{
-    const uint8_t *buf_ptr= *pbuf_ptr;
-
-    buf_ptr++; //gurantees that -1 is within the array
-    buf_end -= 2; // gurantees that +2 is within the array
-
-    while (buf_ptr < buf_end) {
-        if(*buf_ptr==0){
-            while(buf_ptr < buf_end && buf_ptr[1]==0)
-                buf_ptr++;
-
-            if(buf_ptr[-1] == 0 && buf_ptr[1] == 1){
-                *pbuf_ptr = buf_ptr+3;
-                return buf_ptr[2] + 0x100;
-            }
-        }
-        buf_ptr += 2;
-    }
-    buf_end += 2; //undo the hack above
-    
-    *pbuf_ptr = buf_end;
-    return -1;
-}
-
-static int mpeg1_decode_picture(AVCodecContext *avctx, 
+static int mpeg1_decode_picture(AVCodecContext *avctx,
                                 const uint8_t *buf, int buf_size)
 {
     Mpeg1Context *s1 = avctx->priv_data;
     MpegEncContext *s = &s1->mpeg_enc_ctx;
     int ref, f_code, vbv_delay;
 
-    if(mpeg_decode_postinit(s->avctx) < 0) 
+    if(mpeg_decode_postinit(s->avctx) < 0)
        return -2;
 
     init_get_bits(&s->gb, buf, buf_size*8);
@@ -2241,10 +2214,10 @@ static int mpeg1_decode_picture(AVCodecContext *avctx,
     }
     s->current_picture.pict_type= s->pict_type;
     s->current_picture.key_frame= s->pict_type == I_TYPE;
-    
+
     if(avctx->debug & FF_DEBUG_PICT_INFO)
         av_log(avctx, AV_LOG_DEBUG, "vbv_delay %d, ref %d type:%d\n", vbv_delay, ref, s->pict_type);
-    
+
     s->y_dc_scale = 8;
     s->c_dc_scale = 8;
     s->first_slice = 1;
@@ -2282,7 +2255,7 @@ static void mpeg_decode_sequence_extension(Mpeg1Context *s1)
     s->avctx->sub_id = 2; /* indicates mpeg2 found */
 
     if(s->avctx->debug & FF_DEBUG_PICT_INFO)
-        av_log(s->avctx, AV_LOG_DEBUG, "profile: %d, level: %d vbv buffer: %d, bitrate:%d\n", 
+        av_log(s->avctx, AV_LOG_DEBUG, "profile: %d, level: %d vbv buffer: %d, bitrate:%d\n",
                s->avctx->profile, s->avctx->level, s->avctx->rc_buffer_size, s->bit_rate);
 
 }
@@ -2303,10 +2276,10 @@ static void mpeg_decode_sequence_display_extension(Mpeg1Context *s1)
     skip_bits(&s->gb, 1); //marker
     h= get_bits(&s->gb, 14);
     skip_bits(&s->gb, 1); //marker
-    
+
     s1->pan_scan.width= 16*w;
     s1->pan_scan.height=16*h;
-        
+
     if(s->avctx->debug & FF_DEBUG_PICT_INFO)
         av_log(s->avctx, AV_LOG_DEBUG, "sde w:%d, h:%d\n", w, h);
 }
@@ -2319,16 +2292,16 @@ static void mpeg_decode_picture_display_extension(Mpeg1Context *s1)
     nofco = 1;
     if(s->progressive_sequence){
         if(s->repeat_first_field){
-	    nofco++;
-	    if(s->top_field_first)
-	        nofco++;	
-	}
+            nofco++;
+            if(s->top_field_first)
+                nofco++;
+        }
     }else{
         if(s->picture_structure == PICT_FRAME){
             nofco++;
-	    if(s->repeat_first_field)
-	        nofco++;
-	}
+            if(s->repeat_first_field)
+                nofco++;
+        }
     }
     for(i=0; i<nofco; i++){
         s1->pan_scan.position[i][0]= get_sbits(&s->gb, 16);
@@ -2336,11 +2309,11 @@ static void mpeg_decode_picture_display_extension(Mpeg1Context *s1)
         s1->pan_scan.position[i][1]= get_sbits(&s->gb, 16);
         skip_bits(&s->gb, 1); //marker
     }
-   
+
     if(s->avctx->debug & FF_DEBUG_PICT_INFO)
-        av_log(s->avctx, AV_LOG_DEBUG, "pde (%d,%d) (%d,%d) (%d,%d)\n", 
-            s1->pan_scan.position[0][0], s1->pan_scan.position[0][1], 
-            s1->pan_scan.position[1][0], s1->pan_scan.position[1][1], 
+        av_log(s->avctx, AV_LOG_DEBUG, "pde (%d,%d) (%d,%d) (%d,%d)\n",
+            s1->pan_scan.position[0][0], s1->pan_scan.position[0][1],
+            s1->pan_scan.position[1][0], s1->pan_scan.position[1][1],
             s1->pan_scan.position[2][0], s1->pan_scan.position[2][1]
         );
 }
@@ -2408,7 +2381,7 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s)
         s->first_field ^= 1;
         memset(s->mbskip_table, 0, s->mb_stride*s->mb_height);
     }
-    
+
     if(s->alternate_scan){
         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_alternate_vertical_scan);
@@ -2416,7 +2389,7 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s)
         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_zigzag_direct);
         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , ff_zigzag_direct);
     }
-    
+
     /* composite display not parsed */
     dprintf("intra_dc_precision=%d\n", s->intra_dc_precision);
     dprintf("picture_structure=%d\n", s->picture_structure);
@@ -2429,7 +2402,7 @@ static void mpeg_decode_picture_coding_extension(MpegEncContext *s)
     dprintf("progressive_frame=%d\n", s->progressive_frame);
 }
 
-static void mpeg_decode_extension(AVCodecContext *avctx, 
+static void mpeg_decode_extension(AVCodecContext *avctx,
                                   const uint8_t *buf, int buf_size)
 {
     Mpeg1Context *s1 = avctx->priv_data;
@@ -2437,7 +2410,7 @@ static void mpeg_decode_extension(AVCodecContext *avctx,
     int ext_type;
 
     init_get_bits(&s->gb, buf, buf_size*8);
-    
+
     ext_type = get_bits(&s->gb, 4);
     switch(ext_type) {
     case 0x1:
@@ -2486,22 +2459,22 @@ static int mpeg_field_start(MpegEncContext *s){
             } else if (s->progressive_frame) {
                 s->current_picture_ptr->repeat_pict = 1;
             }
-        }         
+        }
 
         *s->current_picture_ptr->pan_scan= s1->pan_scan;
     }else{ //second field
             int i;
-            
+
             if(!s->current_picture_ptr){
                 av_log(s->avctx, AV_LOG_ERROR, "first field missing\n");
                 return -1;
             }
-            
+
             for(i=0; i<4; i++){
                 s->current_picture.data[i] = s->current_picture_ptr->data[i];
                 if(s->picture_structure == PICT_BOTTOM_FIELD){
                     s->current_picture.data[i] += s->current_picture_ptr->linesize[i];
-                } 
+                }
             }
     }
 #ifdef HAVE_XVMC
@@ -2538,7 +2511,7 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
         av_log(s->avctx, AV_LOG_ERROR, "slice below image (%d >= %d)\n", mb_y, s->mb_height);
         return -1;
     }
-    
+
     init_get_bits(&s->gb, *buf, buf_size*8);
 
     ff_mpeg1_clean_buffers(s);
@@ -2550,12 +2523,12 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
         av_log(s->avctx, AV_LOG_ERROR, "qscale == 0\n");
         return -1;
     }
-    
+
     /* extra slice info */
     while (get_bits1(&s->gb) != 0) {
         skip_bits(&s->gb, 8);
     }
-    
+
     s->mb_x=0;
 
     for(;;) {
@@ -2582,15 +2555,15 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
 
     if (s->mb_y==0 && s->mb_x==0 && (s->first_field || s->picture_structure==PICT_FRAME)) {
         if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-             av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n", 
+             av_log(s->avctx, AV_LOG_DEBUG, "qp:%d fc:%2d%2d%2d%2d %s %s %s %s %s dc:%d pstruct:%d fdct:%d cmv:%d qtype:%d ivlc:%d rff:%d %s\n",
                  s->qscale, s->mpeg_f_code[0][0],s->mpeg_f_code[0][1],s->mpeg_f_code[1][0],s->mpeg_f_code[1][1],
-                 s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")), 
-                 s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"", 
+                 s->pict_type == I_TYPE ? "I" : (s->pict_type == P_TYPE ? "P" : (s->pict_type == B_TYPE ? "B" : "S")),
+                 s->progressive_sequence ? "ps" :"", s->progressive_frame ? "pf" : "", s->alternate_scan ? "alt" :"", s->top_field_first ? "top" :"",
                  s->intra_dc_precision, s->picture_structure, s->frame_pred_frame_dct, s->concealment_motion_vectors,
                  s->q_scale_type, s->intra_vlc_format, s->repeat_first_field, s->chroma_420_type ? "420" :"");
         }
-    }    
-    
+    }
+
     for(;;) {
 #ifdef HAVE_XVMC
         //one 1 we memcpy blocks in xvmcvideo
@@ -2641,7 +2614,7 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
         s->dest[2] += 16 >> (s->chroma_x_shift + lowres);
 
         MPV_decode_mb(s, s->block);
-        
+
         if (++s->mb_x >= s->mb_width) {
             const int mb_size= 16>>s->avctx->lowres;
 
@@ -2660,7 +2633,7 @@ static int mpeg_decode_slice(Mpeg1Context *s1, int mb_y,
                 }else
                     goto eos;
             }
-            
+
             ff_init_block_index(s);
         }
 
@@ -2710,7 +2683,7 @@ static int slice_decode_thread(AVCodecContext *c, void *arg){
 
         ret= mpeg_decode_slice((Mpeg1Context*)s, mb_y, &buf, s->gb.buffer_end - buf);
         emms_c();
-//av_log(c, AV_LOG_DEBUG, "ret:%d resync:%d/%d mb:%d/%d ts:%d/%d ec:%d\n", 
+//av_log(c, AV_LOG_DEBUG, "ret:%d resync:%d/%d mb:%d/%d ts:%d/%d ec:%d\n",
 //ret, s->resync_mb_x, s->resync_mb_y, s->mb_x, s->mb_y, s->start_mb_y, s->end_mb_y, s->error_count);
         if(ret < 0){
             if(s->resync_mb_x>=0 && s->resync_mb_y>=0)
@@ -2718,28 +2691,29 @@ static int slice_decode_thread(AVCodecContext *c, void *arg){
         }else{
             ff_er_add_slice(s, s->resync_mb_x, s->resync_mb_y, s->mb_x-1, s->mb_y, AC_END|DC_END|MV_END);
         }
-        
+
         if(s->mb_y == s->end_mb_y)
             return 0;
-        
-        start_code = find_start_code(&buf, s->gb.buffer_end);
+
+        start_code= -1;
+        buf = ff_find_start_code(buf, s->gb.buffer_end, &start_code);
         mb_y= start_code - SLICE_MIN_START_CODE;
         if(mb_y < 0 || mb_y >= s->end_mb_y)
             return -1;
     }
-    
+
     return 0; //not reached
 }
 
 /**
  * handles slice ends.
- * @return 1 if it seems to be the last slice of 
+ * @return 1 if it seems to be the last slice of
  */
 static int slice_end(AVCodecContext *avctx, AVFrame *pict)
 {
     Mpeg1Context *s1 = avctx->priv_data;
     MpegEncContext *s = &s1->mpeg_enc_ctx;
-       
+
     if (!s1->mpeg_enc_ctx_allocated || !s->current_picture_ptr)
         return 0;
 
@@ -2776,7 +2750,7 @@ static int slice_end(AVCodecContext *avctx, AVFrame *pict)
     }
 }
 
-static int mpeg1_decode_sequence(AVCodecContext *avctx, 
+static int mpeg1_decode_sequence(AVCodecContext *avctx,
                                  const uint8_t *buf, int buf_size)
 {
     Mpeg1Context *s1 = avctx->priv_data;
@@ -2861,7 +2835,7 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
             s->chroma_inter_matrix[j] = v;
         }
     }
-    
+
     if(show_bits(&s->gb, 23) != 0){
         av_log(s->avctx, AV_LOG_ERROR, "sequence header damaged\n");
         return -1;
@@ -2878,11 +2852,11 @@ static int mpeg1_decode_sequence(AVCodecContext *avctx,
     s->out_format = FMT_MPEG1;
     s->swap_uv = 0;//AFAIK VCR2 don't have SEQ_HEADER
     if(s->flags & CODEC_FLAG_LOW_DELAY) s->low_delay=1;
-    
+
     if(s->avctx->debug & FF_DEBUG_PICT_INFO)
-        av_log(s->avctx, AV_LOG_DEBUG, "vbv buffer: %d, bitrate:%d\n", 
+        av_log(s->avctx, AV_LOG_DEBUG, "vbv buffer: %d, bitrate:%d\n",
                s->avctx->rc_buffer_size, s->bit_rate);
-    
+
     return 0;
 }
 
@@ -2911,11 +2885,11 @@ static int vcr2_init_sequence(AVCodecContext *avctx)
     if( avctx->pix_fmt == PIX_FMT_XVMC_MPEG2_IDCT )
         if( avctx->idct_algo == FF_IDCT_AUTO )
             avctx->idct_algo = FF_IDCT_SIMPLE;
-    
+
     if (MPV_common_init(s) < 0)
         return -1;
     exchange_uv(s);//common init reset pblocks, so we swap them here
-    s->swap_uv = 1;// in case of xvmc we need to swap uv for each MB 
+    s->swap_uv = 1;// in case of xvmc we need to swap uv for each MB
     s1->mpeg_enc_ctx_allocated = 1;
 
     for(i=0;i<64;i++) {
@@ -2940,7 +2914,7 @@ static int vcr2_init_sequence(AVCodecContext *avctx)
 }
 
 
-static void mpeg_decode_user_data(AVCodecContext *avctx, 
+static void mpeg_decode_user_data(AVCodecContext *avctx,
                                   const uint8_t *buf, int buf_size)
 {
     const uint8_t *p;
@@ -2969,7 +2943,7 @@ static void mpeg_decode_user_data(AVCodecContext *avctx,
     }
 }
 
-static void mpeg_decode_gop(AVCodecContext *avctx, 
+static void mpeg_decode_gop(AVCodecContext *avctx,
                             const uint8_t *buf, int buf_size){
     Mpeg1Context *s1 = avctx->priv_data;
     MpegEncContext *s = &s1->mpeg_enc_ctx;
@@ -2982,7 +2956,7 @@ static void mpeg_decode_gop(AVCodecContext *avctx,
     init_get_bits(&s->gb, buf, buf_size*8);
 
     drop_frame_flag = get_bits1(&s->gb);
-    
+
     time_code_hours=get_bits(&s->gb,5);
     time_code_minutes = get_bits(&s->gb,6);
     skip_bits1(&s->gb);//marker bit
@@ -2996,8 +2970,8 @@ static void mpeg_decode_gop(AVCodecContext *avctx,
 
     if(s->avctx->debug & FF_DEBUG_PICT_INFO)
         av_log(s->avctx, AV_LOG_DEBUG, "GOP (%2d:%02d:%02d.[%02d]) broken_link=%d\n",
-	    time_code_hours, time_code_minutes, time_code_seconds,
-	    time_code_pictures, broken_link);
+            time_code_hours, time_code_minutes, time_code_seconds,
+            time_code_pictures, broken_link);
 }
 /**
  * finds the end of the current frame in the bitstream.
@@ -3006,14 +2980,12 @@ static void mpeg_decode_gop(AVCodecContext *avctx,
 int ff_mpeg1_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size)
 {
     int i;
-    uint32_t state;
-    
-    state= pc->state;
-    
+    uint32_t state= pc->state;
+
     i=0;
     if(!pc->frame_start_found){
         for(i=0; i<buf_size; i++){
-            state= (state<<8) | buf[i];
+            i= ff_find_start_code(buf+i, buf+buf_size, &state) - buf - 1;
             if(state >= SLICE_MIN_START_CODE && state <= SLICE_MAX_START_CODE){
                 i++;
                 pc->frame_start_found=1;
@@ -3021,28 +2993,28 @@ int ff_mpeg1_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size)
             }
         }
     }
-    
+
     if(pc->frame_start_found){
         /* EOF considered as end of frame */
         if (buf_size == 0)
             return 0;
         for(; i<buf_size; i++){
-            state= (state<<8) | buf[i];
+            i= ff_find_start_code(buf+i, buf+buf_size, &state) - buf - 1;
             if((state&0xFFFFFF00) == 0x100){
                 if(state < SLICE_MIN_START_CODE || state > SLICE_MAX_START_CODE){
                     pc->frame_start_found=0;
-                    pc->state=-1; 
+                    pc->state=-1;
                     return i-3;
                 }
             }
         }
-    }        
+    }
     pc->state= state;
     return END_NOT_FOUND;
 }
 
 /* handle buffering and image synchronisation */
-static int mpeg_decode_frame(AVCodecContext *avctx, 
+static int mpeg_decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
                              uint8_t *buf, int buf_size)
 {
@@ -3055,28 +3027,28 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
     dprintf("fill_buffer\n");
 
     if (buf_size == 0) {
-	/* special case for last picture */
-	if (s2->low_delay==0 && s2->next_picture_ptr) {
-	    *picture= *(AVFrame*)s2->next_picture_ptr;
-	    s2->next_picture_ptr= NULL;
+        /* special case for last picture */
+        if (s2->low_delay==0 && s2->next_picture_ptr) {
+            *picture= *(AVFrame*)s2->next_picture_ptr;
+            s2->next_picture_ptr= NULL;
 
-	    *data_size = sizeof(AVFrame);
-	}
+            *data_size = sizeof(AVFrame);
+        }
         return 0;
     }
 
     if(s2->flags&CODEC_FLAG_TRUNCATED){
         int next= ff_mpeg1_find_frame_end(&s2->parse_context, buf, buf_size);
-        
+
         if( ff_combine_frame(&s2->parse_context, next, &buf, &buf_size) < 0 )
             return buf_size;
-    }    
-    
+    }
+
     buf_ptr = buf;
     buf_end = buf + buf_size;
 
-#if 0    
-    if (s->repeat_field % 2 == 1) { 
+#if 0
+    if (s->repeat_field % 2 == 1) {
         s->repeat_field++;
         //fprintf(stderr,"\nRepeating last frame: %d -> %d! pict: %d %d", avctx->frame_number-1, avctx->frame_number,
         //        s2->picture_number, s->repeat_field);
@@ -3089,12 +3061,13 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
 
     if(s->mpeg_enc_ctx_allocated==0 && avctx->codec_tag == ff_get_fourcc("VCR2"))
         vcr2_init_sequence(avctx);
-    
+
     s->slice_count= 0;
-        
+
     for(;;) {
         /* find start next code */
-        start_code = find_start_code(&buf_ptr, buf_end);
+        start_code = -1;
+        buf_ptr = ff_find_start_code(buf_ptr,buf_end, &start_code);
         if (start_code < 0){
             if(s2->pict_type != B_TYPE || avctx->skip_frame <= AVDISCARD_DEFAULT){
                 if(avctx->thread_count > 1){
@@ -3111,7 +3084,7 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
             }
             return FFMAX(0, buf_ptr - buf - s2->parse_context.last_index);
         }
-        
+
         input_size = buf_end - buf_ptr;
 
         if(avctx->debug & FF_DEBUG_STARTCODE){
@@ -3121,33 +3094,33 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
                 /* prepare data for next start code */
                 switch(start_code) {
                 case SEQ_START_CODE:
-                    mpeg1_decode_sequence(avctx, buf_ptr, 
-					  input_size);
+                    mpeg1_decode_sequence(avctx, buf_ptr,
+                                          input_size);
                     break;
-                            
+
                 case PICTURE_START_CODE:
                     /* we have a complete image : we try to decompress it */
-                    mpeg1_decode_picture(avctx, 
-					 buf_ptr, input_size);
+                    mpeg1_decode_picture(avctx,
+                                         buf_ptr, input_size);
                     break;
                 case EXT_START_CODE:
                     mpeg_decode_extension(avctx,
                                           buf_ptr, input_size);
                     break;
                 case USER_START_CODE:
-                    mpeg_decode_user_data(avctx, 
+                    mpeg_decode_user_data(avctx,
                                           buf_ptr, input_size);
                     break;
                 case GOP_START_CODE:
                     s2->first_field=0;
-                    mpeg_decode_gop(avctx, 
+                    mpeg_decode_gop(avctx,
                                           buf_ptr, input_size);
                     break;
                 default:
                     if (start_code >= SLICE_MIN_START_CODE &&
                         start_code <= SLICE_MAX_START_CODE) {
                         int mb_y= start_code - SLICE_MIN_START_CODE;
-                        
+
                         if(s2->last_picture_ptr==NULL){
                         /* skip b frames if we dont have reference frames */
                             if(s2->pict_type==B_TYPE) break;
@@ -3162,25 +3135,25 @@ static int mpeg_decode_frame(AVCodecContext *avctx,
                             break;
                         /* skip everything if we are in a hurry>=5 */
                         if(avctx->hurry_up>=5) break;
-                        
+
                         if (!s->mpeg_enc_ctx_allocated) break;
 
                         if(s2->codec_id == CODEC_ID_MPEG2VIDEO){
                             if(mb_y < avctx->skip_top || mb_y >= s2->mb_height - avctx->skip_bottom)
                                 break;
                         }
-                        
+
                         if(s2->first_slice){
                             s2->first_slice=0;
                             if(mpeg_field_start(s2) < 0)
                                 return -1;
                         }
-                        
+
                         if(avctx->thread_count > 1){
                             int threshold= (s2->mb_height*s->slice_count + avctx->thread_count/2) / avctx->thread_count;
                             if(threshold <= mb_y){
                                 MpegEncContext *thread_context= s2->thread_context[s->slice_count];
-                                
+
                                 thread_context->start_mb_y= mb_y;
                                 thread_context->end_mb_y  = s2->mb_height;
                                 if(s->slice_count){
@@ -3290,7 +3263,7 @@ AVCodec mpeg2video_encoder = {
 static int mpeg_mc_decode_init(AVCodecContext *avctx){
     Mpeg1Context *s;
 
-    if( avctx->thread_count > 1) 
+    if( avctx->thread_count > 1)
         return -1;
     if( !(avctx->slice_flags & SLICE_FLAG_CODED_ORDER) )
         return -1;
@@ -3321,7 +3294,7 @@ AVCodec mpeg_xvmc_decoder = {
 
 #endif
 
-/* this is ugly i know, but the alternative is too make 
+/* this is ugly i know, but the alternative is too make
    hundreds of vars global and prefix them with ff_mpeg1_
    which is far uglier. */
-#include "mdec.c"  
+#include "mdec.c"
diff --git a/src/libffmpeg/libavcodec/mpeg12data.h b/src/libffmpeg/libavcodec/mpeg12data.h
index d9c09001e..a6b49aa78 100644
--- a/src/libffmpeg/libavcodec/mpeg12data.h
+++ b/src/libffmpeg/libavcodec/mpeg12data.h
@@ -4,14 +4,14 @@
  */
 
 const int16_t ff_mpeg1_default_intra_matrix[64] = {
-	8, 16, 19, 22, 26, 27, 29, 34,
-	16, 16, 22, 24, 27, 29, 34, 37,
-	19, 22, 26, 27, 29, 34, 34, 38,
-	22, 22, 26, 27, 29, 34, 37, 40,
-	22, 26, 27, 29, 32, 35, 40, 48,
-	26, 27, 29, 32, 35, 40, 48, 58,
-	26, 27, 29, 34, 38, 46, 56, 69,
-	27, 29, 35, 38, 46, 56, 69, 83
+        8, 16, 19, 22, 26, 27, 29, 34,
+        16, 16, 22, 24, 27, 29, 34, 37,
+        19, 22, 26, 27, 29, 34, 34, 38,
+        22, 22, 26, 27, 29, 34, 37, 40,
+        22, 26, 27, 29, 32, 35, 40, 48,
+        26, 27, 29, 32, 35, 40, 48, 58,
+        26, 27, 29, 34, 38, 46, 56, 69,
+        27, 29, 35, 38, 46, 56, 69, 83
 };
 
 const int16_t ff_mpeg1_default_non_intra_matrix[64] = {
@@ -87,20 +87,20 @@ static const uint16_t mpeg2_vlc[113][2] = {
   {0x20, 8}, {0x16,13}, {0x15,13}, {0x1f,15},
   {0x1e,15}, {0x1d,15}, {0x1c,15}, {0x1b,15},
   {0x1a,15}, {0x19,15}, {0x13,16}, {0x12,16},
-  {0x11,16}, {0x10,16}, {0x05, 5}, {0x07, 7}, 
-  {0xfc, 8}, {0x0c,10}, {0x14,13}, {0x07, 5}, 
-  {0x26, 8}, {0x1c,12}, {0x13,13}, {0x06, 6}, 
-  {0xfd, 8}, {0x12,12}, {0x07, 6}, {0x04, 9}, 
-  {0x12,13}, {0x06, 7}, {0x1e,12}, {0x14,16}, 
-  {0x04, 7}, {0x15,12}, {0x05, 7}, {0x11,12}, 
-  {0x78, 7}, {0x11,13}, {0x7a, 7}, {0x10,13}, 
-  {0x21, 8}, {0x1a,16}, {0x25, 8}, {0x19,16}, 
-  {0x24, 8}, {0x18,16}, {0x05, 9}, {0x17,16}, 
-  {0x07, 9}, {0x16,16}, {0x0d,10}, {0x15,16}, 
-  {0x1f,12}, {0x1a,12}, {0x19,12}, {0x17,12}, 
-  {0x16,12}, {0x1f,13}, {0x1e,13}, {0x1d,13}, 
-  {0x1c,13}, {0x1b,13}, {0x1f,16}, {0x1e,16}, 
-  {0x1d,16}, {0x1c,16}, {0x1b,16}, 
+  {0x11,16}, {0x10,16}, {0x05, 5}, {0x07, 7},
+  {0xfc, 8}, {0x0c,10}, {0x14,13}, {0x07, 5},
+  {0x26, 8}, {0x1c,12}, {0x13,13}, {0x06, 6},
+  {0xfd, 8}, {0x12,12}, {0x07, 6}, {0x04, 9},
+  {0x12,13}, {0x06, 7}, {0x1e,12}, {0x14,16},
+  {0x04, 7}, {0x15,12}, {0x05, 7}, {0x11,12},
+  {0x78, 7}, {0x11,13}, {0x7a, 7}, {0x10,13},
+  {0x21, 8}, {0x1a,16}, {0x25, 8}, {0x19,16},
+  {0x24, 8}, {0x18,16}, {0x05, 9}, {0x17,16},
+  {0x07, 9}, {0x16,16}, {0x0d,10}, {0x15,16},
+  {0x1f,12}, {0x1a,12}, {0x19,12}, {0x17,12},
+  {0x16,12}, {0x1f,13}, {0x1e,13}, {0x1d,13},
+  {0x1c,13}, {0x1b,13}, {0x1f,16}, {0x1e,16},
+  {0x1d,16}, {0x1c,16}, {0x1b,16},
   {0x01,6}, /* escape */
   {0x06,4}, /* EOB */
 };
@@ -375,7 +375,7 @@ const static uint8_t mpeg2_dc_scale_table1[128]={
     4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
 };
 
-static const uint8_t mpeg2_dc_scale_table2[128]={ 
+static const uint8_t mpeg2_dc_scale_table2[128]={
 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
@@ -383,7 +383,7 @@ static const uint8_t mpeg2_dc_scale_table2[128]={
     2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
 };
 
-static const uint8_t mpeg2_dc_scale_table3[128]={ 
+static const uint8_t mpeg2_dc_scale_table3[128]={
 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
@@ -395,7 +395,7 @@ static const uint8_t *mpeg2_dc_scale_table[4]={
     ff_mpeg1_dc_scale_table,
     mpeg2_dc_scale_table1,
     mpeg2_dc_scale_table2,
-    mpeg2_dc_scale_table3,    
+    mpeg2_dc_scale_table3,
 };
 
 static const float mpeg1_aspect[16]={
@@ -403,7 +403,7 @@ static const float mpeg1_aspect[16]={
     1.0000,
     0.6735,
     0.7031,
-    
+
     0.7615,
     0.8055,
     0.8437,
diff --git a/src/libffmpeg/libavcodec/mpeg4data.h b/src/libffmpeg/libavcodec/mpeg4data.h
index 0ceef90b7..804d2ded8 100644
--- a/src/libffmpeg/libavcodec/mpeg4data.h
+++ b/src/libffmpeg/libavcodec/mpeg4data.h
@@ -45,13 +45,13 @@ const uint8_t DCtab_lum[13][2] =
 {
     {3,3}, {3,2}, {2,2}, {2,3}, {1,3}, {1,4}, {1,5}, {1,6}, {1,7},
     {1,8}, {1,9}, {1,10}, {1,11},
-}; 
+};
 
 const uint8_t DCtab_chrom[13][2] =
 {
     {3,2}, {2,2}, {1,2}, {1,3}, {1,4}, {1,5}, {1,6}, {1,7}, {1,8},
     {1,9}, {1,10}, {1,11}, {1,12},
-}; 
+};
 
 const uint16_t intra_vlc[103][2] = {
 { 0x2, 2 },
@@ -169,54 +169,54 @@ static const uint16_t inter_rvlc[170][2]={ //note this is identical to the intra
 {0x3F7C, 15},{0x3F7D, 15},{0x0000, 4}
 };
 
-static const uint8_t inter_rvlc_run[169]={
- 0,  0,  0,  0,  0,  0,  0,  0, 
- 0,  0,  0,  0,  0,  0,  0,  0, 
- 0,  0,  0,  1,  1,  1,  1,  1, 
- 1,  1,  1,  1,  1,  2,  2,  2, 
- 2,  2,  2,  2,  3,  3,  3,  3, 
- 3,  3,  3,  4,  4,  4,  4,  4, 
- 5,  5,  5,  5,  6,  6,  6,  6, 
- 7,  7,  7,  7,  8,  8,  8,  9, 
- 9,  9, 10, 10, 11, 11, 12, 12, 
-13, 13, 14, 14, 15, 15, 16, 16, 
-17, 17, 18, 19, 20, 21, 22, 23, 
-24, 25, 26, 27, 28, 29, 30, 31, 
-32, 33, 34, 35, 36, 37, 38, 
- 0,  0,  0,  0,  0,  1,  1,  1, 
- 1,  1,  2,  2,  2,  3,  3,  4, 
- 4,  5,  5,  6,  6,  7,  7,  8, 
- 8,  9,  9, 10, 10, 11, 11, 12, 
-12, 13, 13, 14, 15, 16, 17, 18, 
-19, 20, 21, 22, 23, 24, 25, 26, 
-27, 28, 29, 30, 31, 32, 33, 34, 
-35, 36, 37, 38, 39, 40, 41, 42, 
-43, 44,  
+static const int8_t inter_rvlc_run[169]={
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  2,  2,  2,
+ 2,  2,  2,  2,  3,  3,  3,  3,
+ 3,  3,  3,  4,  4,  4,  4,  4,
+ 5,  5,  5,  5,  6,  6,  6,  6,
+ 7,  7,  7,  7,  8,  8,  8,  9,
+ 9,  9, 10, 10, 11, 11, 12, 12,
+13, 13, 14, 14, 15, 15, 16, 16,
+17, 17, 18, 19, 20, 21, 22, 23,
+24, 25, 26, 27, 28, 29, 30, 31,
+32, 33, 34, 35, 36, 37, 38,
+ 0,  0,  0,  0,  0,  1,  1,  1,
+ 1,  1,  2,  2,  2,  3,  3,  4,
+ 4,  5,  5,  6,  6,  7,  7,  8,
+ 8,  9,  9, 10, 10, 11, 11, 12,
+12, 13, 13, 14, 15, 16, 17, 18,
+19, 20, 21, 22, 23, 24, 25, 26,
+27, 28, 29, 30, 31, 32, 33, 34,
+35, 36, 37, 38, 39, 40, 41, 42,
+43, 44,
 };
 
-static const uint8_t inter_rvlc_level[169]={
- 1,  2,  3,  4,  5,  6,  7,  8, 
- 9, 10, 11, 12, 13, 14, 15, 16, 
-17, 18, 19,  1,  2,  3,  4,  5, 
- 6,  7,  8,  9, 10,  1,  2,  3, 
- 4,  5,  6,  7,  1,  2,  3,  4, 
- 5,  6,  7,  1,  2,  3,  4,  5, 
- 1,  2,  3,  4,  1,  2,  3,  4, 
- 1,  2,  3,  4,  1,  2,  3,  1, 
- 2,  3,  1,  2,  1,  2,  1,  2, 
- 1,  2,  1,  2,  1,  2,  1,  2, 
- 1,  2,  1,  1,  1,  1,  1,  1, 
- 1,  1,  1,  1,  1,  1,  1,  1, 
- 1,  1,  1,  1,  1,  1,  1, 
- 1,  2,  3,  4,  5,  1,  2,  3, 
- 4,  5,  1,  2,  3,  1,  2,  1, 
- 2,  1,  2,  1,  2,  1,  2,  1, 
- 2,  1,  2,  1,  2,  1,  2,  1, 
- 2,  1,  2,  1,  1,  1,  1,  1, 
- 1,  1,  1,  1,  1,  1,  1,  1, 
- 1,  1,  1,  1,  1,  1,  1,  1, 
- 1,  1,  1,  1,  1,  1,  1,  1, 
- 1,  1, 
+static const int8_t inter_rvlc_level[169]={
+ 1,  2,  3,  4,  5,  6,  7,  8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+17, 18, 19,  1,  2,  3,  4,  5,
+ 6,  7,  8,  9, 10,  1,  2,  3,
+ 4,  5,  6,  7,  1,  2,  3,  4,
+ 5,  6,  7,  1,  2,  3,  4,  5,
+ 1,  2,  3,  4,  1,  2,  3,  4,
+ 1,  2,  3,  4,  1,  2,  3,  1,
+ 2,  3,  1,  2,  1,  2,  1,  2,
+ 1,  2,  1,  2,  1,  2,  1,  2,
+ 1,  2,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,
+ 1,  2,  3,  4,  5,  1,  2,  3,
+ 4,  5,  1,  2,  3,  1,  2,  1,
+ 2,  1,  2,  1,  2,  1,  2,  1,
+ 2,  1,  2,  1,  2,  1,  2,  1,
+ 2,  1,  2,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,
 };
 
 static RLTable rvlc_rl_inter = {
@@ -273,54 +273,54 @@ static const uint16_t intra_rvlc[170][2]={
 {0x3F7C, 15},{0x3F7D, 15},{0x0000,  4}
 };
 
-static const uint8_t intra_rvlc_run[169]={
- 0,  0,  0,  0,  0,  0,  0,  0, 
- 0,  0,  0,  0,  0,  0,  0,  0, 
- 0,  0,  0,  0,  0,  0,  0,  0, 
- 0,  0,  0,  1,  1,  1,  1,  1, 
- 1,  1,  1,  1,  1,  1,  1,  1, 
- 2,  2,  2,  2,  2,  2,  2,  2, 
- 2,  2,  2,  3,  3,  3,  3,  3, 
- 3,  3,  3,  3,  4,  4,  4,  4, 
- 4,  4,  5,  5,  5,  5,  5,  5, 
- 6,  6,  6,  6,  6,  7,  7,  7, 
- 7,  7,  8,  8,  8,  8,  9,  9, 
- 9,  9, 10, 10, 11, 11, 12, 12, 
-13, 14, 15, 16, 17, 18, 19, 
- 0,  0,  0,  0,  0,  1,  1,  1, 
- 1,  1,  2,  2,  2,  3,  3,  4, 
- 4,  5,  5,  6,  6,  7,  7,  8, 
- 8,  9,  9, 10, 10, 11, 11, 12, 
-12, 13, 13, 14, 15, 16, 17, 18, 
-19, 20, 21, 22, 23, 24, 25, 26, 
-27, 28, 29, 30, 31, 32, 33, 34, 
-35, 36, 37, 38, 39, 40, 41, 42, 
-43, 44, 
+static const int8_t intra_rvlc_run[169]={
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  0,  0,  0,  0,  0,
+ 0,  0,  0,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 2,  2,  2,  2,  2,  2,  2,  2,
+ 2,  2,  2,  3,  3,  3,  3,  3,
+ 3,  3,  3,  3,  4,  4,  4,  4,
+ 4,  4,  5,  5,  5,  5,  5,  5,
+ 6,  6,  6,  6,  6,  7,  7,  7,
+ 7,  7,  8,  8,  8,  8,  9,  9,
+ 9,  9, 10, 10, 11, 11, 12, 12,
+13, 14, 15, 16, 17, 18, 19,
+ 0,  0,  0,  0,  0,  1,  1,  1,
+ 1,  1,  2,  2,  2,  3,  3,  4,
+ 4,  5,  5,  6,  6,  7,  7,  8,
+ 8,  9,  9, 10, 10, 11, 11, 12,
+12, 13, 13, 14, 15, 16, 17, 18,
+19, 20, 21, 22, 23, 24, 25, 26,
+27, 28, 29, 30, 31, 32, 33, 34,
+35, 36, 37, 38, 39, 40, 41, 42,
+43, 44,
 };
 
-static const uint8_t intra_rvlc_level[169]={
- 1,  2,  3,  4,  5,  6,  7,  8, 
- 9, 10, 11, 12, 13, 14, 15, 16, 
-17, 18, 19, 20, 21, 22, 23, 24, 
-25, 26, 27,  1,  2,  3,  4,  5, 
- 6,  7,  8,  9, 10, 11, 12, 13, 
- 1,  2,  3,  4,  5,  6,  7,  8, 
- 9, 10, 11,  1,  2,  3,  4,  5, 
- 6,  7,  8,  9,  1,  2,  3,  4, 
- 5,  6,  1,  2,  3,  4,  5,  6, 
- 1,  2,  3,  4,  5,  1,  2,  3, 
- 4,  5,  1,  2,  3,  4,  1,  2, 
- 3,  4,  1,  2,  1,  2,  1,  2, 
- 1,  1,  1,  1,  1,  1,  1,  
- 1,  2,  3,  4,  5,  1,  2,  3, 
- 4,  5,  1,  2,  3,  1,  2,  1, 
- 2,  1,  2,  1,  2,  1,  2,  1, 
- 2,  1,  2,  1,  2,  1,  2,  1, 
- 2,  1,  2,  1,  1,  1,  1,  1, 
- 1,  1,  1,  1,  1,  1,  1,  1, 
- 1,  1,  1,  1,  1,  1,  1,  1, 
- 1,  1,  1,  1,  1,  1,  1,  1, 
- 1,  1, 
+static const int8_t intra_rvlc_level[169]={
+ 1,  2,  3,  4,  5,  6,  7,  8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+17, 18, 19, 20, 21, 22, 23, 24,
+25, 26, 27,  1,  2,  3,  4,  5,
+ 6,  7,  8,  9, 10, 11, 12, 13,
+ 1,  2,  3,  4,  5,  6,  7,  8,
+ 9, 10, 11,  1,  2,  3,  4,  5,
+ 6,  7,  8,  9,  1,  2,  3,  4,
+ 5,  6,  1,  2,  3,  4,  5,  6,
+ 1,  2,  3,  4,  5,  1,  2,  3,
+ 4,  5,  1,  2,  3,  4,  1,  2,
+ 3,  4,  1,  2,  1,  2,  1,  2,
+ 1,  1,  1,  1,  1,  1,  1,
+ 1,  2,  3,  4,  5,  1,  2,  3,
+ 4,  5,  1,  2,  3,  1,  2,  1,
+ 2,  1,  2,  1,  2,  1,  2,  1,
+ 2,  1,  2,  1,  2,  1,  2,  1,
+ 2,  1,  2,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,  1,  1,  1,  1,  1,  1,
+ 1,  1,
 };
 
 static RLTable rvlc_rl_intra = {
@@ -333,7 +333,7 @@ static RLTable rvlc_rl_intra = {
 
 static const uint16_t sprite_trajectory_tab[15][2] = {
  {0x00, 2}, {0x02, 3},  {0x03, 3},  {0x04, 3}, {0x05, 3}, {0x06, 3},
- {0x0E, 4}, {0x1E, 5},  {0x3E, 6},  {0x7E, 7}, {0xFE, 8}, 
+ {0x0E, 4}, {0x1E, 5},  {0x3E, 6},  {0x7E, 7}, {0xFE, 8},
  {0x1FE, 9},{0x3FE, 10},{0x7FE, 11},{0xFFE, 12},
 };
 
@@ -369,7 +369,7 @@ const int16_t ff_mpeg4_default_intra_matrix[64] = {
  22, 23, 24, 26, 28, 30, 32, 35,
  23, 24, 26, 28, 30, 32, 35, 38,
  25, 26, 28, 30, 32, 35, 38, 41,
- 27, 28, 30, 32, 35, 38, 41, 45, 
+ 27, 28, 30, 32, 35, 38, 41, 45,
 };
 
 const int16_t ff_mpeg4_default_non_intra_matrix[64] = {
diff --git a/src/libffmpeg/libavcodec/mpegaudio.h b/src/libffmpeg/libavcodec/mpegaudio.h
index 2bcf38eac..0ee58240c 100644
--- a/src/libffmpeg/libavcodec/mpegaudio.h
+++ b/src/libffmpeg/libavcodec/mpegaudio.h
@@ -4,7 +4,7 @@
  */
 
 /* max frame size, in samples */
-#define MPA_FRAME_SIZE 1152 
+#define MPA_FRAME_SIZE 1152
 
 /* max compressed frame size */
 #define MPA_MAX_CODED_FRAME_SIZE 1792
@@ -55,7 +55,7 @@ int l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
 int mpa_decode_header(AVCodecContext *avctx, uint32_t head);
 void ff_mpa_synth_init(MPA_INT *window);
 void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
-			 MPA_INT *window, int *dither_state,
+                         MPA_INT *window, int *dither_state,
                          OUT_INT *samples, int incr,
                          int32_t sb_samples[SBLIMIT]);
 
diff --git a/src/libffmpeg/libavcodec/mpegaudiodec.c b/src/libffmpeg/libavcodec/mpegaudiodec.c
index 32998a6eb..ff1f1113e 100644
--- a/src/libffmpeg/libavcodec/mpegaudiodec.c
+++ b/src/libffmpeg/libavcodec/mpegaudiodec.c
@@ -14,13 +14,13 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file mpegaudiodec.c
  * MPEG Audio decoder.
- */ 
+ */
 
 //#define DEBUG
 #include "avcodec.h"
@@ -64,14 +64,14 @@ static always_inline int MULH(int a, int b){
 struct GranuleDef;
 
 typedef struct MPADecodeContext {
-    uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE];	/* input buffer */
+    uint8_t inbuf1[2][MPA_MAX_CODED_FRAME_SIZE + BACKSTEP_SIZE];        /* input buffer */
     int inbuf_index;
     uint8_t *inbuf_ptr, *inbuf;
     int frame_size;
     int free_format_frame_size; /* frame size in case of free format
                                    (zero if currently unknown) */
     /* next header (used in free format parsing) */
-    uint32_t free_format_next_header; 
+    uint32_t free_format_next_header;
     int error_protection;
     int layer;
     int sample_rate;
@@ -140,7 +140,7 @@ static void compute_antialias_integer(MPADecodeContext *s, GranuleDef *g);
 static void compute_antialias_float(MPADecodeContext *s, GranuleDef *g);
 
 /* vlc structure for decoding layer 3 huffman tables */
-static VLC huff_vlc[16]; 
+static VLC huff_vlc[16];
 static uint8_t *huff_code_table[16];
 static VLC huff_quad_vlc[2];
 /* computed from band_size_long */
@@ -173,7 +173,7 @@ static const int32_t scale_factor_mult2[3][3] = {
 
 void ff_mpa_synth_init(MPA_INT *window);
 static MPA_INT window[512] __attribute__((aligned(16)));
-    
+
 /* layer 1 unscaling */
 /* n = number of bits of the mantissa minus 1 */
 static inline int l1_unscale(int n, int mant, int scale_factor)
@@ -257,7 +257,7 @@ static int int_pow(int i, int *exp_ptr)
 {
     int e, er, eq, j;
     int a, a1;
-    
+
     /* renormalize */
     a = i;
     e = POW_FRAC_BITS;
@@ -308,8 +308,8 @@ static int decode_init(AVCodecContext * avctx)
     avctx->sample_fmt= SAMPLE_FMT_S32;
 #else
     avctx->sample_fmt= SAMPLE_FMT_S16;
-#endif    
-    
+#endif
+
     if(avctx->antialias_algo != FF_AA_FLOAT)
         s->compute_antialias= compute_antialias_integer;
     else
@@ -334,28 +334,28 @@ static int decode_init(AVCodecContext * avctx)
             scale_factor_mult[i][1] = MULL(FIXR(0.7937005259 * 2.0), norm);
             scale_factor_mult[i][2] = MULL(FIXR(0.6299605249 * 2.0), norm);
             dprintf("%d: norm=%x s=%x %x %x\n",
-                    i, norm, 
+                    i, norm,
                     scale_factor_mult[i][0],
                     scale_factor_mult[i][1],
                     scale_factor_mult[i][2]);
         }
-        
-	ff_mpa_synth_init(window);
-        
+
+        ff_mpa_synth_init(window);
+
         /* huffman decode tables */
         huff_code_table[0] = NULL;
         for(i=1;i<16;i++) {
             const HuffTable *h = &mpa_huff_tables[i];
-	    int xsize, x, y;
+            int xsize, x, y;
             unsigned int n;
             uint8_t *code_table;
 
             xsize = h->xsize;
             n = xsize * xsize;
             /* XXX: fail test */
-            init_vlc(&huff_vlc[i], 8, n, 
+            init_vlc(&huff_vlc[i], 8, n,
                      h->bits, 1, 1, h->codes, 2, 2, 1);
-            
+
             code_table = av_mallocz(n);
             j = 0;
             for(x=0;x<xsize;x++) {
@@ -365,7 +365,7 @@ static int decode_init(AVCodecContext * avctx)
             huff_code_table[i] = code_table;
         }
         for(i=0;i<2;i++) {
-            init_vlc(&huff_quad_vlc[i], i == 0 ? 7 : 4, 16, 
+            init_vlc(&huff_quad_vlc[i], i == 0 ? 7 : 4, 16,
                      mpa_quad_bits[i], 1, 1, mpa_quad_codes[i], 1, 1, 1);
         }
 
@@ -378,14 +378,14 @@ static int decode_init(AVCodecContext * avctx)
             band_index_long[i][22] = k;
         }
 
-	/* compute n ^ (4/3) and store it in mantissa/exp format */
-	table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0]));
+        /* compute n ^ (4/3) and store it in mantissa/exp format */
+        table_4_3_exp= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_exp[0]));
         if(!table_4_3_exp)
-	    return -1;
-	table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0]));
+            return -1;
+        table_4_3_value= av_mallocz_static(TABLE_4_3_SIZE * sizeof(table_4_3_value[0]));
         if(!table_4_3_value)
             return -1;
-        
+
         int_pow_init();
         for(i=1;i<TABLE_4_3_SIZE;i++) {
             double f, fm;
@@ -400,7 +400,7 @@ static int decode_init(AVCodecContext * avctx)
 //            av_log(NULL, AV_LOG_DEBUG, "%d %d %f\n", i, m, pow((double)i, 4.0 / 3.0));
             table_4_3_exp[i] = -e;
         }
-        
+
         for(i=0;i<7;i++) {
             float f;
             int v;
@@ -427,7 +427,7 @@ static int decode_init(AVCodecContext * avctx)
                 k = i & 1;
                 is_table_lsf[j][k ^ 1][i] = FIXR(f);
                 is_table_lsf[j][k][i] = FIXR(1.0);
-                dprintf("is_table_lsf %d %d: %x %x\n", 
+                dprintf("is_table_lsf %d %d: %x %x\n",
                         i, j, is_table_lsf[j][0][i], is_table_lsf[j][1][i]);
             }
         }
@@ -440,11 +440,11 @@ static int decode_init(AVCodecContext * avctx)
             csa_table[i][0] = FIXHR(cs/4);
             csa_table[i][1] = FIXHR(ca/4);
             csa_table[i][2] = FIXHR(ca/4) + FIXHR(cs/4);
-            csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4); 
+            csa_table[i][3] = FIXHR(ca/4) - FIXHR(cs/4);
             csa_table_float[i][0] = cs;
             csa_table_float[i][1] = ca;
             csa_table_float[i][2] = ca + cs;
-            csa_table_float[i][3] = ca - cs; 
+            csa_table_float[i][3] = ca - cs;
 //            printf("%d %d %d %d\n", FIX(cs), FIX(cs-1), FIX(ca), FIX(cs)-FIX(ca));
 //            av_log(NULL, AV_LOG_DEBUG,"%f %f %f %f\n", cs, ca, ca+cs, ca-cs);
         }
@@ -453,10 +453,10 @@ static int decode_init(AVCodecContext * avctx)
         for(i=0;i<36;i++) {
             for(j=0; j<4; j++){
                 double d;
-                
+
                 if(j==2 && i%3 != 1)
                     continue;
-                
+
                 d= sin(M_PI * (i + 0.5) / 36.0);
                 if(j==1){
                     if     (i>=30) d= 0;
@@ -609,7 +609,7 @@ static void dct32(int32_t *out, int32_t *tab)
     BF(5, 10, COS1_5);
     BF(6,  9, COS1_6);
     BF(7,  8, COS1_7);
-    
+
     BF(16, 31, -COS1_0);
     BF(17, 30, -COS1_1);
     BF(18, 29, -COS1_2);
@@ -618,23 +618,23 @@ static void dct32(int32_t *out, int32_t *tab)
     BF(21, 26, -COS1_5);
     BF(22, 25, -COS1_6);
     BF(23, 24, -COS1_7);
-    
+
     /* pass 3 */
     BF(0, 7, COS2_0);
     BF(1, 6, COS2_1);
     BF(2, 5, COS2_2);
     BF(3, 4, COS2_3);
-    
+
     BF(8, 15, -COS2_0);
     BF(9, 14, -COS2_1);
     BF(10, 13, -COS2_2);
     BF(11, 12, -COS2_3);
-    
+
     BF(16, 23, COS2_0);
     BF(17, 22, COS2_1);
     BF(18, 21, COS2_2);
     BF(19, 20, COS2_3);
-    
+
     BF(24, 31, -COS2_0);
     BF(25, 30, -COS2_1);
     BF(26, 29, -COS2_2);
@@ -643,28 +643,28 @@ static void dct32(int32_t *out, int32_t *tab)
     /* pass 4 */
     BF(0, 3, COS3_0);
     BF(1, 2, COS3_1);
-    
+
     BF(4, 7, -COS3_0);
     BF(5, 6, -COS3_1);
-    
+
     BF(8, 11, COS3_0);
     BF(9, 10, COS3_1);
-    
+
     BF(12, 15, -COS3_0);
     BF(13, 14, -COS3_1);
-    
+
     BF(16, 19, COS3_0);
     BF(17, 18, COS3_1);
-    
+
     BF(20, 23, -COS3_0);
     BF(21, 22, -COS3_1);
-    
+
     BF(24, 27, COS3_0);
     BF(25, 26, COS3_1);
-    
+
     BF(28, 31, -COS3_0);
     BF(29, 30, -COS3_1);
-    
+
     /* pass 5 */
     BF1(0, 1, 2, 3);
     BF2(4, 5, 6, 7);
@@ -674,9 +674,9 @@ static void dct32(int32_t *out, int32_t *tab)
     BF2(20, 21, 22, 23);
     BF1(24, 25, 26, 27);
     BF2(28, 29, 30, 31);
-    
+
     /* pass 6 */
-    
+
     ADD( 8, 12);
     ADD(12, 10);
     ADD(10, 14);
@@ -701,7 +701,7 @@ static void dct32(int32_t *out, int32_t *tab)
     out[22] = tab[13];
     out[14] = tab[14];
     out[30] = tab[15];
-    
+
     ADD(24, 28);
     ADD(28, 26);
     ADD(26, 30);
@@ -764,7 +764,7 @@ static inline int round_sample(int *sum)
 
 #else
 
-static inline int round_sample(int64_t *sum) 
+static inline int round_sample(int64_t *sum)
 {
     int sum1;
     sum1 = (int)((*sum) >> OUT_SHIFT);
@@ -837,15 +837,15 @@ void ff_mpa_synth_init(MPA_INT *window)
             v = -v;
         if (i != 0)
             window[512 - i] = v;
-    }	
+    }
 }
 
 /* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
    32 samples. */
 /* XXX: optimize by avoiding ring buffer usage */
 void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
-			 MPA_INT *window, int *dither_state,
-                         OUT_INT *samples, int incr, 
+                         MPA_INT *window, int *dither_state,
+                         OUT_INT *samples, int incr,
                          int32_t sb_samples[SBLIMIT])
 {
     int32_t tmp[32];
@@ -860,7 +860,7 @@ void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
 #endif
 
     dct32(tmp, sb_samples);
-    
+
     offset = *synth_buf_offset;
     synth_buf = synth_buf_ptr + offset;
 
@@ -909,7 +909,7 @@ void ff_mpa_synth_filter(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
         w++;
         w2--;
     }
-    
+
     p = synth_buf + 32;
     SUM8(sum, -=, w + 32, p);
     *samples = round_sample(&sum);
@@ -951,11 +951,11 @@ static void imdct12(int *out, int *in)
 
     in2= MULH(2*in2, C3);
     in3= MULH(2*in3, C3);
-    
+
     t1 = in0 - in4;
     t2 = MULL(in1 - in5, icos36[4]);
 
-    out[ 7]= 
+    out[ 7]=
     out[10]= t1 + t2;
     out[ 1]=
     out[ 4]= t1 - t2;
@@ -963,18 +963,18 @@ static void imdct12(int *out, int *in)
     in0 += in4>>1;
     in4 = in0 + in2;
     in1 += in5>>1;
-    in5 = MULL(in1 + in3, icos36[1]);    
-    out[ 8]= 
+    in5 = MULL(in1 + in3, icos36[1]);
+    out[ 8]=
     out[ 9]= in4 + in5;
     out[ 2]=
     out[ 3]= in4 - in5;
-    
+
     in0 -= in2;
     in1 = MULL(in1 - in3, icos36[7]);
     out[ 0]=
     out[ 5]= in0 - in1;
     out[ 6]=
-    out[11]= in0 + in1;    
+    out[11]= in0 + in1;
 }
 
 /* cos(pi*i/18) */
@@ -1006,7 +1006,7 @@ static void imdct36(int *out, int *buf, int *in, int *win)
 //more accurate but slower
         int64_t t0, t1, t2, t3;
         t2 = in1[2*4] + in1[2*8] - in1[2*2];
-        
+
         t3 = (in1[2*0] + (int64_t)(in1[2*6]>>1))<<32;
         t1 = in1[2*0] - in1[2*6];
         tmp1[ 6] = t1 - (t2>>1);
@@ -1015,11 +1015,11 @@ static void imdct36(int *out, int *buf, int *in, int *win)
         t0 = MUL64(2*(in1[2*2] + in1[2*4]),    C2);
         t1 = MUL64(   in1[2*4] - in1[2*8] , -2*C8);
         t2 = MUL64(2*(in1[2*2] + in1[2*8]),   -C4);
-        
+
         tmp1[10] = (t3 - t0 - t2) >> 32;
         tmp1[ 2] = (t3 + t0 + t1) >> 32;
         tmp1[14] = (t3 + t2 - t1) >> 32;
-        
+
         tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - in1[2*1]), -C3);
         t2 = MUL64(2*(in1[2*1] + in1[2*5]),    C1);
         t3 = MUL64(   in1[2*5] - in1[2*7] , -2*C7);
@@ -1032,7 +1032,7 @@ static void imdct36(int *out, int *buf, int *in, int *win)
         tmp1[ 8] = (t3 - t1 - t0) >> 32;
 #else
         t2 = in1[2*4] + in1[2*8] - in1[2*2];
-        
+
         t3 = in1[2*0] + (in1[2*6]>>1);
         t1 = in1[2*0] - in1[2*6];
         tmp1[ 6] = t1 - (t2>>1);
@@ -1041,11 +1041,11 @@ static void imdct36(int *out, int *buf, int *in, int *win)
         t0 = MULH(2*(in1[2*2] + in1[2*4]),    C2);
         t1 = MULH(   in1[2*4] - in1[2*8] , -2*C8);
         t2 = MULH(2*(in1[2*2] + in1[2*8]),   -C4);
-        
+
         tmp1[10] = t3 - t0 - t2;
         tmp1[ 2] = t3 + t0 + t1;
         tmp1[14] = t3 + t2 - t1;
-        
+
         tmp1[ 4] = MULH(2*(in1[2*5] + in1[2*7] - in1[2*1]), -C3);
         t2 = MULH(2*(in1[2*1] + in1[2*5]),    C1);
         t3 = MULH(   in1[2*5] - in1[2*7] , -2*C7);
@@ -1070,14 +1070,14 @@ static void imdct36(int *out, int *buf, int *in, int *win)
         t3 = tmp[i + 3];
         s1 = MULL(t3 + t2, icos36[j]);
         s3 = MULL(t3 - t2, icos36[8 - j]);
-        
+
         t0 = s0 + s1;
         t1 = s0 - s1;
         out[(9 + j)*SBLIMIT] =  MULH(t1, win[9 + j]) + buf[9 + j];
         out[(8 - j)*SBLIMIT] =  MULH(t1, win[8 - j]) + buf[8 - j];
         buf[9 + j] = MULH(t0, win[18 + 9 + j]);
         buf[8 - j] = MULH(t0, win[18 + 8 - j]);
-        
+
         t0 = s2 + s3;
         t1 = s2 - s3;
         out[(9 + 8 - j)*SBLIMIT] =  MULH(t1, win[9 + 8 - j]) + buf[9 + 8 - j];
@@ -1111,7 +1111,7 @@ static int decode_header(MPADecodeContext *s, uint32_t header)
         s->lsf = 1;
         mpeg25 = 1;
     }
-    
+
     s->layer = 4 - ((header >> 17) & 3);
     /* extract frequency */
     sample_rate_index = (header >> 10) & 3;
@@ -1134,7 +1134,7 @@ static int decode_header(MPADecodeContext *s, uint32_t header)
         s->nb_channels = 1;
     else
         s->nb_channels = 2;
-    
+
     if (bitrate_index != 0) {
         frame_size = mpa_bitrate_tab[s->lsf][s->layer - 1][bitrate_index];
         s->bit_rate = frame_size * 1000;
@@ -1177,7 +1177,7 @@ static int decode_header(MPADecodeContext *s, uint32_t header)
             break;
         }
     }
-    
+
 #if defined(DEBUG)
     printf("layer%d, %d Hz, %d kbits/s, ",
            s->layer, s->sample_rate, s->bit_rate);
@@ -1241,7 +1241,7 @@ static int mp_decode_layer1(MPADecodeContext *s)
     uint8_t allocation[MPA_MAX_CHANNELS][SBLIMIT];
     uint8_t scale_factors[MPA_MAX_CHANNELS][SBLIMIT];
 
-    if (s->mode == MPA_JSTEREO) 
+    if (s->mode == MPA_JSTEREO)
         bound = (s->mode_ext + 1) * 4;
     else
         bound = SBLIMIT;
@@ -1269,7 +1269,7 @@ static int mp_decode_layer1(MPADecodeContext *s)
             scale_factors[1][i] = get_bits(&s->gb, 6);
         }
     }
-    
+
     /* compute samples */
     for(j=0;j<12;j++) {
         for(i=0;i<bound;i++) {
@@ -1305,17 +1305,17 @@ static int mp_decode_layer1(MPADecodeContext *s)
 int l2_select_table(int bitrate, int nb_channels, int freq, int lsf)
 {
     int ch_bitrate, table;
-    
+
     ch_bitrate = bitrate / nb_channels;
     if (!lsf) {
         if ((freq == 48000 && ch_bitrate >= 56) ||
-            (ch_bitrate >= 56 && ch_bitrate <= 80)) 
+            (ch_bitrate >= 56 && ch_bitrate <= 80))
             table = 0;
-        else if (freq != 48000 && ch_bitrate >= 96) 
+        else if (freq != 48000 && ch_bitrate >= 96)
             table = 1;
-        else if (freq != 32000 && ch_bitrate <= 48) 
+        else if (freq != 32000 && ch_bitrate <= 48)
             table = 2;
-        else 
+        else
             table = 3;
     } else {
         table = 4;
@@ -1334,12 +1334,12 @@ static int mp_decode_layer2(MPADecodeContext *s)
     int scale, qindex, bits, steps, k, l, m, b;
 
     /* select decoding table */
-    table = l2_select_table(s->bit_rate / 1000, s->nb_channels, 
+    table = l2_select_table(s->bit_rate / 1000, s->nb_channels,
                             s->sample_rate, s->lsf);
     sblimit = sblimit_table[table];
     alloc_table = alloc_tables[table];
 
-    if (s->mode == MPA_JSTEREO) 
+    if (s->mode == MPA_JSTEREO)
         bound = (s->mode_ext + 1) * 4;
     else
         bound = sblimit;
@@ -1379,11 +1379,11 @@ static int mp_decode_layer2(MPADecodeContext *s)
     /* scale codes */
     for(i=0;i<sblimit;i++) {
         for(ch=0;ch<s->nb_channels;ch++) {
-            if (bit_alloc[ch][i]) 
+            if (bit_alloc[ch][i])
                 scale_code[ch][i] = get_bits(&s->gb, 2);
         }
     }
-    
+
     /* scale factors */
     for(i=0;i<sblimit;i++) {
         for(ch=0;ch<s->nb_channels;ch++) {
@@ -1446,13 +1446,13 @@ static int mp_decode_layer2(MPADecodeContext *s)
                             /* 3 values at the same time */
                             v = get_bits(&s->gb, -bits);
                             steps = quant_steps[qindex];
-                            s->sb_samples[ch][k * 12 + l + 0][i] = 
+                            s->sb_samples[ch][k * 12 + l + 0][i] =
                                 l2_unscale_group(steps, v % steps, scale);
                             v = v / steps;
-                            s->sb_samples[ch][k * 12 + l + 1][i] = 
+                            s->sb_samples[ch][k * 12 + l + 1][i] =
                                 l2_unscale_group(steps, v % steps, scale);
                             v = v / steps;
-                            s->sb_samples[ch][k * 12 + l + 2][i] = 
+                            s->sb_samples[ch][k * 12 + l + 2][i] =
                                 l2_unscale_group(steps, v, scale);
                         } else {
                             for(m=0;m<3;m++) {
@@ -1468,7 +1468,7 @@ static int mp_decode_layer2(MPADecodeContext *s)
                     }
                 }
                 /* next subband in alloc table */
-                j += 1 << bit_alloc_bits; 
+                j += 1 << bit_alloc_bits;
             }
             /* XXX: find a way to avoid this duplication of code */
             for(i=bound;i<sblimit;i++) {
@@ -1486,26 +1486,26 @@ static int mp_decode_layer2(MPADecodeContext *s)
                         steps = quant_steps[qindex];
                         mant = v % steps;
                         v = v / steps;
-                        s->sb_samples[0][k * 12 + l + 0][i] = 
+                        s->sb_samples[0][k * 12 + l + 0][i] =
                             l2_unscale_group(steps, mant, scale0);
-                        s->sb_samples[1][k * 12 + l + 0][i] = 
+                        s->sb_samples[1][k * 12 + l + 0][i] =
                             l2_unscale_group(steps, mant, scale1);
                         mant = v % steps;
                         v = v / steps;
-                        s->sb_samples[0][k * 12 + l + 1][i] = 
+                        s->sb_samples[0][k * 12 + l + 1][i] =
                             l2_unscale_group(steps, mant, scale0);
-                        s->sb_samples[1][k * 12 + l + 1][i] = 
+                        s->sb_samples[1][k * 12 + l + 1][i] =
                             l2_unscale_group(steps, mant, scale1);
-                        s->sb_samples[0][k * 12 + l + 2][i] = 
+                        s->sb_samples[0][k * 12 + l + 2][i] =
                             l2_unscale_group(steps, v, scale0);
-                        s->sb_samples[1][k * 12 + l + 2][i] = 
+                        s->sb_samples[1][k * 12 + l + 2][i] =
                             l2_unscale_group(steps, v, scale1);
                     } else {
                         for(m=0;m<3;m++) {
                             mant = get_bits(&s->gb, bits);
-                            s->sb_samples[0][k * 12 + l + m][i] = 
+                            s->sb_samples[0][k * 12 + l + m][i] =
                                 l1_unscale(bits - 1, mant, scale0);
-                            s->sb_samples[1][k * 12 + l + m][i] = 
+                            s->sb_samples[1][k * 12 + l + m][i] =
                                 l1_unscale(bits - 1, mant, scale1);
                         }
                     }
@@ -1518,7 +1518,7 @@ static int mp_decode_layer2(MPADecodeContext *s)
                     s->sb_samples[1][k * 12 + l + 2][i] = 0;
                 }
                 /* next subband in alloc table */
-                j += 1 << bit_alloc_bits; 
+                j += 1 << bit_alloc_bits;
             }
             /* fill remaining samples to zero */
             for(i=sblimit;i<SBLIMIT;i++) {
@@ -1545,7 +1545,7 @@ static void seek_to_maindata(MPADecodeContext *s, unsigned int backstep)
 
     /* copy old data before current one */
     ptr -= backstep;
-    memcpy(ptr, s->inbuf1[s->inbuf_index ^ 1] + 
+    memcpy(ptr, s->inbuf1[s->inbuf_index ^ 1] +
            BACKSTEP_SIZE + s->old_frame_size - backstep, backstep);
     /* init get bits again */
     init_get_bits(&s->gb, ptr, (s->frame_size + backstep)*8);
@@ -1576,7 +1576,7 @@ static inline void lsf_sf_expand(int *slen,
     slen[0] = sf;
 }
 
-static void exponents_from_scale_factors(MPADecodeContext *s, 
+static void exponents_from_scale_factors(MPADecodeContext *s,
                                          GranuleDef *g,
                                          int16_t *exponents)
 {
@@ -1650,7 +1650,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
             if (get_bits_count(&s->gb) >= end_pos)
                 break;
             if (code_table) {
-                code = get_vlc(&s->gb, vlc);
+                code = get_vlc2(&s->gb, vlc->table, 8, 2);
                 if (code < 0)
                     return -1;
                 y = code_table[code];
@@ -1660,7 +1660,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
                 x = 0;
                 y = 0;
             }
-            dprintf("region=%d n=%d x=%d y=%d exp=%d\n", 
+            dprintf("region=%d n=%d x=%d y=%d exp=%d\n",
                     i, g->region_size[i] - j, x, y, exponents[s_index]);
             if (x) {
                 if (x == 15)
@@ -1684,7 +1684,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
             g->sb_hybrid[s_index++] = v;
         }
     }
-            
+
     /* high frequencies */
     vlc = &huff_quad_vlc[g->count1table_select];
     last_gb.buffer = NULL;
@@ -1701,7 +1701,7 @@ static int huffman_decode(MPADecodeContext *s, GranuleDef *g,
         }
         last_gb= s->gb;
 
-        code = get_vlc(&s->gb, vlc);
+        code = get_vlc2(&s->gb, vlc->table, vlc->bits, 2);
         dprintf("t=%d code=%d\n", g->count1table_select, code);
         if (code < 0)
             return -1;
@@ -1744,7 +1744,7 @@ static void reorder_block(MPADecodeContext *s, GranuleDef *g)
     } else {
         ptr = g->sb_hybrid;
     }
-    
+
     for(i=g->short_start;i<13;i++) {
         len = band_size_short[s->sample_rate_index][i];
         ptr1 = ptr;
@@ -1780,7 +1780,7 @@ static void compute_stereo(MPADecodeContext *s,
             is_tab = is_table_lsf[g1->scalefac_compress & 1];
             sf_max = 16;
         }
-            
+
         tab0 = g0->sb_hybrid + 576;
         tab1 = g1->sb_hybrid + 576;
 
@@ -1831,8 +1831,8 @@ static void compute_stereo(MPADecodeContext *s,
             }
         }
 
-        non_zero_found = non_zero_found_short[0] | 
-            non_zero_found_short[1] | 
+        non_zero_found = non_zero_found_short[0] |
+            non_zero_found_short[1] |
             non_zero_found_short[2];
 
         for(i = g1->long_end - 1;i >= 0;i--) {
@@ -1903,7 +1903,7 @@ static void compute_antialias_integer(MPADecodeContext *s,
     } else {
         n = SBLIMIT - 1;
     }
-    
+
     ptr = g->sb_hybrid + 18;
     for(i = n;i > 0;i--) {
         int tmp0, tmp1, tmp2;
@@ -1923,8 +1923,8 @@ static void compute_antialias_integer(MPADecodeContext *s,
         INT_AA(5)
         INT_AA(6)
         INT_AA(7)
-            
-        ptr += 18;       
+
+        ptr += 18;
     }
 }
 
@@ -1943,17 +1943,17 @@ static void compute_antialias_float(MPADecodeContext *s,
     } else {
         n = SBLIMIT - 1;
     }
-    
+
     ptr = g->sb_hybrid + 18;
     for(i = n;i > 0;i--) {
         float tmp0, tmp1;
-        float *csa = &csa_table_float[0][0];       
+        float *csa = &csa_table_float[0][0];
 #define FLOAT_AA(j)\
         tmp0= ptr[-1-j];\
         tmp1= ptr[   j];\
         ptr[-1-j] = lrintf(tmp0 * csa[0+4*j] - tmp1 * csa[1+4*j]);\
         ptr[   j] = lrintf(tmp0 * csa[1+4*j] + tmp1 * csa[0+4*j]);
-        
+
         FLOAT_AA(0)
         FLOAT_AA(1)
         FLOAT_AA(2)
@@ -1963,12 +1963,12 @@ static void compute_antialias_float(MPADecodeContext *s,
         FLOAT_AA(6)
         FLOAT_AA(7)
 
-        ptr += 18;       
+        ptr += 18;
     }
 }
 
 static void compute_imdct(MPADecodeContext *s,
-                          GranuleDef *g, 
+                          GranuleDef *g,
                           int32_t *sb_samples,
                           int32_t *mdct_buf)
 {
@@ -2018,7 +2018,7 @@ static void compute_imdct(MPADecodeContext *s,
         /* select frequency inversion */
         win = mdct_win[2] + ((4 * 36) & -(j & 1));
         out_ptr = sb_samples + j;
-        
+
         for(i=0; i<6; i++){
             *out_ptr = buf[i];
             out_ptr += SBLIMIT;
@@ -2064,11 +2064,11 @@ void sample_dump(int fnum, int32_t *tab, int n)
     char buf[512];
     int i;
     int32_t v;
-    
+
     f = files[fnum];
     if (!f) {
-        snprintf(buf, sizeof(buf), "/tmp/out%d.%s.pcm", 
-                fnum, 
+        snprintf(buf, sizeof(buf), "/tmp/out%d.%s.pcm",
+                fnum,
 #ifdef USE_HIGHPRECISION
                 "hp"
 #else
@@ -2080,7 +2080,7 @@ void sample_dump(int fnum, int32_t *tab, int n)
             return;
         files[fnum] = f;
     }
-    
+
     if (fnum == 0) {
         static int pos = 0;
         av_log(NULL, AV_LOG_DEBUG, "pos=%d\n", pos);
@@ -2128,7 +2128,7 @@ static int mp_decode_layer3(MPADecodeContext *s)
             granules[ch][1].scfsi = get_bits(&s->gb, 4);
         }
     }
-    
+
     for(gr=0;gr<nb_granules;gr++) {
         for(ch=0;ch<s->nb_channels;ch++) {
             dprintf("gr=%d ch=%d: side_info\n", gr, ch);
@@ -2138,7 +2138,7 @@ static int mp_decode_layer3(MPADecodeContext *s)
             g->global_gain = get_bits(&s->gb, 8);
             /* if MS stereo only is selected, we precompute the
                1/sqrt(2) renormalization factor */
-            if ((s->mode_ext & (MODE_EXT_MS_STEREO | MODE_EXT_I_STEREO)) == 
+            if ((s->mode_ext & (MODE_EXT_MS_STEREO | MODE_EXT_I_STEREO)) ==
                 MODE_EXT_MS_STEREO)
                 g->global_gain -= 2;
             if (s->lsf)
@@ -2153,15 +2153,15 @@ static int mp_decode_layer3(MPADecodeContext *s)
                 g->switch_point = get_bits(&s->gb, 1);
                 for(i=0;i<2;i++)
                     g->table_select[i] = get_bits(&s->gb, 5);
-                for(i=0;i<3;i++) 
+                for(i=0;i<3;i++)
                     g->subblock_gain[i] = get_bits(&s->gb, 3);
                 /* compute huffman coded region sizes */
                 if (g->block_type == 2)
                     g->region_size[0] = (36 / 2);
                 else {
-                    if (s->sample_rate_index <= 2) 
+                    if (s->sample_rate_index <= 2)
                         g->region_size[0] = (36 / 2);
-                    else if (s->sample_rate_index != 8) 
+                    else if (s->sample_rate_index != 8)
                         g->region_size[0] = (54 / 2);
                     else
                         g->region_size[0] = (108 / 2);
@@ -2176,15 +2176,15 @@ static int mp_decode_layer3(MPADecodeContext *s)
                 /* compute huffman coded region sizes */
                 region_address1 = get_bits(&s->gb, 4);
                 region_address2 = get_bits(&s->gb, 3);
-                dprintf("region1=%d region2=%d\n", 
+                dprintf("region1=%d region2=%d\n",
                         region_address1, region_address2);
-                g->region_size[0] = 
+                g->region_size[0] =
                     band_index_long[s->sample_rate_index][region_address1 + 1] >> 1;
                 l = region_address1 + region_address2 + 2;
                 /* should not overflow */
                 if (l > 22)
                     l = 22;
-                g->region_size[1] = 
+                g->region_size[1] =
                     band_index_long[s->sample_rate_index][l] >> 1;
             }
             /* convert region offsets to region sizes and truncate
@@ -2211,11 +2211,11 @@ static int mp_decode_layer3(MPADecodeContext *s)
                         g->long_end = 6;
                     else
                         g->long_end = 4; /* 8000 Hz */
-                    
+
                     if (s->sample_rate_index != 8)
                         g->short_start = 3;
                     else
-                        g->short_start = 2; 
+                        g->short_start = 2;
                 } else {
                     g->long_end = 0;
                     g->short_start = 0;
@@ -2224,7 +2224,7 @@ static int mp_decode_layer3(MPADecodeContext *s)
                 g->short_start = 13;
                 g->long_end = 22;
             }
-            
+
             g->preflag = 0;
             if (!s->lsf)
                 g->preflag = get_bits(&s->gb, 1);
@@ -2244,9 +2244,9 @@ static int mp_decode_layer3(MPADecodeContext *s)
     for(gr=0;gr<nb_granules;gr++) {
         for(ch=0;ch<s->nb_channels;ch++) {
             g = &granules[ch][gr];
-            
+
             bits_pos = get_bits_count(&s->gb);
-            
+
             if (!s->lsf) {
                 uint8_t *sc;
                 int slen, slen1, slen2;
@@ -2285,7 +2285,7 @@ static int mp_decode_layer3(MPADecodeContext *s)
                 }
 #if defined(DEBUG)
                 {
-                    printf("scfsi=%x gr=%d ch=%d scale_factors:\n", 
+                    printf("scfsi=%x gr=%d ch=%d scale_factors:\n",
                            g->scfsi, gr, ch);
                     for(i=0;i<j;i++)
                         printf(" %d", g->scale_factors[i]);
@@ -2342,7 +2342,7 @@ static int mp_decode_layer3(MPADecodeContext *s)
                     g->scale_factors[j] = 0;
 #if defined(DEBUG)
                 {
-                    printf("gr=%d ch=%d scale_factors:\n", 
+                    printf("gr=%d ch=%d scale_factors:\n",
                            gr, ch);
                     for(i=0;i<40;i++)
                         printf(" %d", g->scale_factors[i]);
@@ -2389,7 +2389,7 @@ static int mp_decode_layer3(MPADecodeContext *s)
 #if defined(DEBUG)
             sample_dump(1, g->sb_hybrid, 576);
 #endif
-            compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]); 
+            compute_imdct(s, g, &s->sb_samples[ch][18 * gr][0], s->mdct_buf[ch]);
 #if defined(DEBUG)
             sample_dump(2, &s->sb_samples[ch][18 * gr][0], 576);
 #endif
@@ -2398,15 +2398,15 @@ static int mp_decode_layer3(MPADecodeContext *s)
     return nb_granules * 18;
 }
 
-static int mp_decode_frame(MPADecodeContext *s, 
+static int mp_decode_frame(MPADecodeContext *s,
                            OUT_INT *samples)
 {
     int i, nb_frames, ch;
     OUT_INT *samples_ptr;
 
-    init_get_bits(&s->gb, s->inbuf + HEADER_SIZE, 
+    init_get_bits(&s->gb, s->inbuf + HEADER_SIZE,
                   (s->inbuf_ptr - s->inbuf - HEADER_SIZE)*8);
-    
+
     /* skip error protection field */
     if (s->error_protection)
         get_bits(&s->gb, 16);
@@ -2440,21 +2440,21 @@ static int mp_decode_frame(MPADecodeContext *s,
         samples_ptr = samples + ch;
         for(i=0;i<nb_frames;i++) {
             ff_mpa_synth_filter(s->synth_buf[ch], &(s->synth_buf_offset[ch]),
-			 window, &s->dither_state,
-			 samples_ptr, s->nb_channels,
+                         window, &s->dither_state,
+                         samples_ptr, s->nb_channels,
                          s->sb_samples[ch][i]);
             samples_ptr += 32 * s->nb_channels;
         }
     }
 #ifdef DEBUG
-    s->frame_count++;        
+    s->frame_count++;
 #endif
     return nb_frames * 32 * sizeof(OUT_INT) * s->nb_channels;
 }
 
 static int decode_frame(AVCodecContext * avctx,
-			void *data, int *data_size,
-			uint8_t * buf, int buf_size)
+                        void *data, int *data_size,
+                        uint8_t * buf, int buf_size)
 {
     MPADecodeContext *s = avctx->priv_data;
     uint32_t header;
@@ -2464,8 +2464,8 @@ static int decode_frame(AVCodecContext * avctx,
 
     buf_ptr = buf;
     while (buf_size > 0) {
-	len = s->inbuf_ptr - s->inbuf;
-	if (s->frame_size == 0) {
+        len = s->inbuf_ptr - s->inbuf;
+        if (s->frame_size == 0) {
             /* special case for next header for first frame in free
                format case (XXX: find a simpler method) */
             if (s->free_format_next_header != 0) {
@@ -2477,34 +2477,34 @@ static int decode_frame(AVCodecContext * avctx,
                 s->free_format_next_header = 0;
                 goto got_header;
             }
-	    /* no header seen : find one. We need at least HEADER_SIZE
+            /* no header seen : find one. We need at least HEADER_SIZE
                bytes to parse it */
-	    len = HEADER_SIZE - len;
-	    if (len > buf_size)
-		len = buf_size;
-	    if (len > 0) {
-		memcpy(s->inbuf_ptr, buf_ptr, len);
-		buf_ptr += len;
-		buf_size -= len;
-		s->inbuf_ptr += len;
-	    }
-	    if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) {
+            len = HEADER_SIZE - len;
+            if (len > buf_size)
+                len = buf_size;
+            if (len > 0) {
+                memcpy(s->inbuf_ptr, buf_ptr, len);
+                buf_ptr += len;
+                buf_size -= len;
+                s->inbuf_ptr += len;
+            }
+            if ((s->inbuf_ptr - s->inbuf) >= HEADER_SIZE) {
             got_header:
-		header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
-		    (s->inbuf[2] << 8) | s->inbuf[3];
+                header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
+                    (s->inbuf[2] << 8) | s->inbuf[3];
 
-		if (ff_mpa_check_header(header) < 0) {
-		    /* no sync found : move by one byte (inefficient, but simple!) */
-		    memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
-		    s->inbuf_ptr--;
+                if (ff_mpa_check_header(header) < 0) {
+                    /* no sync found : move by one byte (inefficient, but simple!) */
+                    memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
+                    s->inbuf_ptr--;
                     dprintf("skip %x\n", header);
                     /* reset free format frame size to give a chance
                        to get a new bitrate */
                     s->free_format_frame_size = 0;
-		} else {
-		    if (decode_header(s, header) == 1) {
+                } else {
+                    if (decode_header(s, header) == 1) {
                         /* free format: prepare to compute frame size */
-			s->frame_size = -1;
+                        s->frame_size = -1;
                     }
                     /* update codec info */
                     avctx->sample_rate = s->sample_rate;
@@ -2525,18 +2525,18 @@ static int decode_frame(AVCodecContext * avctx,
                             avctx->frame_size = 1152;
                         break;
                     }
-		}
-	    }
+                }
+            }
         } else if (s->frame_size == -1) {
             /* free format : find next sync to compute frame size */
-	    len = MPA_MAX_CODED_FRAME_SIZE - len;
-	    if (len > buf_size)
-		len = buf_size;
+            len = MPA_MAX_CODED_FRAME_SIZE - len;
+            if (len > buf_size)
+                len = buf_size;
             if (len == 0) {
-		/* frame too long: resync */
+                /* frame too long: resync */
                 s->frame_size = 0;
-		memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
-		s->inbuf_ptr--;
+                memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
+                s->inbuf_ptr--;
             } else {
                 uint8_t *p, *pend;
                 uint32_t header1;
@@ -2568,7 +2568,7 @@ static int decode_frame(AVCodecContext * avctx,
                             s->free_format_frame_size -= padding * 4;
                         else
                             s->free_format_frame_size -= padding;
-                        dprintf("free frame size=%d padding=%d\n", 
+                        dprintf("free frame size=%d padding=%d\n",
                                 s->free_format_frame_size, padding);
                         decode_header(s, header1);
                         goto next_data;
@@ -2580,19 +2580,19 @@ static int decode_frame(AVCodecContext * avctx,
                 s->inbuf_ptr += len;
                 buf_size -= len;
             }
-	} else if (len < s->frame_size) {
+        } else if (len < s->frame_size) {
             if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
                 s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
-	    len = s->frame_size - len;
-	    if (len > buf_size)
-		len = buf_size;
-	    memcpy(s->inbuf_ptr, buf_ptr, len);
-	    buf_ptr += len;
-	    s->inbuf_ptr += len;
-	    buf_size -= len;
-	}
+            len = s->frame_size - len;
+            if (len > buf_size)
+                len = buf_size;
+            memcpy(s->inbuf_ptr, buf_ptr, len);
+            buf_ptr += len;
+            s->inbuf_ptr += len;
+            buf_size -= len;
+        }
     next_data:
-        if (s->frame_size > 0 && 
+        if (s->frame_size > 0 &&
             (s->inbuf_ptr - s->inbuf) >= s->frame_size) {
             if (avctx->parse_only) {
                 /* simply return the frame data */
@@ -2601,22 +2601,22 @@ static int decode_frame(AVCodecContext * avctx,
             } else {
                 out_size = mp_decode_frame(s, out_samples);
             }
-	    s->inbuf_ptr = s->inbuf;
-	    s->frame_size = 0;
+            s->inbuf_ptr = s->inbuf;
+            s->frame_size = 0;
             if(out_size>=0)
-	        *data_size = out_size;
+                *data_size = out_size;
             else
                 av_log(avctx, AV_LOG_DEBUG, "Error while decoding mpeg audio frame\n"); //FIXME return -1 / but also return the number of bytes consumed
-	    break;
-	}
+            break;
+        }
     }
     return buf_ptr - buf;
 }
 
 
 static int decode_frame_adu(AVCodecContext * avctx,
-			void *data, int *data_size,
-			uint8_t * buf, int buf_size)
+                        void *data, int *data_size,
+                        uint8_t * buf, int buf_size)
 {
     MPADecodeContext *s = avctx->priv_data;
     uint32_t header;
@@ -2747,8 +2747,8 @@ static int decode_close_mp3on4(AVCodecContext * avctx)
 
 
 static int decode_frame_mp3on4(AVCodecContext * avctx,
-			void *data, int *data_size,
-			uint8_t * buf, int buf_size)
+                        void *data, int *data_size,
+                        uint8_t * buf, int buf_size)
 {
     MP3On4DecodeContext *s = avctx->priv_data;
     MPADecodeContext *m;
diff --git a/src/libffmpeg/libavcodec/mpegaudiodectab.h b/src/libffmpeg/libavcodec/mpegaudiodectab.h
index a60edb646..572f7acb5 100644
--- a/src/libffmpeg/libavcodec/mpegaudiodectab.h
+++ b/src/libffmpeg/libavcodec/mpegaudiodectab.h
@@ -1,6 +1,6 @@
 /**
  * @file mpegaudiodectab.h
- * mpeg audio layer decoder tables. 
+ * mpeg audio layer decoder tables.
  */
 
 const uint16_t mpa_bitrate_tab[2][3][15] = {
@@ -67,137 +67,137 @@ const int quant_steps[17] = {
 
 /* we use a negative value if grouped */
 const int quant_bits[17] = {
-    -5,  -7,  3, -10, 4, 
+    -5,  -7,  3, -10, 4,
      5,  6,  7,  8,  9,
     10, 11, 12, 13, 14,
-    15, 16 
+    15, 16
 };
 
 /* encoding tables which give the quantization index. Note how it is
    possible to store them efficiently ! */
 static const unsigned char alloc_table_0[] = {
- 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 
- 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 
- 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 2,  0,  1, 16, 
- 2,  0,  1, 16, 
- 2,  0,  1, 16, 
- 2,  0,  1, 16, 
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
 };
 
 static const unsigned char alloc_table_1[] = {
- 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 
- 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 
- 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 3,  0,  1,  2,  3,  4,  5, 16, 
- 2,  0,  1, 16, 
- 2,  0,  1, 16, 
- 2,  0,  1, 16, 
- 2,  0,  1, 16, 
- 2,  0,  1, 16, 
- 2,  0,  1, 16, 
- 2,  0,  1, 16, 
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  2,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 3,  0,  1,  2,  3,  4,  5, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
+ 2,  0,  1, 16,
 };
 
 static const unsigned char alloc_table_2[] = {
- 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 
- 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
 };
 
 static const unsigned char alloc_table_3[] = {
- 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 
- 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+ 4,  0,  1,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
 };
 
 static const unsigned char alloc_table_4[] = {
  4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 
- 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 3,  0,  1,  3,  4,  5,  6,  7, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
- 2,  0,  1,  3, 
-};
-
-const unsigned char *alloc_tables[5] = 
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+ 4,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 3,  0,  1,  3,  4,  5,  6,  7,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+ 2,  0,  1,  3,
+};
+
+const unsigned char *alloc_tables[5] =
 { alloc_table_0, alloc_table_1, alloc_table_2, alloc_table_3, alloc_table_4, };
 
 /*******************************************************/
@@ -214,7 +214,7 @@ static const uint8_t lsf_nsf_table[6][3][4] = {
     { {  6,  5,  5, 5 }, {  9,  9,  9, 9 }, {  6,  9,  9, 9 } },
     { {  6,  5,  7, 3 }, {  9,  9, 12, 6 }, {  6,  9, 12, 6 } },
     { { 11, 10,  0, 0 }, { 18, 18,  0, 0 }, { 15, 18,  0, 0 } },
-    { {  7,  7,  7, 0 }, { 12, 12, 12, 0 }, {  6, 15, 12, 0 } }, 
+    { {  7,  7,  7, 0 }, { 12, 12, 12, 0 }, {  6, 15, 12, 0 } },
     { {  6,  6,  6, 3 }, { 12,  9,  9, 6 }, {  6, 12,  9, 6 } },
     { {  8,  8,  5, 0 }, { 15, 12,  9, 0 }, {  6, 18,  9, 0 } },
 };
diff --git a/src/libffmpeg/libavcodec/mpegaudiotab.h b/src/libffmpeg/libavcodec/mpegaudiotab.h
index d2c13edd6..2e7d3372f 100644
--- a/src/libffmpeg/libavcodec/mpegaudiotab.h
+++ b/src/libffmpeg/libavcodec/mpegaudiotab.h
@@ -1,30 +1,30 @@
 /*
  * mpeg audio layer 2 tables. Most of them come from the mpeg audio
  * specification.
- * 
+ *
  * Copyright (c) 2000, 2001 Fabrice Bellard.
  *
  * The licence of this code is contained in file LICENCE found in the
- * same archive 
+ * same archive
  */
 
 /**
  * @file mpegaudiotab.h
- * mpeg audio layer 2 tables. 
+ * mpeg audio layer 2 tables.
  * Most of them come from the mpeg audio specification.
  */
- 
+
 #define SQRT2 1.41421356237309514547
 
 static const int costab32[30] = {
     FIX(0.54119610014619701222),
     FIX(1.3065629648763763537),
-    
+
     FIX(0.50979557910415917998),
     FIX(2.5629154477415054814),
     FIX(0.89997622313641556513),
     FIX(0.60134488693504528634),
-    
+
     FIX(0.5024192861881556782),
     FIX(5.1011486186891552563),
     FIX(0.78815462345125020249),
@@ -33,7 +33,7 @@ static const int costab32[30] = {
     FIX(1.0606776859903470633),
     FIX(1.7224470982383341955),
     FIX(0.52249861493968885462),
-    
+
     FIX(10.19000812354803287),
     FIX(0.674808341455005678),
     FIX(1.1694399334328846596),
@@ -75,13 +75,13 @@ static unsigned char scale_diff_table[128];
 static unsigned short total_quant_bits[17];
 
 /* signal to noise ratio of each quantification step (could be
-   computed from quant_steps[]). The values are dB multiplied by 10 
+   computed from quant_steps[]). The values are dB multiplied by 10
 */
-static const unsigned short quant_snr[17] = { 
+static const unsigned short quant_snr[17] = {
      70, 110, 160, 208,
     253, 316, 378, 439,
-    499, 559, 620, 680, 
-    740, 800, 861, 920, 
+    499, 559, 620, 680,
+    740, 800, 861, 920,
     980
 };
 
diff --git a/src/libffmpeg/libavcodec/mpegvideo.c b/src/libffmpeg/libavcodec/mpegvideo.c
index 847250fff..3ecf6d29d 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.c
+++ b/src/libffmpeg/libavcodec/mpegvideo.c
@@ -15,16 +15,16 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * 4MV & hq & b-frame encoding stuff by Michael Niedermayer <michaelni@gmx.at>
  */
- 
+
 /**
  * @file mpegvideo.c
  * The simplest mpeg encoder (well, it was the simplest!).
- */ 
- 
+ */
+
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
@@ -49,17 +49,17 @@
 #ifdef CONFIG_ENCODERS
 static void encode_picture(MpegEncContext *s, int picture_number);
 #endif //CONFIG_ENCODERS
-static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
+static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale);
-static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
+static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale);
 static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale);
 static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale);
-static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
+static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
-static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
+static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale);
 static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w);
 #ifdef CONFIG_ENCODERS
@@ -124,7 +124,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[
 
     for(qscale=qmin; qscale<=qmax; qscale++){
         int i;
-        if (dsp->fdct == ff_jpeg_fdct_islow 
+        if (dsp->fdct == ff_jpeg_fdct_islow
 #ifdef FAAN_POSTSCALE
             || dsp->fdct == ff_faandct
 #endif
@@ -135,8 +135,8 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[
                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
-                
-                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
+
+                qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) /
                                 (qscale * quant_matrix[j]));
             }
         } else if (dsp->fdct == fdct_ifast
@@ -150,8 +150,8 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[
                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
                 /* 3444240       >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */
-                
-                qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) / 
+
+                qmat[qscale][i] = (int)((uint64_t_C(1) << (QMAT_SHIFT + 14)) /
                                 (aanscales[i] * qscale * quant_matrix[j]));
             }
         } else {
@@ -170,7 +170,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[
                 qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]);
             }
         }
-        
+
         for(i=intra; i<64; i++){
             int64_t max= 8191;
             if (dsp->fdct == fdct_ifast
@@ -180,7 +180,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[
                    ) {
                 max= (8191LL*aanscales[i]) >> 14;
             }
-            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){ 
+            while(((max * qmat[qscale][i]) >> shift) > INT_MAX){
                 shift++;
             }
         }
@@ -193,7 +193,7 @@ static void convert_matrix(DSPContext *dsp, int (*qmat)[64], uint16_t (*qmat16)[
 static inline void update_qscale(MpegEncContext *s){
     s->qscale= (s->lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
     s->qscale= clip(s->qscale, s->avctx->qmin, s->avctx->qmax);
-    
+
     s->lambda2= (s->lambda*s->lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
 }
 #endif //CONFIG_ENCODERS
@@ -201,7 +201,7 @@ static inline void update_qscale(MpegEncContext *s){
 void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_scantable){
     int i;
     int end;
-    
+
     st->scantable= src_scantable;
 
     for(i=0; i<64; i++){
@@ -212,7 +212,7 @@ void ff_init_scantable(uint8_t *permutation, ScanTable *st, const uint8_t *src_s
         st->inverse[j] = i;
 #endif
     }
-    
+
     end=-1;
     for(i=0; i<64; i++){
         int j;
@@ -236,6 +236,32 @@ void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix){
 }
 #endif //CONFIG_ENCODERS
 
+const uint8_t *ff_find_start_code(const uint8_t * restrict p, const uint8_t *end, uint32_t * restrict state){
+    int i;
+
+    for(i=0; i<3; i++){
+        uint32_t tmp= *state << 8;
+        *state= tmp + *(p++);
+        if(tmp == 0x100 || p==end)
+            return p;
+    }
+
+    while(p<end){
+        if     (p[-1] > 1      ) p+= 3;
+        else if(p[-2]          ) p+= 2;
+        else if(p[-3]|(p[-1]-1)) p++;
+        else{
+            p++;
+            break;
+        }
+    }
+
+    p= FFMIN(p, end)-4;
+    *state=  be2me_32(unaligned32(p));
+
+    return p+4;
+}
+
 /* init common dct for both encoder and decoder */
 int DCT_common_init(MpegEncContext *s)
 {
@@ -250,7 +276,7 @@ int DCT_common_init(MpegEncContext *s)
     s->dct_quantize= dct_quantize_c;
     s->denoise_dct= denoise_dct_c;
 #endif //CONFIG_ENCODERS
-        
+
 #ifdef HAVE_MMX
     MPV_common_init_mmx(s);
 #endif
@@ -280,7 +306,7 @@ int DCT_common_init(MpegEncContext *s)
 #endif //CONFIG_ENCODERS
 
     /* load & permutate scantables
-       note: only wmv uses different ones 
+       note: only wmv uses different ones
     */
     if(s->alternate_scan){
         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , ff_alternate_vertical_scan);
@@ -324,7 +350,7 @@ static void copy_picture_attributes(MpegEncContext *s, AVFrame *dst, AVFrame *sr
             src->motion_subsample_log2, dst->motion_subsample_log2);
 
         memcpy(dst->mb_type, src->mb_type, s->mb_stride * s->mb_height * sizeof(dst->mb_type[0]));
-        
+
         for(i=0; i<2; i++){
             int stride= ((16*s->mb_width )>>src->motion_subsample_log2) + 1;
             int height= ((16*s->mb_height)>>src->motion_subsample_log2);
@@ -349,20 +375,20 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
     const int b8_array_size= s->b8_stride*s->mb_height*2;
     const int b4_array_size= s->b4_stride*s->mb_height*4;
     int i;
-    
+
     if(shared){
         assert(pic->data[0]);
         assert(pic->type == 0 || pic->type == FF_BUFFER_TYPE_SHARED);
         pic->type= FF_BUFFER_TYPE_SHARED;
     }else{
         int r;
-        
+
         assert(!pic->data[0]);
-        
+
         r= s->avctx->get_buffer(s->avctx, (AVFrame*)pic);
-        
+
         if(r<0 || !pic->age || !pic->type || !pic->data[0]){
-	    av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
+            av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed (%d %d %d %p)\n", r, pic->age, pic->type, pic->data[0]);
             return -1;
         }
 
@@ -379,9 +405,9 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
         s->linesize  = pic->linesize[0];
         s->uvlinesize= pic->linesize[1];
     }
-    
+
     if(pic->qscale_table==NULL){
-        if (s->encoding) {        
+        if (s->encoding) {
             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
@@ -418,7 +444,7 @@ static int alloc_picture(MpegEncContext *s, Picture *pic, int shared){
     s->prev_pict_types[0]= s->pict_type;
     if(pic->age < PREV_PICT_TYPES_BUFFER_SIZE && s->prev_pict_types[pic->age] == B_TYPE)
         pic->age= INT_MAX; // skipped MBs in b frames are quite rare in mpeg1/2 and its a bit tricky to skip them anyway
-    
+
     return 0;
 fail: //for the CHECKED_ALLOCZ macro
     return -1;
@@ -447,25 +473,25 @@ static void free_picture(MpegEncContext *s, Picture *pic){
         av_freep(&pic->motion_val_base[i]);
         av_freep(&pic->ref_index[i]);
     }
-    
+
     if(pic->type == FF_BUFFER_TYPE_SHARED){
         for(i=0; i<4; i++){
             pic->base[i]=
             pic->data[i]= NULL;
         }
-        pic->type= 0;        
+        pic->type= 0;
     }
 }
 
 static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
     int i;
 
-    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264) 
+    // edge emu needs blocksize + filter length - 1 (=17x17 for halfpel / 21x21 for h264)
     CHECKED_ALLOCZ(s->allocated_edge_emu_buffer, (s->width+64)*2*17*2); //(width + edge + align)*interlaced*MBsize*tolerance
     s->edge_emu_buffer= s->allocated_edge_emu_buffer + (s->width+64)*2*17;
 
      //FIXME should be linesize instead of s->width*2 but that isnt known before get_buffer()
-    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t)) 
+    CHECKED_ALLOCZ(s->me.scratchpad,  (s->width+64)*4*16*2*sizeof(uint8_t))
     s->rd_scratchpad=   s->me.scratchpad;
     s->b_scratchpad=    s->me.scratchpad;
     s->obmc_scratchpad= s->me.scratchpad + 16;
@@ -475,7 +501,7 @@ static int init_duplicate_context(MpegEncContext *s, MpegEncContext *base){
         if(s->avctx->noise_reduction){
             CHECKED_ALLOCZ(s->dct_error_sum, 2 * 64 * sizeof(int))
         }
-    }   
+    }
     CHECKED_ALLOCZ(s->blocks, 64*12*2 * sizeof(DCTELEM))
     s->block= s->blocks[0];
 
@@ -492,10 +518,10 @@ static void free_duplicate_context(MpegEncContext *s){
 
     av_freep(&s->allocated_edge_emu_buffer); s->edge_emu_buffer= NULL;
     av_freep(&s->me.scratchpad);
-    s->rd_scratchpad=   
-    s->b_scratchpad=    
+    s->rd_scratchpad=
+    s->b_scratchpad=
     s->obmc_scratchpad= NULL;
-    
+
     av_freep(&s->dct_error_sum);
     av_freep(&s->me.map);
     av_freep(&s->me.score_map);
@@ -594,9 +620,9 @@ void MPV_decode_defaults(MpegEncContext *s){
 #ifdef CONFIG_ENCODERS
 static void MPV_encode_defaults(MpegEncContext *s){
     static int done=0;
-    
+
     MPV_common_defaults(s);
-    
+
     if(!done){
         int i;
         done=1;
@@ -613,7 +639,7 @@ static void MPV_encode_defaults(MpegEncContext *s){
 }
 #endif //CONFIG_ENCODERS
 
-/** 
+/**
  * init common structure for both encoder and decoder.
  * this assumes that some variables like width/height are already set
  */
@@ -621,7 +647,9 @@ int MPV_common_init(MpegEncContext *s)
 {
     int y_size, c_size, yc_size, i, mb_array_size, mv_table_size, x, y;
 
-    if(s->avctx->thread_count > MAX_THREADS || (16*s->avctx->thread_count > s->height && s->height)){
+    s->mb_height = (s->height + 15) / 16;
+
+    if(s->avctx->thread_count > MAX_THREADS || (s->avctx->thread_count > s->mb_height && s->mb_height)){
         av_log(s->avctx, AV_LOG_ERROR, "too many threads\n");
         return -1;
     }
@@ -636,7 +664,6 @@ int MPV_common_init(MpegEncContext *s)
     s->flags2= s->avctx->flags2;
 
     s->mb_width  = (s->width  + 15) / 16;
-    s->mb_height = (s->height + 15) / 16;
     s->mb_stride = s->mb_width + 1;
     s->b8_stride = s->mb_width*2 + 1;
     s->b4_stride = s->mb_width*4 + 1;
@@ -652,27 +679,27 @@ int MPV_common_init(MpegEncContext *s)
     s->v_edge_pos= s->mb_height*16;
 
     s->mb_num = s->mb_width * s->mb_height;
-    
+
     s->block_wrap[0]=
     s->block_wrap[1]=
     s->block_wrap[2]=
     s->block_wrap[3]= s->b8_stride;
     s->block_wrap[4]=
     s->block_wrap[5]= s->mb_stride;
- 
+
     y_size = s->b8_stride * (2 * s->mb_height + 1);
     c_size = s->mb_stride * (s->mb_height + 1);
     yc_size = y_size + 2 * c_size;
-    
+
     /* convert fourcc to upper case */
-    s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)          
+    s->avctx->codec_tag=   toupper( s->avctx->codec_tag     &0xFF)
                         + (toupper((s->avctx->codec_tag>>8 )&0xFF)<<8 )
-                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16) 
+                        + (toupper((s->avctx->codec_tag>>16)&0xFF)<<16)
                         + (toupper((s->avctx->codec_tag>>24)&0xFF)<<24);
 
-    s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)          
+    s->avctx->stream_codec_tag=   toupper( s->avctx->stream_codec_tag     &0xFF)
                                + (toupper((s->avctx->stream_codec_tag>>8 )&0xFF)<<8 )
-                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16) 
+                               + (toupper((s->avctx->stream_codec_tag>>16)&0xFF)<<16)
                                + (toupper((s->avctx->stream_codec_tag>>24)&0xFF)<<24);
 
     s->avctx->coded_frame= (AVFrame*)&s->current_picture;
@@ -684,7 +711,7 @@ int MPV_common_init(MpegEncContext *s)
         }
     }
     s->mb_index2xy[ s->mb_height*s->mb_width ] = (s->mb_height-1)*s->mb_stride + s->mb_width; //FIXME really needed?
-    
+
     if (s->encoding) {
         /* Allocate MV tables */
         CHECKED_ALLOCZ(s->p_mv_table_base            , mv_table_size * 2 * sizeof(int16_t))
@@ -707,16 +734,16 @@ int MPV_common_init(MpegEncContext *s)
 
         /* Allocate MB type table */
         CHECKED_ALLOCZ(s->mb_type  , mb_array_size * sizeof(uint16_t)) //needed for encoding
-        
+
         CHECKED_ALLOCZ(s->lambda_table, mb_array_size * sizeof(int))
-        
+
         CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int))
         CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int))
         CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t))
         CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t))
         CHECKED_ALLOCZ(s->input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
         CHECKED_ALLOCZ(s->reordered_input_picture, MAX_PICTURE_COUNT * sizeof(Picture*))
-        
+
         if(s->avctx->noise_reduction){
             CHECKED_ALLOCZ(s->dct_offset, 2 * 64 * sizeof(uint16_t))
         }
@@ -724,7 +751,7 @@ int MPV_common_init(MpegEncContext *s)
     CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture))
 
     CHECKED_ALLOCZ(s->error_status_table, mb_array_size*sizeof(uint8_t))
-    
+
     if(s->codec_id==CODEC_ID_MPEG4 || (s->flags & CODEC_FLAG_INTERLACED_ME)){
         /* interlaced direct mode decoding tables */
             for(i=0; i<2; i++){
@@ -747,16 +774,16 @@ int MPV_common_init(MpegEncContext *s)
         s->ac_val[0] = s->ac_val_base + s->b8_stride + 1;
         s->ac_val[1] = s->ac_val_base + y_size + s->mb_stride + 1;
         s->ac_val[2] = s->ac_val[1] + c_size;
-        
+
         /* cbp values */
         CHECKED_ALLOCZ(s->coded_block_base, y_size);
         s->coded_block= s->coded_block_base + s->b8_stride + 1;
-        
+
         /* cbp, ac_pred, pred_dir */
         CHECKED_ALLOCZ(s->cbp_table  , mb_array_size * sizeof(uint8_t))
         CHECKED_ALLOCZ(s->pred_dir_table, mb_array_size * sizeof(uint8_t))
     }
-    
+
     if (s->h263_pred || s->h263_plus || !s->encoding) {
         /* dc values */
         //MN: we need these for error resilience of intra-frames
@@ -771,12 +798,12 @@ int MPV_common_init(MpegEncContext *s)
     /* which mb is a intra block */
     CHECKED_ALLOCZ(s->mbintra_table, mb_array_size);
     memset(s->mbintra_table, 1, mb_array_size);
-    
+
     /* init macroblock skip table */
     CHECKED_ALLOCZ(s->mbskip_table, mb_array_size+2);
     //Note the +1 is for a quicker mpeg4 slice_end detection
     CHECKED_ALLOCZ(s->prev_pict_types, PREV_PICT_TYPES_BUFFER_SIZE);
-    
+
     s->parse_context.state= -1;
     if((s->avctx->debug&(FF_DEBUG_VIS_QP|FF_DEBUG_VIS_MB_TYPE)) || (s->avctx->debug_mv)){
        s->visualization_buffer[0] = av_malloc((s->mb_width*16 + 2*EDGE_WIDTH) * s->mb_height*16 + 2*EDGE_WIDTH);
@@ -845,14 +872,14 @@ void MPV_common_end(MpegEncContext *s)
         }
         av_freep(&s->p_field_select_table[i]);
     }
-    
+
     av_freep(&s->dc_val_base);
     av_freep(&s->ac_val_base);
     av_freep(&s->coded_block_base);
     av_freep(&s->mbintra_table);
     av_freep(&s->cbp_table);
     av_freep(&s->pred_dir_table);
-    
+
     av_freep(&s->mbskip_table);
     av_freep(&s->prev_pict_types);
     av_freep(&s->bitstream_buffer);
@@ -897,7 +924,7 @@ int MPV_encode_init(AVCodecContext *avctx)
     MpegEncContext *s = avctx->priv_data;
     int i;
     int chroma_h_shift, chroma_v_shift;
-    
+
     MPV_encode_defaults(s);
 
     if(avctx->pix_fmt != PIX_FMT_YUVJ420P && avctx->pix_fmt != PIX_FMT_YUV420P){
@@ -921,7 +948,7 @@ int MPV_encode_init(AVCodecContext *avctx)
     s->width = avctx->width;
     s->height = avctx->height;
     if(avctx->gop_size > 600){
-	av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
+        av_log(avctx, AV_LOG_ERROR, "Warning keyframe interval too large! reducing it ...\n");
         avctx->gop_size=600;
     }
     s->gop_size = avctx->gop_size;
@@ -951,16 +978,16 @@ int MPV_encode_init(AVCodecContext *avctx)
 
     /* Fixed QSCALE */
     s->fixed_qscale = !!(avctx->flags & CODEC_FLAG_QSCALE);
-    
+
     s->adaptive_quant= (   s->avctx->lumi_masking
                         || s->avctx->dark_masking
-                        || s->avctx->temporal_cplx_masking 
+                        || s->avctx->temporal_cplx_masking
                         || s->avctx->spatial_cplx_masking
                         || s->avctx->p_masking
                         || s->avctx->border_masking
                         || (s->flags&CODEC_FLAG_QP_RD))
                        && !s->fixed_qscale;
-    
+
     s->obmc= !!(s->flags & CODEC_FLAG_OBMC);
     s->loop_filter= !!(s->flags & CODEC_FLAG_LOOP_FILTER);
     s->alternate_scan= !!(s->flags & CODEC_FLAG_ALT_SCAN);
@@ -968,45 +995,45 @@ int MPV_encode_init(AVCodecContext *avctx)
     if(avctx->rc_max_rate && !avctx->rc_buffer_size){
         av_log(avctx, AV_LOG_ERROR, "a vbv buffer size is needed, for encoding with a maximum bitrate\n");
         return -1;
-    }    
+    }
 
     if(avctx->rc_min_rate && avctx->rc_max_rate != avctx->rc_min_rate){
         av_log(avctx, AV_LOG_INFO, "Warning min_rate > 0 but min_rate != max_rate isn't recommended!\n");
     }
-    
+
     if(avctx->rc_min_rate && avctx->rc_min_rate > avctx->bit_rate){
         av_log(avctx, AV_LOG_INFO, "bitrate below min bitrate\n");
         return -1;
     }
-    
+
     if(avctx->rc_max_rate && avctx->rc_max_rate < avctx->bit_rate){
         av_log(avctx, AV_LOG_INFO, "bitrate above max bitrate\n");
         return -1;
     }
-        
-    if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate 
+
+    if(   s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate
        && (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO)
        && 90000LL * (avctx->rc_buffer_size-1) > s->avctx->rc_max_rate*0xFFFFLL){
-        
+
         av_log(avctx, AV_LOG_INFO, "Warning vbv_delay will be set to 0xFFFF (=VBR) as the specified vbv buffer is too large for the given bitrate!\n");
     }
-       
-    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4 
+
+    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
        && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P && s->codec_id != CODEC_ID_FLV1){
         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
         return -1;
     }
-        
+
     if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decision\n");
         return -1;
     }
-    
+
     if(s->obmc && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
         av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with H263(+)\n");
         return -1;
     }
-    
+
     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
         return -1;
@@ -1016,23 +1043,23 @@ int MPV_encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "data partitioning not supported by codec\n");
         return -1;
     }
-    
+
     if(s->max_b_frames && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO){
         av_log(avctx, AV_LOG_ERROR, "b frames not supported by codec\n");
         return -1;
     }
 
-    if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN)) 
+    if((s->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME|CODEC_FLAG_ALT_SCAN))
        && s->codec_id != CODEC_ID_MPEG4 && s->codec_id != CODEC_ID_MPEG2VIDEO){
         av_log(avctx, AV_LOG_ERROR, "interlacing not supported by codec\n");
         return -1;
     }
-        
+
     if(s->mpeg_quant && s->codec_id != CODEC_ID_MPEG4){ //FIXME mpeg2 uses that too
         av_log(avctx, AV_LOG_ERROR, "mpeg2 style quantization not supported by codec\n");
         return -1;
     }
-        
+
     if((s->flags & CODEC_FLAG_CBP_RD) && !(s->flags & CODEC_FLAG_TRELLIS_QUANT)){
         av_log(avctx, AV_LOG_ERROR, "CBP RD needs trellis quant\n");
         return -1;
@@ -1042,19 +1069,19 @@ int MPV_encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "QP RD needs mbd=2\n");
         return -1;
     }
-    
+
     if(s->avctx->scenechange_threshold < 1000000000 && (s->flags & CODEC_FLAG_CLOSED_GOP)){
         av_log(avctx, AV_LOG_ERROR, "closed gop with scene change detection arent supported yet\n");
         return -1;
     }
-    
-    if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4 
-       && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO 
+
+    if(s->avctx->thread_count > 1 && s->codec_id != CODEC_ID_MPEG4
+       && s->codec_id != CODEC_ID_MPEG1VIDEO && s->codec_id != CODEC_ID_MPEG2VIDEO
        && (s->codec_id != CODEC_ID_H263P || !(s->flags & CODEC_FLAG_H263P_SLICE_STRUCT))){
         av_log(avctx, AV_LOG_ERROR, "multi threaded encoding not supported by codec\n");
         return -1;
     }
-    
+
     if(s->avctx->thread_count > 1)
         s->rtp_mode= 1;
 
@@ -1062,7 +1089,7 @@ int MPV_encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "framerate not set\n");
         return -1;
     }
-    
+
     i= (INT_MAX/2+128)>>8;
     if(avctx->me_threshold >= i){
         av_log(avctx, AV_LOG_ERROR, "me_threshold too large, max is %d\n", i - 1);
@@ -1072,9 +1099,9 @@ int MPV_encode_init(AVCodecContext *avctx)
         av_log(avctx, AV_LOG_ERROR, "mb_threshold too large, max is %d\n", i - 1);
         return -1;
     }
-        
+
     if(avctx->b_frame_strategy && (avctx->flags&CODEC_FLAG_PASS2)){
-        av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass");
+        av_log(avctx, AV_LOG_ERROR, "b_frame_strategy must be 0 on the second pass\n");
         return -1;
     }
 
@@ -1085,7 +1112,7 @@ int MPV_encode_init(AVCodecContext *avctx)
         avctx->time_base.num /= i;
 //        return -1;
     }
-    
+
     if(s->codec_id==CODEC_ID_MJPEG){
         s->intra_quant_bias= 1<<(QUANT_BIAS_SHIFT-1); //(a + x/2)/x
         s->inter_quant_bias= 0;
@@ -1096,17 +1123,17 @@ int MPV_encode_init(AVCodecContext *avctx)
         s->intra_quant_bias=0;
         s->inter_quant_bias=-(1<<(QUANT_BIAS_SHIFT-2)); //(a - x/4)/x
     }
-    
+
     if(avctx->intra_quant_bias != FF_DEFAULT_QUANT_BIAS)
         s->intra_quant_bias= avctx->intra_quant_bias;
     if(avctx->inter_quant_bias != FF_DEFAULT_QUANT_BIAS)
         s->inter_quant_bias= avctx->inter_quant_bias;
-        
+
     avcodec_get_chroma_sub_sample(avctx->pix_fmt, &chroma_h_shift, &chroma_v_shift);
 
     if(avctx->codec_id == CODEC_ID_MPEG4 && s->avctx->time_base.den > (1<<16)-1){
         av_log(avctx, AV_LOG_ERROR, "timebase not supported by mpeg 4 standard\n");
-        return -1;        
+        return -1;
     }
     s->time_increment_bits = av_log2(s->avctx->time_base.den - 1) + 1;
 
@@ -1125,17 +1152,18 @@ int MPV_encode_init(AVCodecContext *avctx)
         s->rtp_mode= 1;
         break;
     case CODEC_ID_LJPEG:
+    case CODEC_ID_JPEGLS:
     case CODEC_ID_MJPEG:
         s->out_format = FMT_MJPEG;
         s->intra_only = 1; /* force intra only for jpeg */
-        s->mjpeg_write_tables = 1; /* write all tables */
-	s->mjpeg_data_only_frames = 0; /* write all the needed headers */
+        s->mjpeg_write_tables = avctx->codec->id != CODEC_ID_JPEGLS;
+        s->mjpeg_data_only_frames = 0; /* write all the needed headers */
         s->mjpeg_vsample[0] = 1<<chroma_v_shift;
         s->mjpeg_vsample[1] = 1;
-        s->mjpeg_vsample[2] = 1; 
+        s->mjpeg_vsample[2] = 1;
         s->mjpeg_hsample[0] = 1<<chroma_h_shift;
-        s->mjpeg_hsample[1] = 1; 
-        s->mjpeg_hsample[2] = 1; 
+        s->mjpeg_hsample[1] = 1;
+        s->mjpeg_hsample[2] = 1;
         if (mjpeg_init(s) < 0)
             return -1;
         avctx->delay=0;
@@ -1148,28 +1176,28 @@ int MPV_encode_init(AVCodecContext *avctx)
         break;
     case CODEC_ID_H263:
         if (h263_get_picture_format(s->width, s->height) == 7) {
-            av_log(avctx, AV_LOG_INFO, "Input picture size isn't suitable for h263 codec! try h263+\n");
+            av_log(avctx, AV_LOG_INFO, "The specified picture size of %dx%d is not valid for the H.263 codec.\nValid sizes are 128x96, 176x144, 352x288, 704x576, and 1408x1152. Try H.263+.\n", s->width, s->height);
             return -1;
         }
         s->out_format = FMT_H263;
-	s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
+        s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
         avctx->delay=0;
         s->low_delay=1;
         break;
     case CODEC_ID_H263P:
         s->out_format = FMT_H263;
         s->h263_plus = 1;
-	/* Fx */
+        /* Fx */
         s->umvplus = (avctx->flags & CODEC_FLAG_H263P_UMV) ? 1:0;
-	s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
-	s->modified_quant= s->h263_aic;
-	s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
-	s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
-	s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
-	s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
+        s->h263_aic= (avctx->flags & CODEC_FLAG_H263P_AIC) ? 1:0;
+        s->modified_quant= s->h263_aic;
+        s->alt_inter_vlc= (avctx->flags & CODEC_FLAG_H263P_AIV) ? 1:0;
+        s->obmc= (avctx->flags & CODEC_FLAG_OBMC) ? 1:0;
+        s->loop_filter= (avctx->flags & CODEC_FLAG_LOOP_FILTER) ? 1:0;
+        s->unrestricted_mv= s->obmc || s->loop_filter || s->umvplus;
         s->h263_slice_structured= (s->flags & CODEC_FLAG_H263P_SLICE_STRUCT) ? 1:0;
 
-	/* /Fx */
+        /* /Fx */
         /* These are just to be sure */
         avctx->delay=0;
         s->low_delay=1;
@@ -1256,7 +1284,7 @@ int MPV_encode_init(AVCodecContext *avctx)
     default:
         return -1;
     }
-    
+
     avctx->has_b_frames= !s->low_delay;
 
     s->encoding = 1;
@@ -1267,13 +1295,13 @@ int MPV_encode_init(AVCodecContext *avctx)
 
     if(s->modified_quant)
         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
-    s->progressive_frame= 
+    s->progressive_frame=
     s->progressive_sequence= !(avctx->flags & (CODEC_FLAG_INTERLACED_DCT|CODEC_FLAG_INTERLACED_ME));
     s->quant_precision=5;
-    
+
     ff_set_cmp(&s->dsp, s->dsp.ildct_cmp, s->avctx->ildct_cmp);
     ff_set_cmp(&s->dsp, s->dsp.frame_skip_cmp, s->avctx->frame_skip_cmp);
-    
+
 /* xine: do not need this for decode or MPEG-1 encoding modes */
 #if 0
 #ifdef CONFIG_H261_ENCODER
@@ -1312,15 +1340,15 @@ int MPV_encode_init(AVCodecContext *avctx)
     /* precompute matrix */
     /* for mjpeg, we do include qscale in the matrix */
     if (s->out_format != FMT_MJPEG) {
-        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
+        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                        s->intra_matrix, s->intra_quant_bias, avctx->qmin, 31, 1);
-        convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16, 
+        convert_matrix(&s->dsp, s->q_inter_matrix, s->q_inter_matrix16,
                        s->inter_matrix, s->inter_quant_bias, avctx->qmin, 31, 0);
     }
 
     if(ff_rate_control_init(s) < 0)
         return -1;
-    
+
     return 0;
 }
 
@@ -1342,7 +1370,7 @@ int MPV_encode_end(AVCodecContext *avctx)
 #endif /* #if 0 */
 
     av_freep(&avctx->extradata);
-      
+
     return 0;
 }
 
@@ -1430,7 +1458,7 @@ static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w)
 
 int ff_find_unused_picture(MpegEncContext *s, int shared){
     int i;
-    
+
     if(shared){
         for(i=0; i<MAX_PICTURE_COUNT; i++){
             if(s->picture[i].data[0]==NULL && s->picture[i].type==0) return i;
@@ -1458,7 +1486,7 @@ static void update_noise_reduction(MpegEncContext *s){
             }
             s->dct_count[intra] >>= 1;
         }
-        
+
         for(i=0; i<64; i++){
             s->dct_offset[intra][i]= (s->avctx->noise_reduction * s->dct_count[intra] + s->dct_error_sum[intra][i]/2) / (s->dct_error_sum[intra][i]+1);
         }
@@ -1486,7 +1514,7 @@ int MPV_frame_start(MpegEncContext *s, AVCodecContext *avctx)
             for(i=0; i<MAX_PICTURE_COUNT; i++){
                 if(s->picture[i].data[0] && &s->picture[i] != s->next_picture_ptr && s->picture[i].reference){
                     av_log(avctx, AV_LOG_ERROR, "releasing zombie picture\n");
-                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);                
+                    avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
                 }
             }
         }
@@ -1511,7 +1539,7 @@ alloc:
                         && !s->dropable ? 3 : 0;
 
         pic->coded_picture_number= s->coded_picture_number++;
-        
+
         if( alloc_picture(s, (Picture*)pic, 0) < 0)
             return -1;
 
@@ -1521,12 +1549,12 @@ alloc:
     }
 
     s->current_picture_ptr->pict_type= s->pict_type;
-//    if(s->flags && CODEC_FLAG_QSCALE) 
+//    if(s->flags && CODEC_FLAG_QSCALE)
   //      s->current_picture_ptr->quality= s->new_picture_ptr->quality;
     s->current_picture_ptr->key_frame= s->pict_type == I_TYPE;
 
     copy_picture(&s->current_picture, s->current_picture_ptr);
-  
+
   if(s->out_format != FMT_H264 || s->codec_id == CODEC_ID_SVQ3){
     if (s->pict_type != B_TYPE) {
         s->last_picture_ptr= s->next_picture_ptr;
@@ -1534,14 +1562,14 @@ alloc:
             s->next_picture_ptr= s->current_picture_ptr;
     }
 /*    av_log(s->avctx, AV_LOG_DEBUG, "L%p N%p C%p L%p N%p C%p type:%d drop:%d\n", s->last_picture_ptr, s->next_picture_ptr,s->current_picture_ptr,
-        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL, 
-        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL, 
+        s->last_picture_ptr    ? s->last_picture_ptr->data[0] : NULL,
+        s->next_picture_ptr    ? s->next_picture_ptr->data[0] : NULL,
         s->current_picture_ptr ? s->current_picture_ptr->data[0] : NULL,
         s->pict_type, s->dropable);*/
-    
+
     if(s->last_picture_ptr) copy_picture(&s->last_picture, s->last_picture_ptr);
     if(s->next_picture_ptr) copy_picture(&s->next_picture, s->next_picture_ptr);
-    
+
     if(s->pict_type != I_TYPE && (s->last_picture_ptr==NULL || s->last_picture_ptr->data[0]==NULL)){
         av_log(avctx, AV_LOG_ERROR, "warning: first frame is no keyframe\n");
         assert(s->pict_type != B_TYPE); //these should have been dropped if we don't have a reference
@@ -1555,14 +1583,14 @@ alloc:
         for(i=0; i<4; i++){
             if(s->picture_structure == PICT_BOTTOM_FIELD){
                  s->current_picture.data[i] += s->current_picture.linesize[i];
-            } 
+            }
             s->current_picture.linesize[i] *= 2;
             s->last_picture.linesize[i] *=2;
             s->next_picture.linesize[i] *=2;
         }
     }
   }
-   
+
     s->hurry_up= s->avctx->hurry_up;
     s->error_resilience= avctx->error_resilience;
 
@@ -1584,7 +1612,7 @@ alloc:
 
         update_noise_reduction(s);
     }
-        
+
 #ifdef HAVE_XVMC
     if(s->avctx->xvmc_acceleration)
         return XVMC_field_start(s, avctx);
@@ -1609,8 +1637,9 @@ void MPV_frame_end(MpegEncContext *s)
             draw_edges(s->current_picture.data[2], s->uvlinesize, s->h_edge_pos>>1, s->v_edge_pos>>1, EDGE_WIDTH/2);
     }
     emms_c();
-    
+
     s->last_pict_type    = s->pict_type;
+    s->last_lambda_for[s->pict_type]= s->current_picture_ptr->quality;
     if(s->pict_type!=B_TYPE){
         s->last_non_b_pict_type= s->pict_type;
     }
@@ -1620,10 +1649,10 @@ void MPV_frame_end(MpegEncContext *s)
         if(s->picture[i].data[0] == s->current_picture.data[0]){
             s->picture[i]= s->current_picture;
             break;
-        }    
+        }
     }
     assert(i<MAX_PICTURE_COUNT);
-#endif    
+#endif
 
     if(s->encoding){
         /* release non-reference frames */
@@ -1651,14 +1680,14 @@ void MPV_frame_end(MpegEncContext *s)
  */
 static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
     int t, x, y, fr, f;
-    
+
     sx= clip(sx, 0, w-1);
     sy= clip(sy, 0, h-1);
     ex= clip(ex, 0, w-1);
     ey= clip(ey, 0, h-1);
-    
+
     buf[sy*stride + sx]+= color;
-    
+
     if(ABS(ex - sx) > ABS(ey - sy)){
         if(sx > ex){
             t=sx; sx=ex; ex=t;
@@ -1698,26 +1727,26 @@ static void draw_line(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h
  * @param stride stride/linesize of the image
  * @param color color of the arrow
  */
-static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){ 
+static void draw_arrow(uint8_t *buf, int sx, int sy, int ex, int ey, int w, int h, int stride, int color){
     int dx,dy;
 
     sx= clip(sx, -100, w+100);
     sy= clip(sy, -100, h+100);
     ex= clip(ex, -100, w+100);
     ey= clip(ey, -100, h+100);
-    
+
     dx= ex - sx;
     dy= ey - sy;
-    
+
     if(dx*dx + dy*dy > 3*3){
         int rx=  dx + dy;
         int ry= -dx + dy;
         int length= ff_sqrt((rx*rx + ry*ry)<<8);
-        
+
         //FIXME subpixel accuracy
         rx= ROUNDED_DIV(rx*3<<4, length);
         ry= ROUNDED_DIV(ry*3<<4, length);
-        
+
         draw_line(buf, sx, sy, sx + rx, sy + ry, w, h, stride, color);
         draw_line(buf, sx, sy, sx - ry, sy + rx, w, h, stride, color);
     }
@@ -1733,7 +1762,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
 
     if(s->avctx->debug&(FF_DEBUG_SKIP | FF_DEBUG_QP | FF_DEBUG_MB_TYPE)){
         int x,y;
-        
+
         av_log(s->avctx,AV_LOG_DEBUG,"New frame, type: ");
         switch (pict->pict_type) {
             case FF_I_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"I\n"); break;
@@ -1741,7 +1770,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
             case FF_B_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"B\n"); break;
             case FF_S_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"S\n"); break;
             case FF_SI_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SI\n"); break;
-            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;            
+            case FF_SP_TYPE: av_log(s->avctx,AV_LOG_DEBUG,"SP\n"); break;
         }
         for(y=0; y<s->mb_height; y++){
             for(x=0; x<s->mb_width; x++){
@@ -1782,7 +1811,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
                         assert(USES_LIST(mb_type, 0) && USES_LIST(mb_type, 1));
                         av_log(s->avctx, AV_LOG_DEBUG, "X");
                     }
-                    
+
                     //segmentation
                     if(IS_8X8(mb_type))
                         av_log(s->avctx, AV_LOG_DEBUG, "+");
@@ -1794,8 +1823,8 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
                         av_log(s->avctx, AV_LOG_DEBUG, " ");
                     else
                         av_log(s->avctx, AV_LOG_DEBUG, "?");
-                    
-                        
+
+
                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264)
                         av_log(s->avctx, AV_LOG_DEBUG, "=");
                     else
@@ -1870,10 +1899,10 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
                         int xy= (mb_x*2 + (mb_y*2 + i)*mv_stride) << (mv_sample_log2-1);
                         int mx=(pict->motion_val[direction][xy][0]>>shift);
                         int my=(pict->motion_val[direction][xy][1]>>shift);
-                        
+
                         if(IS_INTERLACED(pict->mb_type[mb_index]))
                             my*=2;
-                        
+
                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                       }
                     }else if(IS_8X16(pict->mb_type[mb_index])){
@@ -1884,10 +1913,10 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
                         int xy= (mb_x*2 + i + mb_y*2*mv_stride) << (mv_sample_log2-1);
                         int mx=(pict->motion_val[direction][xy][0]>>shift);
                         int my=(pict->motion_val[direction][xy][1]>>shift);
-                        
+
                         if(IS_INTERLACED(pict->mb_type[mb_index]))
                             my*=2;
-                        
+
                         draw_arrow(ptr, sx, sy, mx+sx, my+sy, width, height, s->linesize, 100);
                       }
                     }else{
@@ -1898,7 +1927,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
                       int my= (pict->motion_val[direction][xy][1]>>shift) + sy;
                       draw_arrow(ptr, sx, sy, mx, my, width, height, s->linesize, 100);
                     }
-                  }                  
+                  }
                 }
                 if((s->avctx->debug&FF_DEBUG_VIS_QP) && pict->motion_val){
                     uint64_t c= (pict->qscale_table[mb_index]*128/31) * 0x0101010101010101ULL;
@@ -1916,7 +1945,7 @@ void ff_print_debug_info(MpegEncContext *s, AVFrame *pict){
 u= (int)(128 + r*cos(theta*3.141592/180));\
 v= (int)(128 + r*sin(theta*3.141592/180));
 
-                    
+
                     u=v=128;
                     if(IS_PCM(mb_type)){
                         COLOR(120,48)
@@ -1974,7 +2003,7 @@ v= (int)(128 + r*sin(theta*3.141592/180));
                                 *(uint64_t*)(pict->data[0] + sx + (sy + 4)*pict->linesize[0])^= 0x8080808080808080ULL;
                         }
                     }
-                        
+
                     if(IS_INTERLACED(mb_type) && s->codec_id == CODEC_ID_H264){
                         // hmm
                     }
@@ -1990,30 +2019,30 @@ v= (int)(128 + r*sin(theta*3.141592/180));
 static int get_sae(uint8_t *src, int ref, int stride){
     int x,y;
     int acc=0;
-    
+
     for(y=0; y<16; y++){
         for(x=0; x<16; x++){
             acc+= ABS(src[x+y*stride] - ref);
         }
     }
-    
+
     return acc;
 }
 
 static int get_intra_count(MpegEncContext *s, uint8_t *src, uint8_t *ref, int stride){
     int x, y, w, h;
     int acc=0;
-    
+
     w= s->width &~15;
     h= s->height&~15;
-    
+
     for(y=0; y<h; y+=16){
         for(x=0; x<w; x+=16){
             int offset= x + y*stride;
             int sad = s->dsp.sad[0](NULL, src + offset, ref + offset, stride, 16);
             int mean= (s->dsp.pix_sum(src + offset, stride) + 128)>>8;
             int sae = get_sae(src + offset, mean, stride);
-            
+
             acc+= sae + 500 < sad;
         }
     }
@@ -2027,27 +2056,27 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
     int i;
     const int encoding_delay= s->max_b_frames;
     int direct=1;
-    
+
     if(pic_arg){
         pts= pic_arg->pts;
         pic_arg->display_picture_number= s->input_picture_number++;
 
-        if(pts != AV_NOPTS_VALUE){ 
+        if(pts != AV_NOPTS_VALUE){
             if(s->user_specified_pts != AV_NOPTS_VALUE){
                 int64_t time= pts;
                 int64_t last= s->user_specified_pts;
-            
-                if(time <= last){            
-                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%Ld, last=%Ld\n", pts, s->user_specified_pts);
+
+                if(time <= last){
+                    av_log(s->avctx, AV_LOG_ERROR, "Error, Invalid timestamp=%"PRId64", last=%"PRId64"\n", pts, s->user_specified_pts);
                     return -1;
                 }
             }
             s->user_specified_pts= pts;
         }else{
             if(s->user_specified_pts != AV_NOPTS_VALUE){
-                s->user_specified_pts= 
+                s->user_specified_pts=
                 pts= s->user_specified_pts + 1;
-                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%Ld)\n", pts);
+                av_log(s->avctx, AV_LOG_INFO, "Warning: AVFrame.pts=? trying to guess (%"PRId64")\n", pts);
             }else{
                 pts= pic_arg->display_picture_number;
             }
@@ -2059,22 +2088,21 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
     if(pic_arg->linesize[0] != s->linesize) direct=0;
     if(pic_arg->linesize[1] != s->uvlinesize) direct=0;
     if(pic_arg->linesize[2] != s->uvlinesize) direct=0;
-  
+
 //    av_log(AV_LOG_DEBUG, "%d %d %d %d\n",pic_arg->linesize[0], pic_arg->linesize[1], s->linesize, s->uvlinesize);
-    
+
     if(direct){
         i= ff_find_unused_picture(s, 1);
 
         pic= (AVFrame*)&s->picture[i];
         pic->reference= 3;
-    
+
         for(i=0; i<4; i++){
             pic->data[i]= pic_arg->data[i];
             pic->linesize[i]= pic_arg->linesize[i];
         }
         alloc_picture(s, (Picture*)pic, 1);
     }else{
-        int offset= 16;
         i= ff_find_unused_picture(s, 0);
 
         pic= (AVFrame*)&s->picture[i];
@@ -2082,14 +2110,14 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
 
         alloc_picture(s, (Picture*)pic, 0);
 
-        if(   pic->data[0] + offset == pic_arg->data[0] 
-           && pic->data[1] + offset == pic_arg->data[1]
-           && pic->data[2] + offset == pic_arg->data[2]){
+        if(   pic->data[0] + INPLACE_OFFSET == pic_arg->data[0]
+           && pic->data[1] + INPLACE_OFFSET == pic_arg->data[1]
+           && pic->data[2] + INPLACE_OFFSET == pic_arg->data[2]){
        // empty
         }else{
             int h_chroma_shift, v_chroma_shift;
             avcodec_get_chroma_sub_sample(s->avctx->pix_fmt, &h_chroma_shift, &v_chroma_shift);
-        
+
             for(i=0; i<3; i++){
                 int src_stride= pic_arg->linesize[i];
                 int dst_stride= i ? s->uvlinesize : s->linesize;
@@ -2098,8 +2126,8 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
                 int w= s->width >>h_shift;
                 int h= s->height>>v_shift;
                 uint8_t *src= pic_arg->data[i];
-                uint8_t *dst= pic->data[i] + offset;
-            
+                uint8_t *dst= pic->data[i] + INPLACE_OFFSET;
+
                 if(src_stride==dst_stride)
                     memcpy(dst, src, src_stride*h);
                 else{
@@ -2115,11 +2143,11 @@ static int load_input_picture(MpegEncContext *s, AVFrame *pic_arg){
     copy_picture_attributes(s, pic, pic_arg);
     pic->pts= pts; //we set this here to avoid modifiying pic_arg
   }
-  
+
     /* shift buffer entries */
     for(i=1; i<MAX_PICTURE_COUNT /*s->encoding_delay+1*/; i++)
         s->input_picture[i-1]= s->input_picture[i];
-        
+
     s->input_picture[encoding_delay]= (Picture*)pic;
 
     return 0;
@@ -2135,8 +2163,9 @@ static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
         const int bw= plane ? 1 : 2;
         for(y=0; y<s->mb_height*bw; y++){
             for(x=0; x<s->mb_width*bw; x++){
-                int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride), ref->data[plane] + 8*(x + y*stride), stride, 8);
-                
+                int off= p->type == FF_BUFFER_TYPE_SHARED ? 0: 16;
+                int v= s->dsp.frame_skip_cmp[1](s, p->data[plane] + 8*(x + y*stride)+off, ref->data[plane] + 8*(x + y*stride), stride, 8);
+
                 switch(s->avctx->frame_skip_exp){
                     case 0: score= FFMAX(score, v); break;
                     case 1: score+= ABS(v);break;
@@ -2147,7 +2176,7 @@ static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
             }
         }
     }
-    
+
     if(score) score64= score;
 
     if(score64 < s->avctx->frame_skip_threshold)
@@ -2157,6 +2186,115 @@ static int skip_check(MpegEncContext *s, Picture *p, Picture *ref){
     return 0;
 }
 
+static int estimate_best_b_count(MpegEncContext *s){
+    AVCodec *codec= avcodec_find_encoder(s->avctx->codec_id);
+    AVCodecContext *c= avcodec_alloc_context();
+    AVFrame input[FF_MAX_B_FRAMES+2];
+    const int scale= s->avctx->brd_scale;
+    int i, j, out_size, p_lambda, b_lambda, lambda2;
+    int outbuf_size= s->width * s->height; //FIXME
+    uint8_t *outbuf= av_malloc(outbuf_size);
+    ImgReSampleContext *resample;
+    int64_t best_rd= INT64_MAX;
+    int best_b_count= -1;
+
+//    emms_c();
+    p_lambda= s->last_lambda_for[P_TYPE]; //s->next_picture_ptr->quality;
+    b_lambda= s->last_lambda_for[B_TYPE]; //p_lambda *ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
+    if(!b_lambda) b_lambda= p_lambda; //FIXME we should do this somewhere else
+    lambda2= (b_lambda*b_lambda + (1<<FF_LAMBDA_SHIFT)/2 ) >> FF_LAMBDA_SHIFT;
+
+    c->width = s->width >> scale;
+    c->height= s->height>> scale;
+    c->flags= CODEC_FLAG_QSCALE | CODEC_FLAG_PSNR | CODEC_FLAG_INPUT_PRESERVED /*| CODEC_FLAG_EMU_EDGE*/;
+    c->flags|= s->avctx->flags & CODEC_FLAG_QPEL;
+    c->mb_decision= s->avctx->mb_decision;
+    c->me_cmp= s->avctx->me_cmp;
+    c->mb_cmp= s->avctx->mb_cmp;
+    c->me_sub_cmp= s->avctx->me_sub_cmp;
+    c->pix_fmt = PIX_FMT_YUV420P;
+    c->time_base= s->avctx->time_base;
+    c->max_b_frames= s->max_b_frames;
+
+    if (avcodec_open(c, codec) < 0)
+        return -1;
+
+    resample= img_resample_init(c->width, c->height, s->width, s->height); //FIXME use sws
+
+    for(i=0; i<s->max_b_frames+2; i++){
+        int ysize= c->width*c->height;
+        int csize= (c->width/2)*(c->height/2);
+        Picture pre_input, *pre_input_ptr= i ? s->input_picture[i-1] : s->next_picture_ptr;
+
+        if(pre_input_ptr)
+            pre_input= *pre_input_ptr;
+
+        if(pre_input.type != FF_BUFFER_TYPE_SHARED && i){
+            pre_input.data[0]+=INPLACE_OFFSET;
+            pre_input.data[1]+=INPLACE_OFFSET;
+            pre_input.data[2]+=INPLACE_OFFSET;
+        }
+
+        avcodec_get_frame_defaults(&input[i]);
+        input[i].data[0]= av_malloc(ysize + 2*csize);
+        input[i].data[1]= input[i].data[0] + ysize;
+        input[i].data[2]= input[i].data[1] + csize;
+        input[i].linesize[0]= c->width;
+        input[i].linesize[1]=
+        input[i].linesize[2]= c->width/2;
+
+        if(!i || s->input_picture[i-1])
+            img_resample(resample, &input[i], &pre_input);
+    }
+
+    for(j=0; j<s->max_b_frames+1; j++){
+        int64_t rd=0;
+
+        if(!s->input_picture[j])
+            break;
+
+        c->error[0]= c->error[1]= c->error[2]= 0;
+
+        input[0].pict_type= I_TYPE;
+        input[0].quality= 1 * FF_QP2LAMBDA;
+        out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[0]);
+//        rd += (out_size * lambda2) >> FF_LAMBDA_SHIFT;
+
+        for(i=0; i<s->max_b_frames+1; i++){
+            int is_p= i % (j+1) == j || i==s->max_b_frames;
+
+            input[i+1].pict_type= is_p ? P_TYPE : B_TYPE;
+            input[i+1].quality= is_p ? p_lambda : b_lambda;
+            out_size = avcodec_encode_video(c, outbuf, outbuf_size, &input[i+1]);
+            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
+        }
+
+        /* get the delayed frames */
+        while(out_size){
+            out_size = avcodec_encode_video(c, outbuf, outbuf_size, NULL);
+            rd += (out_size * lambda2) >> (FF_LAMBDA_SHIFT - 3);
+        }
+
+        rd += c->error[0] + c->error[1] + c->error[2];
+
+        if(rd < best_rd){
+            best_rd= rd;
+            best_b_count= j;
+        }
+    }
+
+    av_freep(&outbuf);
+    avcodec_close(c);
+    av_freep(&c);
+    img_resample_close(resample);
+
+    for(i=0; i<s->max_b_frames+2; i++){
+        av_freep(&input[i].data[0]);
+    }
+
+    return best_b_count;
+}
+
 static void select_input_picture(MpegEncContext *s){
     int i;
 
@@ -2174,20 +2312,24 @@ static void select_input_picture(MpegEncContext *s){
             int b_frames;
 
             if(s->avctx->frame_skip_threshold || s->avctx->frame_skip_factor){
-                if(skip_check(s, s->input_picture[0], s->next_picture_ptr)){
+                if(s->picture_in_gop_number < s->gop_size && skip_check(s, s->input_picture[0], s->next_picture_ptr)){
+                //FIXME check that te gop check above is +-1 correct
 //av_log(NULL, AV_LOG_DEBUG, "skip %p %Ld\n", s->input_picture[0]->data[0], s->input_picture[0]->pts);
-                
+
                     if(s->input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
                         for(i=0; i<4; i++)
                             s->input_picture[0]->data[i]= NULL;
-                        s->input_picture[0]->type= 0;            
+                        s->input_picture[0]->type= 0;
                     }else{
-                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER 
+                        assert(   s->input_picture[0]->type==FF_BUFFER_TYPE_USER
                                || s->input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
-            
+
                         s->avctx->release_buffer(s->avctx, (AVFrame*)s->input_picture[0]);
                     }
 
+                    emms_c();
+                    ff_vbv_update(s, 0);
+
                     goto no_output_pic;
                 }
             }
@@ -2196,14 +2338,14 @@ static void select_input_picture(MpegEncContext *s){
                 for(i=0; i<s->max_b_frames+1; i++){
                     int pict_num= s->input_picture[0]->display_picture_number + i;
 
-                    if(pict_num >= s->rc_context.num_entries) 
+                    if(pict_num >= s->rc_context.num_entries)
                         break;
                     if(!s->input_picture[i]){
                         s->rc_context.entry[pict_num-1].new_pict_type = P_TYPE;
                         break;
                     }
 
-                    s->input_picture[i]->pict_type= 
+                    s->input_picture[i]->pict_type=
                         s->rc_context.entry[pict_num].new_pict_type;
                 }
             }
@@ -2214,21 +2356,23 @@ static void select_input_picture(MpegEncContext *s){
             }else if(s->avctx->b_frame_strategy==1){
                 for(i=1; i<s->max_b_frames+1; i++){
                     if(s->input_picture[i] && s->input_picture[i]->b_frame_score==0){
-                        s->input_picture[i]->b_frame_score= 
-                            get_intra_count(s, s->input_picture[i  ]->data[0], 
+                        s->input_picture[i]->b_frame_score=
+                            get_intra_count(s, s->input_picture[i  ]->data[0],
                                                s->input_picture[i-1]->data[0], s->linesize) + 1;
                     }
                 }
                 for(i=0; i<s->max_b_frames+1; i++){
                     if(s->input_picture[i]==NULL || s->input_picture[i]->b_frame_score - 1 > s->mb_num/40) break;
                 }
-                                
+
                 b_frames= FFMAX(0, i-1);
-                
+
                 /* reset scores */
                 for(i=0; i<b_frames+1; i++){
                     s->input_picture[i]->b_frame_score=0;
                 }
+            }else if(s->avctx->b_frame_strategy==2){
+                b_frames= estimate_best_b_count(s);
             }else{
                 av_log(s->avctx, AV_LOG_ERROR, "illegal b frame strategy\n");
                 b_frames=0;
@@ -2257,7 +2401,7 @@ static void select_input_picture(MpegEncContext *s){
                 s->input_picture[b_frames]->pict_type= I_TYPE;
               }
             }
-            
+
             if(   (s->flags & CODEC_FLAG_CLOSED_GOP)
                && b_frames
                && s->input_picture[b_frames]->pict_type== I_TYPE)
@@ -2282,7 +2426,7 @@ no_output_pic:
 
         if(s->reordered_input_picture[0]->type == FF_BUFFER_TYPE_SHARED){
             // input is a shared pix, so we can't modifiy it -> alloc a new one & ensure that the shared one is reuseable
-        
+
             int i= ff_find_unused_picture(s, 0);
             Picture *pic= &s->picture[i];
 
@@ -2290,9 +2434,9 @@ no_output_pic:
             for(i=0; i<4; i++)
                 s->reordered_input_picture[0]->data[i]= NULL;
             s->reordered_input_picture[0]->type= 0;
-            
+
             pic->reference              = s->reordered_input_picture[0]->reference;
-            
+
             alloc_picture(s, pic, 0);
 
             copy_picture_attributes(s, (AVFrame*)pic, (AVFrame*)s->reordered_input_picture[0]);
@@ -2301,16 +2445,16 @@ no_output_pic:
         }else{
             // input is not a shared pix -> reuse buffer for current_pix
 
-            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER 
+            assert(   s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_USER
                    || s->reordered_input_picture[0]->type==FF_BUFFER_TYPE_INTERNAL);
-            
+
             s->current_picture_ptr= s->reordered_input_picture[0];
             for(i=0; i<4; i++){
-                s->new_picture.data[i]+=16;
+                s->new_picture.data[i]+= INPLACE_OFFSET;
             }
         }
         copy_picture(&s->current_picture, s->current_picture_ptr);
-    
+
         s->picture_number= s->new_picture.display_picture_number;
 //printf("dpn:%d\n", s->picture_number);
     }else{
@@ -2329,7 +2473,7 @@ int MPV_encode_picture(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_ERROR, "this codec supports only YUV420P\n");
         return -1;
     }
-    
+
     for(i=0; i<avctx->thread_count; i++){
         int start_y= s->thread_context[i]->start_mb_y;
         int   end_y= s->thread_context[i]->  end_mb_y;
@@ -2344,9 +2488,9 @@ int MPV_encode_picture(AVCodecContext *avctx,
 
     if(load_input_picture(s, pic_arg) < 0)
         return -1;
-    
+
     select_input_picture(s);
-    
+
     /* output? */
     if(s->new_picture.data[0]){
         s->pict_type= s->new_picture.pict_type;
@@ -2355,7 +2499,7 @@ int MPV_encode_picture(AVCodecContext *avctx,
         MPV_frame_start(s, avctx);
 
         encode_picture(s, s->picture_number);
-        
+
         avctx->real_pict_num  = s->picture_number;
         avctx->header_bits = s->header_bits;
         avctx->mv_bits     = s->mv_bits;
@@ -2373,11 +2517,12 @@ int MPV_encode_picture(AVCodecContext *avctx,
         if (s->out_format == FMT_MJPEG)
             mjpeg_picture_trailer(s);
 #endif /* #if 0 */
-        
+
         if(s->flags&CODEC_FLAG_PASS1)
             ff_write_pass1_stats(s);
 
         for(i=0; i<4; i++){
+            s->current_picture_ptr->error[i]= s->current_picture.error[i];
             avctx->error[i] += s->current_picture_ptr->error[i];
         }
 
@@ -2415,13 +2560,13 @@ int MPV_encode_picture(AVCodecContext *avctx,
             s->frame_bits  = put_bits_count(&s->pb);
         }
 
-        /* update mpeg1/2 vbv_delay for CBR */    
+        /* update mpeg1/2 vbv_delay for CBR */
         if(s->avctx->rc_max_rate && s->avctx->rc_min_rate == s->avctx->rc_max_rate && s->out_format == FMT_MPEG1
            && 90000LL * (avctx->rc_buffer_size-1) <= s->avctx->rc_max_rate*0xFFFFLL){
             int vbv_delay;
 
             assert(s->repeat_first_field==0);
-            
+
             vbv_delay= lrintf(90000 * s->rc_context.buffer_index / s->avctx->rc_max_rate);
             assert(vbv_delay < 0xFFFF);
 
@@ -2438,7 +2583,7 @@ int MPV_encode_picture(AVCodecContext *avctx,
         s->frame_bits=0;
     }
     assert((s->frame_bits&7)==0);
-    
+
     return s->frame_bits/8;
 }
 
@@ -2468,7 +2613,7 @@ static inline void gmc1_motion(MpegEncContext *s,
 
     linesize = s->linesize;
     uvlinesize = s->uvlinesize;
-    
+
     ptr = ref_picture[0] + (src_y * linesize) + src_x;
 
     if(s->flags&CODEC_FLAG_EMU_EDGE){
@@ -2478,21 +2623,21 @@ static inline void gmc1_motion(MpegEncContext *s,
             ptr= s->edge_emu_buffer;
         }
     }
-    
+
     if((motion_x|motion_y)&7){
         s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
         s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
     }else{
         int dxy;
-        
+
         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
         if (s->no_rounding){
-	    s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
+            s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
         }else{
             s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
         }
     }
-    
+
     if(s->flags&CODEC_FLAG_GRAY) return;
 
     motion_x= s->sprite_offset[1][0];
@@ -2519,14 +2664,14 @@ static inline void gmc1_motion(MpegEncContext *s,
         }
     }
     s->dsp.gmc1(dest_cb, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
-    
+
     ptr = ref_picture[2] + offset;
     if(emu){
         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
         ptr= s->edge_emu_buffer;
     }
     s->dsp.gmc1(dest_cr, ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
-    
+
     return;
 }
 
@@ -2548,17 +2693,17 @@ static inline void gmc_motion(MpegEncContext *s,
     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
 
     s->dsp.gmc(dest_y, ptr, linesize, 16,
-           ox, 
-           oy, 
+           ox,
+           oy,
            s->sprite_delta[0][0], s->sprite_delta[0][1],
-           s->sprite_delta[1][0], s->sprite_delta[1][1], 
+           s->sprite_delta[1][0], s->sprite_delta[1][1],
            a+1, (1<<(2*a+1)) - s->no_rounding,
            s->h_edge_pos, s->v_edge_pos);
     s->dsp.gmc(dest_y+8, ptr, linesize, 16,
-           ox + s->sprite_delta[0][0]*8, 
-           oy + s->sprite_delta[1][0]*8, 
+           ox + s->sprite_delta[0][0]*8,
+           oy + s->sprite_delta[1][0]*8,
            s->sprite_delta[0][0], s->sprite_delta[0][1],
-           s->sprite_delta[1][0], s->sprite_delta[1][1], 
+           s->sprite_delta[1][0], s->sprite_delta[1][1],
            a+1, (1<<(2*a+1)) - s->no_rounding,
            s->h_edge_pos, s->v_edge_pos);
 
@@ -2569,19 +2714,19 @@ static inline void gmc_motion(MpegEncContext *s,
 
     ptr = ref_picture[1];
     s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
-           ox, 
-           oy, 
+           ox,
+           oy,
            s->sprite_delta[0][0], s->sprite_delta[0][1],
-           s->sprite_delta[1][0], s->sprite_delta[1][1], 
+           s->sprite_delta[1][0], s->sprite_delta[1][1],
            a+1, (1<<(2*a+1)) - s->no_rounding,
            s->h_edge_pos>>1, s->v_edge_pos>>1);
-    
+
     ptr = ref_picture[2];
     s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
-           ox, 
-           oy, 
+           ox,
+           oy,
            s->sprite_delta[0][0], s->sprite_delta[0][1],
-           s->sprite_delta[1][0], s->sprite_delta[1][1], 
+           s->sprite_delta[1][0], s->sprite_delta[1][1],
            a+1, (1<<(2*a+1)) - s->no_rounding,
            s->h_edge_pos>>1, s->v_edge_pos>>1);
 }
@@ -2598,7 +2743,7 @@ static inline void gmc_motion(MpegEncContext *s,
  * @param w width of the source buffer
  * @param h height of the source buffer
  */
-void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                     int src_x, int src_y, int w, int h){
     int x, y;
     int start_y, start_x, end_y, end_x;
@@ -2643,13 +2788,13 @@ void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w,
             buf[x + y*linesize]= buf[x + (end_y-1)*linesize];
         }
     }
-                                    
+
     for(y=0; y<block_h; y++){
        //left
         for(x=0; x<start_x; x++){
             buf[x + y*linesize]= buf[start_x + y*linesize];
         }
-       
+
        //right
         for(x=end_x; x<block_w; x++){
             buf[x + y*linesize]= buf[end_x - 1 + y*linesize];
@@ -2657,7 +2802,7 @@ void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w,
     }
 }
 
-static inline int hpel_motion(MpegEncContext *s, 
+static inline int hpel_motion(MpegEncContext *s,
                                   uint8_t *dest, uint8_t *src,
                                   int field_based, int field_select,
                                   int src_x, int src_y,
@@ -2672,7 +2817,7 @@ static inline int hpel_motion(MpegEncContext *s,
     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
     src_x += motion_x >> 1;
     src_y += motion_y >> 1;
-                
+
     /* WARNING: do no forget half pels */
     src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
     if (src_x == width)
@@ -2697,7 +2842,7 @@ static inline int hpel_motion(MpegEncContext *s,
     return emu;
 }
 
-static inline int hpel_motion_lowres(MpegEncContext *s, 
+static inline int hpel_motion_lowres(MpegEncContext *s,
                                   uint8_t *dest, uint8_t *src,
                                   int field_based, int field_select,
                                   int src_x, int src_y,
@@ -2720,7 +2865,7 @@ static inline int hpel_motion_lowres(MpegEncContext *s,
     sy= motion_y & s_mask;
     src_x += motion_x >> (lowres+1);
     src_y += motion_y >> (lowres+1);
-                
+
     src += src_y * stride + src_x;
 
     if(   (unsigned)src_x > h_edge_pos                 - (!!sx) - w
@@ -2748,8 +2893,8 @@ static always_inline void mpeg_motion(MpegEncContext *s,
 {
     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
     int dxy, uvdxy, mx, my, src_x, src_y, uvsrc_x, uvsrc_y, v_edge_pos, uvlinesize, linesize;
-    
-#if 0    
+
+#if 0
 if(s->quarter_sample)
 {
     motion_x>>=1;
@@ -2822,9 +2967,9 @@ if(s->quarter_sample)
             ptr_y = s->edge_emu_buffer;
             if(!(s->flags&CODEC_FLAG_GRAY)){
                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
-                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
+                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
-                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
+                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                  uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
                 ptr_cb= uvbuf;
                 ptr_cr= uvbuf+16;
@@ -2844,7 +2989,7 @@ if(s->quarter_sample)
     }
 
     pix_op[0][dxy](dest_y, ptr_y, linesize, h);
-    
+
     if(!(s->flags&CODEC_FLAG_GRAY)){
         pix_op[s->chroma_x_shift][uvdxy](dest_cb, ptr_cb, uvlinesize, h >> s->chroma_y_shift);
         pix_op[s->chroma_x_shift][uvdxy](dest_cr, ptr_cr, uvlinesize, h >> s->chroma_y_shift);
@@ -2877,7 +3022,7 @@ static always_inline void mpeg_motion_lowres(MpegEncContext *s,
         motion_x/=2;
         motion_y/=2;
     }
-    
+
     if(field_based){
         motion_y += (bottom_field - field_select)*((1<<lowres)-1);
     }
@@ -2886,7 +3031,7 @@ static always_inline void mpeg_motion_lowres(MpegEncContext *s,
     sy= motion_y & s_mask;
     src_x = s->mb_x*2*block_s               + (motion_x >> (lowres+1));
     src_y =(s->mb_y*2*block_s>>field_based) + (motion_y >> (lowres+1));
-    
+
     if (s->out_format == FMT_H263) {
         uvsx = ((motion_x>>1) & s_mask) | (sx&1);
         uvsy = ((motion_y>>1) & s_mask) | (sy&1);
@@ -2919,9 +3064,9 @@ static always_inline void mpeg_motion_lowres(MpegEncContext *s,
             ptr_y = s->edge_emu_buffer;
             if(!(s->flags&CODEC_FLAG_GRAY)){
                 uint8_t *uvbuf= s->edge_emu_buffer+18*s->linesize;
-                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based, 
+                ff_emulated_edge_mc(uvbuf  , ptr_cb, s->uvlinesize, 9, 9+field_based,
                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
-                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based, 
+                ff_emulated_edge_mc(uvbuf+16, ptr_cr, s->uvlinesize, 9, 9+field_based,
                                  uvsrc_x, uvsrc_y<<field_based, h_edge_pos>>1, v_edge_pos>>1);
                 ptr_cb= uvbuf;
                 ptr_cr= uvbuf+16;
@@ -2943,7 +3088,7 @@ static always_inline void mpeg_motion_lowres(MpegEncContext *s,
     sx <<= 2 - lowres;
     sy <<= 2 - lowres;
     pix_op[lowres-1](dest_y, ptr_y, linesize, h, sx, sy);
-    
+
     if(!(s->flags&CODEC_FLAG_GRAY)){
         uvsx <<= 2 - lowres;
         uvsy <<= 2 - lowres;
@@ -2968,7 +3113,7 @@ static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
     OBMC_FILTER(x+1       , t, l, m, r, b);\
     OBMC_FILTER(x  +stride, t, l, m, r, b);\
     OBMC_FILTER(x+1+stride, t, l, m, r, b);
-    
+
     x=0;
     OBMC_FILTER (x  , 2, 2, 4, 0, 0);
     OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
@@ -3015,9 +3160,9 @@ static inline void obmc_motion(MpegEncContext *s,
 {
     int i;
     uint8_t *ptr[5];
-    
+
     assert(s->quarter_sample==0);
-    
+
     for(i=0; i<5; i++){
         if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
             ptr[i]= ptr[MID];
@@ -3032,7 +3177,7 @@ static inline void obmc_motion(MpegEncContext *s,
         }
     }
 
-    put_obmc(dest, ptr, s->linesize);                
+    put_obmc(dest, ptr, s->linesize);
 }
 
 static inline void qpel_motion(MpegEncContext *s,
@@ -3052,7 +3197,7 @@ static inline void qpel_motion(MpegEncContext *s,
     v_edge_pos = s->v_edge_pos >> field_based;
     linesize = s->linesize << field_based;
     uvlinesize = s->uvlinesize << field_based;
-    
+
     if(field_based){
         mx= motion_x/2;
         my= motion_y>>1;
@@ -3081,16 +3226,16 @@ static inline void qpel_motion(MpegEncContext *s,
     ptr_cb = ref_picture[1] + uvsrc_y * uvlinesize + uvsrc_x;
     ptr_cr = ref_picture[2] + uvsrc_y * uvlinesize + uvsrc_x;
 
-    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16 
+    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 16
        || (unsigned)src_y >    v_edge_pos - (motion_y&3) - h  ){
-        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based, 
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr_y, s->linesize, 17, 17+field_based,
                          src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
         ptr_y= s->edge_emu_buffer;
         if(!(s->flags&CODEC_FLAG_GRAY)){
             uint8_t *uvbuf= s->edge_emu_buffer + 18*s->linesize;
-            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based, 
+            ff_emulated_edge_mc(uvbuf, ptr_cb, s->uvlinesize, 9, 9 + field_based,
                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
-            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based, 
+            ff_emulated_edge_mc(uvbuf + 16, ptr_cr, s->uvlinesize, 9, 9 + field_based,
                              uvsrc_x, uvsrc_y<<field_based, s->h_edge_pos>>1, s->v_edge_pos>>1);
             ptr_cb= uvbuf;
             ptr_cr= uvbuf + 16;
@@ -3141,12 +3286,12 @@ static inline void chroma_4mv_motion(MpegEncContext *s,
                                      int mx, int my){
     int dxy, emu=0, src_x, src_y, offset;
     uint8_t *ptr;
-    
+
     /* In case of 8X8, we construct a single chroma motion vector
        with a special rounding */
     mx= ff_h263_round_chroma(mx);
     my= ff_h263_round_chroma(my);
-    
+
     dxy = ((my & 1) << 1) | (mx & 1);
     mx >>= 1;
     my >>= 1;
@@ -3159,7 +3304,7 @@ static inline void chroma_4mv_motion(MpegEncContext *s,
     src_y = clip(src_y, -8, s->height/2);
     if (src_y == s->height/2)
         dxy &= ~2;
-    
+
     offset = (src_y * (s->uvlinesize)) + src_x;
     ptr = ref_picture[1] + offset;
     if(s->flags&CODEC_FLAG_EMU_EDGE){
@@ -3192,7 +3337,7 @@ static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
     const int v_edge_pos = s->v_edge_pos >> (lowres+1);
     int emu=0, src_x, src_y, offset, sx, sy;
     uint8_t *ptr;
-    
+
     if(s->quarter_sample){
         mx/=2;
         my/=2;
@@ -3202,12 +3347,12 @@ static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
        with a special rounding */
     mx= ff_h263_round_chroma(mx);
     my= ff_h263_round_chroma(my);
-    
+
     sx= mx & s_mask;
     sy= my & s_mask;
     src_x = s->mb_x*block_s + (mx >> (lowres+1));
     src_y = s->mb_y*block_s + (my >> (lowres+1));
-    
+
     offset = src_y * s->uvlinesize + src_x;
     ptr = ref_picture[1] + offset;
     if(s->flags&CODEC_FLAG_EMU_EDGE){
@@ -3217,11 +3362,11 @@ static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
             ptr= s->edge_emu_buffer;
             emu=1;
         }
-    }     
+    }
     sx <<= 2 - lowres;
     sy <<= 2 - lowres;
     pix_op[lowres](dest_cb, ptr, s->uvlinesize, block_s, sx, sy);
-          
+
     ptr = ref_picture[2] + offset;
     if(emu){
         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, h_edge_pos, v_edge_pos);
@@ -3242,9 +3387,9 @@ static inline void chroma_4mv_motion_lowres(MpegEncContext *s,
  * @param pic_op qpel motion compensation function (average or put normally)
  * the motion vectors are taken from s->mv and the MV type from s->mv_type
  */
-static inline void MPV_motion(MpegEncContext *s, 
+static inline void MPV_motion(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
-                              int dir, uint8_t **ref_picture, 
+                              int dir, uint8_t **ref_picture,
                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
 {
     int dxy, mx, my, src_x, src_y, motion_x, motion_y;
@@ -3261,7 +3406,7 @@ static inline void MPV_motion(MpegEncContext *s,
         const int mot_xy= mb_x*2 + mb_y*2*mot_stride;
 
         assert(!s->mb_skipped);
-                
+
         memcpy(mv_cache[1][1], s->current_picture.motion_val[0][mot_xy           ], sizeof(int16_t)*4);
         memcpy(mv_cache[2][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
         memcpy(mv_cache[3][1], s->current_picture.motion_val[0][mot_xy+mot_stride], sizeof(int16_t)*4);
@@ -3287,7 +3432,7 @@ static inline void MPV_motion(MpegEncContext *s,
             *(int32_t*)mv_cache[1][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2];
             *(int32_t*)mv_cache[2][3]= *(int32_t*)s->current_picture.motion_val[0][mot_xy+2+mot_stride];
         }
-        
+
         mx = 0;
         my = 0;
         for(i=0;i<4;i++) {
@@ -3314,7 +3459,7 @@ static inline void MPV_motion(MpegEncContext *s,
 
         return;
     }
-   
+
     switch(s->mv_type) {
     case MV_TYPE_16X16:
         if(s->mcsel){
@@ -3326,7 +3471,7 @@ static inline void MPV_motion(MpegEncContext *s,
                             ref_picture);
             }
         }else if(s->quarter_sample){
-            qpel_motion(s, dest_y, dest_cb, dest_cr, 
+            qpel_motion(s, dest_y, dest_cb, dest_cr,
                         0, 0, 0,
                         ref_picture, pix_op, qpix_op,
                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
@@ -3336,11 +3481,11 @@ static inline void MPV_motion(MpegEncContext *s,
                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
         }else
         {
-            mpeg_motion(s, dest_y, dest_cb, dest_cr, 
+            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                         0, 0, 0,
                         ref_picture, pix_op,
                         s->mv[dir][0][0], s->mv[dir][0][1], 16);
-        }           
+        }
         break;
     case MV_TYPE_8X8:
         mx = 0;
@@ -3353,7 +3498,7 @@ static inline void MPV_motion(MpegEncContext *s,
                 dxy = ((motion_y & 3) << 2) | (motion_x & 3);
                 src_x = mb_x * 16 + (motion_x >> 2) + (i & 1) * 8;
                 src_y = mb_y * 16 + (motion_y >> 2) + (i >>1) * 8;
-                    
+
                 /* WARNING: do no forget half pels */
                 src_x = clip(src_x, -16, s->width);
                 if (src_x == s->width)
@@ -3361,10 +3506,10 @@ static inline void MPV_motion(MpegEncContext *s,
                 src_y = clip(src_y, -16, s->height);
                 if (src_y == s->height)
                     dxy &= ~12;
-                    
+
                 ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
                 if(s->flags&CODEC_FLAG_EMU_EDGE){
-                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8 
+                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&3) - 8
                        || (unsigned)src_y > s->v_edge_pos - (motion_y&3) - 8 ){
                         ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
                         ptr= s->edge_emu_buffer;
@@ -3404,7 +3549,7 @@ static inline void MPV_motion(MpegEncContext *s,
                                 s->mv[dir][i][0], s->mv[dir][i][1], 8);
                 }
             }else{
-                /* top field */       
+                /* top field */
                 mpeg_motion(s, dest_y, dest_cb, dest_cr,
                             1, 0, s->field_select[dir][0],
                             ref_picture, pix_op,
@@ -3418,7 +3563,7 @@ static inline void MPV_motion(MpegEncContext *s,
         } else {
             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                 ref_picture= s->current_picture_ptr->data;
-            } 
+            }
 
             mpeg_motion(s, dest_y, dest_cb, dest_cr,
                         0, 0, s->field_select[dir][0],
@@ -3434,17 +3579,17 @@ static inline void MPV_motion(MpegEncContext *s,
                 ref2picture= ref_picture;
             }else{
                 ref2picture= s->current_picture_ptr->data;
-            } 
+            }
 
-            mpeg_motion(s, dest_y, dest_cb, dest_cr, 
+            mpeg_motion(s, dest_y, dest_cb, dest_cr,
                         0, 0, s->field_select[dir][i],
                         ref2picture, pix_op,
                         s->mv[dir][i][0], s->mv[dir][i][1] + 16*i, 8);
-                
+
             dest_y += 16*s->linesize;
             dest_cb+= (16>>s->chroma_y_shift)*s->uvlinesize;
             dest_cr+= (16>>s->chroma_y_shift)*s->uvlinesize;
-        }        
+        }
         break;
     case MV_TYPE_DMV:
         if(s->picture_structure == PICT_FRAME){
@@ -3456,21 +3601,21 @@ static inline void MPV_motion(MpegEncContext *s,
                                 ref_picture, pix_op,
                                 s->mv[dir][2*i + j][0], s->mv[dir][2*i + j][1], 8);
                 }
-                pix_op = s->dsp.avg_pixels_tab; 
+                pix_op = s->dsp.avg_pixels_tab;
             }
         }else{
             for(i=0; i<2; i++){
-                mpeg_motion(s, dest_y, dest_cb, dest_cr, 
+                mpeg_motion(s, dest_y, dest_cb, dest_cr,
                             0, 0, s->picture_structure != i+1,
                             ref_picture, pix_op,
                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],16);
 
                 // after put we make avg of the same block
-                pix_op=s->dsp.avg_pixels_tab; 
+                pix_op=s->dsp.avg_pixels_tab;
 
                 //opposite parity is always in the same frame if this is second field
                 if(!s->first_field){
-                    ref_picture = s->current_picture_ptr->data;    
+                    ref_picture = s->current_picture_ptr->data;
                 }
             }
         }
@@ -3490,22 +3635,22 @@ static inline void MPV_motion(MpegEncContext *s,
  * @param pic_op halfpel motion compensation function (average or put normally)
  * the motion vectors are taken from s->mv and the MV type from s->mv_type
  */
-static inline void MPV_motion_lowres(MpegEncContext *s, 
+static inline void MPV_motion_lowres(MpegEncContext *s,
                               uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
-                              int dir, uint8_t **ref_picture, 
+                              int dir, uint8_t **ref_picture,
                               h264_chroma_mc_func *pix_op)
 {
     int mx, my;
     int mb_x, mb_y, i;
     const int lowres= s->avctx->lowres;
-    const int block_s= 8>>lowres;    
+    const int block_s= 8>>lowres;
 
     mb_x = s->mb_x;
     mb_y = s->mb_y;
 
     switch(s->mv_type) {
     case MV_TYPE_16X16:
-        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
+        mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                     0, 0, 0,
                     ref_picture, pix_op,
                     s->mv[dir][0][0], s->mv[dir][0][1], 2*block_s);
@@ -3531,7 +3676,7 @@ static inline void MPV_motion_lowres(MpegEncContext *s,
         break;
     case MV_TYPE_FIELD:
         if (s->picture_structure == PICT_FRAME) {
-            /* top field */       
+            /* top field */
             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                         1, 0, s->field_select[dir][0],
                         ref_picture, pix_op,
@@ -3544,7 +3689,7 @@ static inline void MPV_motion_lowres(MpegEncContext *s,
         } else {
             if(s->picture_structure != s->field_select[dir][0] + 1 && s->pict_type != B_TYPE && !s->first_field){
                 ref_picture= s->current_picture_ptr->data;
-            } 
+            }
 
             mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                         0, 0, s->field_select[dir][0],
@@ -3560,17 +3705,17 @@ static inline void MPV_motion_lowres(MpegEncContext *s,
                 ref2picture= ref_picture;
             }else{
                 ref2picture= s->current_picture_ptr->data;
-            } 
+            }
 
-            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
+            mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                         0, 0, s->field_select[dir][i],
                         ref2picture, pix_op,
                         s->mv[dir][i][0], s->mv[dir][i][1] + 2*block_s*i, block_s);
-                
+
             dest_y += 2*block_s*s->linesize;
             dest_cb+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
             dest_cr+= (2*block_s>>s->chroma_y_shift)*s->uvlinesize;
-        }        
+        }
         break;
     case MV_TYPE_DMV:
         if(s->picture_structure == PICT_FRAME){
@@ -3586,7 +3731,7 @@ static inline void MPV_motion_lowres(MpegEncContext *s,
             }
         }else{
             for(i=0; i<2; i++){
-                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr, 
+                mpeg_motion_lowres(s, dest_y, dest_cb, dest_cr,
                             0, 0, s->picture_structure != i+1,
                             ref_picture, pix_op,
                             s->mv[dir][2*i][0],s->mv[dir][2*i][1],2*block_s);
@@ -3596,7 +3741,7 @@ static inline void MPV_motion_lowres(MpegEncContext *s,
 
                 //opposite parity is always in the same frame if this is second field
                 if(!s->first_field){
-                    ref_picture = s->current_picture_ptr->data;    
+                    ref_picture = s->current_picture_ptr->data;
                 }
             }
         }
@@ -3606,7 +3751,7 @@ static inline void MPV_motion_lowres(MpegEncContext *s,
 }
 
 /* put block[] to dest[] */
-static inline void put_dct(MpegEncContext *s, 
+static inline void put_dct(MpegEncContext *s,
                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
 {
     s->dct_unquantize_intra(s, block, i, qscale);
@@ -3614,7 +3759,7 @@ static inline void put_dct(MpegEncContext *s,
 }
 
 /* add block[] to dest[] */
-static inline void add_dct(MpegEncContext *s, 
+static inline void add_dct(MpegEncContext *s,
                            DCTELEM *block, int i, uint8_t *dest, int line_size)
 {
     if (s->block_last_index[i] >= 0) {
@@ -3622,7 +3767,7 @@ static inline void add_dct(MpegEncContext *s,
     }
 }
 
-static inline void add_dequant_dct(MpegEncContext *s, 
+static inline void add_dequant_dct(MpegEncContext *s,
                            DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale)
 {
     if (s->block_last_index[i] >= 0) {
@@ -3639,9 +3784,9 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
 {
     int wrap = s->b8_stride;
     int xy = s->block_index[0];
-    
-    s->dc_val[0][xy           ] = 
-    s->dc_val[0][xy + 1       ] = 
+
+    s->dc_val[0][xy           ] =
+    s->dc_val[0][xy + 1       ] =
     s->dc_val[0][xy     + wrap] =
     s->dc_val[0][xy + 1 + wrap] = 1024;
     /* ac pred */
@@ -3661,7 +3806,7 @@ void ff_clean_intra_table_entries(MpegEncContext *s)
     /* ac pred */
     memset(s->ac_val[1][xy], 0, 16 * sizeof(int16_t));
     memset(s->ac_val[2][xy], 0, 16 * sizeof(int16_t));
-    
+
     s->mbintra_table[xy]= 0;
 }
 
@@ -3735,7 +3880,7 @@ static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM bloc
             if (s->mb_skipped) {
                 s->mb_skipped= 0;
                 assert(s->pict_type!=I_TYPE);
- 
+
                 (*mbskip_ptr) ++; /* indicate that this time we skipped it */
                 if(*mbskip_ptr >99) *mbskip_ptr= 99;
 
@@ -3750,10 +3895,10 @@ static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM bloc
                 *mbskip_ptr = 0; /* not skipped */
             }
         }
-        
+
         dct_linesize = linesize << s->interlaced_dct;
         dct_offset =(s->interlaced_dct)? linesize : linesize*block_size;
-        
+
         if(readable){
             dest_y=  s->dest[0];
             dest_cb= s->dest[1];
@@ -3779,7 +3924,7 @@ static always_inline void MPV_decode_mb_internal(MpegEncContext *s, DCTELEM bloc
                         MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix);
                     }
                 }else{
-                    if ((!s->no_rounding) || s->pict_type==B_TYPE){                
+                    if ((!s->no_rounding) || s->pict_type==B_TYPE){
                         op_pix = s->dsp.put_pixels_tab;
                         op_qpix= s->dsp.put_qpel_pixels_tab;
                     }else{
@@ -3961,16 +4106,16 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index
     const int maxlevel= s->max_qcoeff;
     const int minlevel= s->min_qcoeff;
     int overflow=0;
-    
+
     if(s->mb_intra){
         i=1; //skip clipping of intra dc
     }else
         i=0;
-    
+
     for(;i<=last_index; i++){
         const int j= s->intra_scantable.permutated[i];
         int level = block[j];
-       
+
         if     (level>maxlevel){
             level=maxlevel;
             overflow++;
@@ -3978,10 +4123,10 @@ static inline void clip_coeffs(MpegEncContext *s, DCTELEM *block, int last_index
             level=minlevel;
             overflow++;
         }
-        
+
         block[j]= level;
     }
-    
+
     if(overflow && s->avctx->mb_decision == FF_MB_DECISION_SIMPLE)
         av_log(s->avctx, AV_LOG_INFO, "warning, clipping %d dct coefficients to %d..%d\n", overflow, minlevel, maxlevel);
 }
@@ -3996,7 +4141,7 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
     if (s->avctx->draw_horiz_band) {
         AVFrame *src;
         int offset[4];
-        
+
         if(s->picture_structure != PICT_FRAME){
             h <<= 1;
             y <<= 1;
@@ -4005,13 +4150,13 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
 
         h= FFMIN(h, s->avctx->height - y);
 
-        if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER)) 
+        if(s->pict_type==B_TYPE || s->low_delay || (s->avctx->slice_flags&SLICE_FLAG_CODED_ORDER))
             src= (AVFrame*)s->current_picture_ptr;
         else if(s->last_picture_ptr)
             src= (AVFrame*)s->last_picture_ptr;
         else
             return;
-            
+
         if(s->pict_type==B_TYPE && s->picture_structure == PICT_FRAME && s->out_format != FMT_H264){
             offset[0]=
             offset[1]=
@@ -4019,7 +4164,7 @@ void ff_draw_horiz_band(MpegEncContext *s, int y, int h){
             offset[3]= 0;
         }else{
             offset[0]= y * s->linesize;;
-            offset[1]= 
+            offset[1]=
             offset[2]= (y >> s->chroma_y_shift) * s->uvlinesize;
             offset[3]= 0;
         }
@@ -4035,7 +4180,7 @@ void ff_init_block_index(MpegEncContext *s){ //FIXME maybe rename
     const int linesize= s->current_picture.linesize[0]; //not s->linesize as this would be wrong for field pics
     const int uvlinesize= s->current_picture.linesize[1];
     const int mb_size= 4 - s->avctx->lowres;
-        
+
     s->block_index[0]= s->b8_stride*(s->mb_y*2    ) - 2 + s->mb_x*2;
     s->block_index[1]= s->b8_stride*(s->mb_y*2    ) - 1 + s->mb_x*2;
     s->block_index[2]= s->b8_stride*(s->mb_y*2 + 1) - 2 + s->mb_x*2;
@@ -4092,26 +4237,26 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
     int dct_offset   = s->linesize*8; //default for progressive frames
     uint8_t *ptr_y, *ptr_cb, *ptr_cr;
     int wrap_y, wrap_c;
-    
+
     for(i=0; i<6; i++) skip_dct[i]=0;
-    
+
     if(s->adaptive_quant){
         const int last_qp= s->qscale;
         const int mb_xy= mb_x + mb_y*s->mb_stride;
 
         s->lambda= s->lambda_table[mb_xy];
         update_qscale(s);
-    
+
         if(!(s->flags&CODEC_FLAG_QP_RD)){
             s->dquant= s->qscale - last_qp;
 
             if(s->out_format==FMT_H263){
                 s->dquant= clip(s->dquant, -2, 2); //FIXME RD
-            
-                if(s->codec_id==CODEC_ID_MPEG4){        
+
+                if(s->codec_id==CODEC_ID_MPEG4){
                     if(!s->mb_intra){
                         if(s->pict_type == B_TYPE){
-                            if(s->dquant&1) 
+                            if(s->dquant&1)
                                 s->dquant= (s->dquant/2)*2;
                             if(s->mv_dir&MV_DIRECT)
                                 s->dquant= 0;
@@ -4147,22 +4292,22 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
             int progressive_score, interlaced_score;
 
             s->interlaced_dct=0;
-            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8) 
+            progressive_score= s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y, 8)
                               +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y*8, NULL, wrap_y, 8) - 400;
 
             if(progressive_score > 0){
-                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8) 
+                interlaced_score = s->dsp.ildct_cmp[4](s, ptr_y           , NULL, wrap_y*2, 8)
                                   +s->dsp.ildct_cmp[4](s, ptr_y + wrap_y  , NULL, wrap_y*2, 8);
                 if(progressive_score > interlaced_score){
                     s->interlaced_dct=1;
-            
+
                     dct_offset= wrap_y;
                     wrap_y<<=1;
                 }
             }
         }
-        
-	s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
+
+        s->dsp.get_pixels(s->block[0], ptr_y                 , wrap_y);
         s->dsp.get_pixels(s->block[1], ptr_y              + 8, wrap_y);
         s->dsp.get_pixels(s->block[2], ptr_y + dct_offset    , wrap_y);
         s->dsp.get_pixels(s->block[3], ptr_y + dct_offset + 8, wrap_y);
@@ -4171,7 +4316,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
             skip_dct[4]= 1;
             skip_dct[5]= 1;
         }else{
-	    s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
+            s->dsp.get_pixels(s->block[4], ptr_cb, wrap_c);
             s->dsp.get_pixels(s->block[5], ptr_cr, wrap_c);
         }
     }else{
@@ -4184,7 +4329,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
         dest_cr = s->dest[2];
 
         if ((!s->no_rounding) || s->pict_type==B_TYPE){
-	    op_pix = s->dsp.put_pixels_tab;
+            op_pix = s->dsp.put_pixels_tab;
             op_qpix= s->dsp.put_qpel_pixels_tab;
         }else{
             op_pix = s->dsp.put_no_rnd_pixels_tab;
@@ -4204,29 +4349,29 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
             int progressive_score, interlaced_score;
 
             s->interlaced_dct=0;
-            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8) 
+            progressive_score= s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y, 8)
                               +s->dsp.ildct_cmp[0](s, dest_y + wrap_y*8, ptr_y + wrap_y*8, wrap_y, 8) - 400;
-            
+
             if(s->avctx->ildct_cmp == FF_CMP_VSSE) progressive_score -= 400;
 
             if(progressive_score>0){
-                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8) 
+                interlaced_score = s->dsp.ildct_cmp[0](s, dest_y           , ptr_y           , wrap_y*2, 8)
                                   +s->dsp.ildct_cmp[0](s, dest_y + wrap_y  , ptr_y + wrap_y  , wrap_y*2, 8);
-            
+
                 if(progressive_score > interlaced_score){
                     s->interlaced_dct=1;
-            
+
                     dct_offset= wrap_y;
                     wrap_y<<=1;
                 }
             }
         }
-        
-	s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
+
+        s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
         s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
         s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
         s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
-        
+
         if(s->flags&CODEC_FLAG_GRAY){
             skip_dct[4]= 1;
             skip_dct[5]= 1;
@@ -4234,10 +4379,10 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
             s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
             s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
         }
-        /* pre quantization */         
+        /* pre quantization */
         if(s->current_picture.mc_mb_var[s->mb_stride*mb_y+ mb_x]<2*s->qscale*s->qscale){
             //FIXME optimize
-	    if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
+            if(s->dsp.sad[1](NULL, ptr_y               , dest_y               , wrap_y, 8) < 20*s->qscale) skip_dct[0]= 1;
             if(s->dsp.sad[1](NULL, ptr_y            + 8, dest_y            + 8, wrap_y, 8) < 20*s->qscale) skip_dct[1]= 1;
             if(s->dsp.sad[1](NULL, ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y, 8) < 20*s->qscale) skip_dct[2]= 1;
             if(s->dsp.sad[1](NULL, ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y, 8) < 20*s->qscale) skip_dct[3]= 1;
@@ -4255,7 +4400,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
         if(!skip_dct[5]) get_vissual_weight(weight[5], ptr_cr                , wrap_c);
         memcpy(orig[0], s->block[0], sizeof(DCTELEM)*64*6);
     }
-            
+
     /* DCT & quantize */
     assert(s->out_format!=FMT_MJPEG || s->qscale==8);
     {
@@ -4277,7 +4422,7 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
                 }
             }
         }
-        
+
         if(s->luma_elim_threshold && !s->mb_intra)
             for(i=0; i<4; i++)
                 dct_single_coeff_elimination(s, i, s->luma_elim_threshold);
@@ -4351,19 +4496,19 @@ static void encode_mb(MpegEncContext *s, int motion_x, int motion_y)
 void ff_mpeg_flush(AVCodecContext *avctx){
     int i;
     MpegEncContext *s = avctx->priv_data;
-    
-    if(s==NULL || s->picture==NULL) 
+
+    if(s==NULL || s->picture==NULL)
         return;
-    
+
     for(i=0; i<MAX_PICTURE_COUNT; i++){
        if(s->picture[i].data[0] && (   s->picture[i].type == FF_BUFFER_TYPE_INTERNAL
                                     || s->picture[i].type == FF_BUFFER_TYPE_USER))
         avctx->release_buffer(avctx, (AVFrame*)&s->picture[i]);
     }
     s->current_picture_ptr = s->last_picture_ptr = s->next_picture_ptr = NULL;
-    
+
     s->mb_x= s->mb_y= 0;
-    
+
     s->parse_context.state= -1;
     s->parse_context.frame_start_found= 0;
     s->parse_context.overread= 0;
@@ -4382,7 +4527,7 @@ void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
     int i;
 
     if(length==0) return;
-    
+
     if(words < 16){
         for(i=0; i<words; i++) put_bits(pb, 16, be2me_16(srcw[i]));
     }else if(put_bits_count(pb)&7){
@@ -4394,7 +4539,7 @@ void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length)
         memcpy(pbBufPtr(pb), src+i, 2*words-i);
         skip_put_bytes(pb, 2*words-i);
     }
-        
+
     put_bits(pb, bits, be2me_16(srcw[words])>>(16-bits));
 }
 
@@ -4407,7 +4552,7 @@ static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext
     d->mb_skip_run= s->mb_skip_run;
     for(i=0; i<3; i++)
         d->last_dc[i]= s->last_dc[i];
-    
+
     /* statistics */
     d->mv_bits= s->mv_bits;
     d->i_tex_bits= s->i_tex_bits;
@@ -4427,14 +4572,14 @@ static inline void copy_context_before_encode(MpegEncContext *d, MpegEncContext
 static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *s, int type){
     int i;
 
-    memcpy(d->mv, s->mv, 2*4*2*sizeof(int)); 
+    memcpy(d->mv, s->mv, 2*4*2*sizeof(int));
     memcpy(d->last_mv, s->last_mv, 2*2*2*sizeof(int)); //FIXME is memcpy faster then a loop?
-    
+
     /* mpeg1 */
     d->mb_skip_run= s->mb_skip_run;
     for(i=0; i<3; i++)
         d->last_dc[i]= s->last_dc[i];
-    
+
     /* statistics */
     d->mv_bits= s->mv_bits;
     d->i_tex_bits= s->i_tex_bits;
@@ -4461,13 +4606,13 @@ static inline void copy_context_after_encode(MpegEncContext *d, MpegEncContext *
     d->qscale= s->qscale;
 }
 
-static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type, 
+static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegEncContext *best, int type,
                            PutBitContext pb[2], PutBitContext pb2[2], PutBitContext tex_pb[2],
                            int *dmin, int *next_block, int motion_x, int motion_y)
 {
     int score;
     uint8_t *dest_backup[3];
-    
+
     copy_context_before_encode(s, backup, type);
 
     s->block= s->blocks[*next_block];
@@ -4476,7 +4621,7 @@ static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegE
         s->pb2   = pb2   [*next_block];
         s->tex_pb= tex_pb[*next_block];
     }
-    
+
     if(*next_block){
         memcpy(dest_backup, s->dest, sizeof(s->dest));
         s->dest[0] = s->rd_scratchpad;
@@ -4486,20 +4631,20 @@ static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegE
     }
 
     encode_mb(s, motion_x, motion_y);
-    
+
     score= put_bits_count(&s->pb);
     if(s->data_partitioning){
         score+= put_bits_count(&s->pb2);
         score+= put_bits_count(&s->tex_pb);
     }
-   
+
     if(s->avctx->mb_decision == FF_MB_DECISION_RD){
         MPV_decode_mb(s, s->block);
 
         score *= s->lambda2;
         score += sse_mb(s) << FF_LAMBDA_SHIFT;
     }
-    
+
     if(*next_block){
         memcpy(s->dest, dest_backup, sizeof(s->dest));
     }
@@ -4511,25 +4656,25 @@ static inline void encode_mb_hq(MpegEncContext *s, MpegEncContext *backup, MpegE
         copy_context_after_encode(best, s, type);
     }
 }
-                
+
 static int sse(MpegEncContext *s, uint8_t *src1, uint8_t *src2, int w, int h, int stride){
     uint32_t *sq = squareTbl + 256;
     int acc=0;
     int x,y;
-    
-    if(w==16 && h==16) 
+
+    if(w==16 && h==16)
         return s->dsp.sse[0](NULL, src1, src2, stride, 16);
     else if(w==8 && h==8)
         return s->dsp.sse[1](NULL, src1, src2, stride, 8);
-    
+
     for(y=0; y<h; y++){
         for(x=0; x<w; x++){
             acc+= sq[src1[x + y*stride] - src2[x + y*stride]];
-        } 
+        }
     }
-    
+
     assert(acc>=0);
-    
+
     return acc;
 }
 
@@ -4561,7 +4706,7 @@ static int sse_mb(MpegEncContext *s){
 static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
     MpegEncContext *s= arg;
 
-    
+
     s->me.pre_pass=1;
     s->me.dia_size= s->avctx->pre_dia_size;
     s->first_slice_line=1;
@@ -4571,9 +4716,9 @@ static int pre_estimate_motion_thread(AVCodecContext *c, void *arg){
         }
         s->first_slice_line=0;
     }
-    
+
     s->me.pre_pass=0;
-    
+
     return 0;
 }
 
@@ -4590,7 +4735,7 @@ static int estimate_motion_thread(AVCodecContext *c, void *arg){
             s->block_index[1]+=2;
             s->block_index[2]+=2;
             s->block_index[3]+=2;
-            
+
             /* compute motion vector & mb_type and store in context */
             if(s->pict_type==B_TYPE)
                 ff_estimate_b_frame_motion(s, s->mb_x, s->mb_y);
@@ -4614,7 +4759,7 @@ static int mb_var_thread(AVCodecContext *c, void *arg){
             uint8_t *pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
             int varc;
             int sum = s->dsp.pix_sum(pix, s->linesize);
-    
+
             varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
 
             s->current_picture.mb_var [s->mb_stride * mb_y + mb_x] = varc;
@@ -4632,7 +4777,7 @@ static void write_slice_end(MpegEncContext *s){
         if(s->partitioned_frame){
             ff_mpeg4_merge_partitions(s);
         }
-    
+
         ff_mpeg4_stuffing(&s->pb);
     }else if(s->out_format == FMT_MJPEG){
         ff_mjpeg_stuffing(&s->pb);
@@ -4641,7 +4786,7 @@ static void write_slice_end(MpegEncContext *s){
 
     align_put_bits(&s->pb);
     flush_put_bits(&s->pb);
-    
+
     if((s->flags&CODEC_FLAG_PASS1) && !s->partitioned_frame)
         s->misc_bits+= get_bits_diff(s);
 }
@@ -4677,12 +4822,12 @@ static int encode_thread(AVCodecContext *c, void *arg){
         /* init last dc values */
         /* note: quant matrix value (8) is implied here */
         s->last_dc[i] = 128 << s->intra_dc_precision;
-        
-        s->current_picture_ptr->error[i] = 0;
+
+        s->current_picture.error[i] = 0;
     }
     s->mb_skip_run = 0;
     memset(s->last_mv, 0, sizeof(s->last_mv));
-     
+
     s->last_mv_dir = 0;
 
     switch(s->codec_id){
@@ -4701,7 +4846,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
     }
 
     s->resync_mb_x=0;
-    s->resync_mb_y=0; 
+    s->resync_mb_y=0;
     s->first_slice_line = 1;
     s->ptr_lastgob = s->pb.buf;
     for(mb_y= s->start_mb_y; mb_y < s->end_mb_y; mb_y++) {
@@ -4711,7 +4856,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
 
         ff_set_qscale(s, s->qscale);
         ff_init_block_index(s);
-        
+
         for(mb_x=0; mb_x < s->mb_width; mb_x++) {
             int xy= mb_y*s->mb_stride + mb_x; // removed const, H261 needs to adjust this
             int mb_type= s->mb_type[xy];
@@ -4746,13 +4891,13 @@ static int encode_thread(AVCodecContext *c, void *arg){
             /* write gob / video packet header  */
             if(s->rtp_mode){
                 int current_packet_size, is_gob_start;
-                
+
                 current_packet_size= ((put_bits_count(&s->pb)+7)>>3) - (s->ptr_lastgob - s->pb.buf);
-                
-                is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0; 
-                
+
+                is_gob_start= s->avctx->rtp_payload_size && current_packet_size >= s->avctx->rtp_payload_size && mb_y + mb_x>0;
+
                 if(s->start_mb_y == mb_y && mb_y > 0 && mb_x==0) is_gob_start=1;
-                
+
                 switch(s->codec_id){
                 case CODEC_ID_H263:
                 case CODEC_ID_H263P:
@@ -4777,10 +4922,10 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         }
 #endif /* #if 0 */
                     }
-                
+
                     assert((put_bits_count(&s->pb)&7) == 0);
                     current_packet_size= pbBufPtr(&s->pb) - s->ptr_lastgob;
-                    
+
                     if(s->avctx->error_rate && s->resync_mb_x + s->resync_mb_y > 0){
                         int r= put_bits_count(&s->pb)/8 + s->picture_number + 16 + s->mb_x + s->mb_y;
                         int d= 100 / s->avctx->error_rate;
@@ -4797,7 +4942,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width + mb_x - s->resync_mb_x;
                         s->avctx->rtp_callback(s->avctx, s->ptr_lastgob, current_packet_size, number_mb);
                     }
-                    
+
                     switch(s->codec_id){
 /* xine: do not need this for decode or MPEG-1 encoding modes */
 #if 0
@@ -4815,7 +4960,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
 #if 0
                     case CODEC_ID_H263:
                     case CODEC_ID_H263P:
-                        h263_encode_gob_header(s, mb_y);                       
+                        h263_encode_gob_header(s, mb_y);
                     break;
 #endif /* #if 0 */
                     }
@@ -4825,7 +4970,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         s->misc_bits+= bits - s->last_bits;
                         s->last_bits= bits;
                     }
-    
+
                     s->ptr_lastgob += current_packet_size;
                     s->first_slice_line=1;
                     s->resync_mb_x=mb_x;
@@ -4835,7 +4980,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
 
             if(  (s->resync_mb_x   == s->mb_x)
                && s->resync_mb_y+1 == s->mb_y){
-                s->first_slice_line=0; 
+                s->first_slice_line=0;
             }
 
             s->mb_skipped=0;
@@ -4860,10 +5005,10 @@ static int encode_thread(AVCodecContext *c, void *arg){
                     s->mb_intra= 0;
                     s->mv[0][0][0] = s->p_mv_table[xy][0];
                     s->mv[0][0][1] = s->p_mv_table[xy][1];
-                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER, pb, pb2, tex_pb,
                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                 }
-                if(mb_type&CANDIDATE_MB_TYPE_INTER_I){ 
+                if(mb_type&CANDIDATE_MB_TYPE_INTER_I){
                     s->mv_dir = MV_DIR_FORWARD;
                     s->mv_type = MV_TYPE_FIELD;
                     s->mb_intra= 0;
@@ -4872,7 +5017,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         s->mv[0][i][0] = s->p_field_mv_table[i][j][xy][0];
                         s->mv[0][i][1] = s->p_field_mv_table[i][j][xy][1];
                     }
-                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER_I, pb, pb2, tex_pb,
                                  &dmin, &next_block, 0, 0);
                 }
                 if(mb_type&CANDIDATE_MB_TYPE_SKIPPED){
@@ -4881,10 +5026,10 @@ static int encode_thread(AVCodecContext *c, void *arg){
                     s->mb_intra= 0;
                     s->mv[0][0][0] = 0;
                     s->mv[0][0][1] = 0;
-                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_SKIPPED, pb, pb2, tex_pb,
                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                 }
-                if(mb_type&CANDIDATE_MB_TYPE_INTER4V){                 
+                if(mb_type&CANDIDATE_MB_TYPE_INTER4V){
                     s->mv_dir = MV_DIR_FORWARD;
                     s->mv_type = MV_TYPE_8X8;
                     s->mb_intra= 0;
@@ -4892,7 +5037,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         s->mv[0][i][0] = s->current_picture.motion_val[0][s->block_index[i]][0];
                         s->mv[0][i][1] = s->current_picture.motion_val[0][s->block_index[i]][1];
                     }
-                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER4V, pb, pb2, tex_pb,
                                  &dmin, &next_block, 0, 0);
                 }
                 if(mb_type&CANDIDATE_MB_TYPE_FORWARD){
@@ -4901,7 +5046,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                     s->mb_intra= 0;
                     s->mv[0][0][0] = s->b_forw_mv_table[xy][0];
                     s->mv[0][0][1] = s->b_forw_mv_table[xy][1];
-                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD, pb, pb2, tex_pb,
                                  &dmin, &next_block, s->mv[0][0][0], s->mv[0][0][1]);
                 }
                 if(mb_type&CANDIDATE_MB_TYPE_BACKWARD){
@@ -4910,7 +5055,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                     s->mb_intra= 0;
                     s->mv[1][0][0] = s->b_back_mv_table[xy][0];
                     s->mv[1][0][1] = s->b_back_mv_table[xy][1];
-                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD, pb, pb2, tex_pb,
                                  &dmin, &next_block, s->mv[1][0][0], s->mv[1][0][1]);
                 }
                 if(mb_type&CANDIDATE_MB_TYPE_BIDIR){
@@ -4921,23 +5066,23 @@ static int encode_thread(AVCodecContext *c, void *arg){
                     s->mv[0][0][1] = s->b_bidir_forw_mv_table[xy][1];
                     s->mv[1][0][0] = s->b_bidir_back_mv_table[xy][0];
                     s->mv[1][0][1] = s->b_bidir_back_mv_table[xy][1];
-                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR, pb, pb2, tex_pb,
                                  &dmin, &next_block, 0, 0);
                 }
                 if(mb_type&CANDIDATE_MB_TYPE_DIRECT){
                     int mx= s->b_direct_mv_table[xy][0];
                     int my= s->b_direct_mv_table[xy][1];
-                    
+
                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD | MV_DIRECT;
                     s->mb_intra= 0;
 /* xine: do not need this for decode or MPEG-1 encoding modes */
 #if 0
                     ff_mpeg4_set_direct_mv(s, mx, my);
 #endif /* #if 0 */
-                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_DIRECT, pb, pb2, tex_pb,
                                  &dmin, &next_block, mx, my);
                 }
-                if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){ 
+                if(mb_type&CANDIDATE_MB_TYPE_FORWARD_I){
                     s->mv_dir = MV_DIR_FORWARD;
                     s->mv_type = MV_TYPE_FIELD;
                     s->mb_intra= 0;
@@ -4946,10 +5091,10 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         s->mv[0][i][0] = s->b_field_mv_table[0][i][j][xy][0];
                         s->mv[0][i][1] = s->b_field_mv_table[0][i][j][xy][1];
                     }
-                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_FORWARD_I, pb, pb2, tex_pb,
                                  &dmin, &next_block, 0, 0);
                 }
-                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){ 
+                if(mb_type&CANDIDATE_MB_TYPE_BACKWARD_I){
                     s->mv_dir = MV_DIR_BACKWARD;
                     s->mv_type = MV_TYPE_FIELD;
                     s->mb_intra= 0;
@@ -4958,10 +5103,10 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         s->mv[1][i][0] = s->b_field_mv_table[1][i][j][xy][0];
                         s->mv[1][i][1] = s->b_field_mv_table[1][i][j][xy][1];
                     }
-                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BACKWARD_I, pb, pb2, tex_pb,
                                  &dmin, &next_block, 0, 0);
                 }
-                if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){ 
+                if(mb_type&CANDIDATE_MB_TYPE_BIDIR_I){
                     s->mv_dir = MV_DIR_FORWARD | MV_DIR_BACKWARD;
                     s->mv_type = MV_TYPE_FIELD;
                     s->mb_intra= 0;
@@ -4972,7 +5117,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                             s->mv[dir][i][1] = s->b_field_mv_table[dir][i][j][xy][1];
                         }
                     }
-                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_BIDIR_I, pb, pb2, tex_pb,
                                  &dmin, &next_block, 0, 0);
                 }
                 if(mb_type&CANDIDATE_MB_TYPE_INTRA){
@@ -4981,7 +5126,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                     s->mb_intra= 1;
                     s->mv[0][0][0] = 0;
                     s->mv[0][0][1] = 0;
-                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb, 
+                    encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTRA, pb, pb2, tex_pb,
                                  &dmin, &next_block, 0, 0);
                     if(s->h263_pred || s->h263_aic){
                         if(best_s.mb_intra)
@@ -4997,7 +5142,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         int dquant, dir, qp, dc[6];
                         DCTELEM ac[6][16];
                         const int mvdir= (best_s.mv_dir&MV_DIR_BACKWARD) ? 1 : 0;
-                        
+
                         assert(backup_s.dquant == 0);
 
                         //FIXME intra
@@ -5008,7 +5153,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                         s->mv[0][0][1] = best_s.mv[0][0][1];
                         s->mv[1][0][0] = best_s.mv[1][0][0];
                         s->mv[1][0][1] = best_s.mv[1][0][1];
-                        
+
                         dir= s->pict_type == B_TYPE ? 2 : 1;
                         if(last_qp + dir > s->avctx->qmax) dir= -dir;
                         for(dquant= dir; dquant<=2 && dquant>=-2; dquant += dir){
@@ -5023,7 +5168,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                                 }
                             }
 
-                            encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb, 
+                            encode_mb_hq(s, &backup_s, &best_s, CANDIDATE_MB_TYPE_INTER /* wrong but unused */, pb, pb2, tex_pb,
                                          &dmin, &next_block, s->mv[mvdir][0][0], s->mv[mvdir][0][1]);
                             if(best_s.qscale != qp){
                                 if(s->mb_intra && s->dc_val[0]){
@@ -5045,28 +5190,28 @@ static int encode_thread(AVCodecContext *c, void *arg){
                 }
 
                 copy_context_after_encode(s, &best_s, -1);
-                
+
                 pb_bits_count= put_bits_count(&s->pb);
                 flush_put_bits(&s->pb);
                 ff_copy_bits(&backup_s.pb, bit_buf[next_block^1], pb_bits_count);
                 s->pb= backup_s.pb;
-                
+
                 if(s->data_partitioning){
                     pb2_bits_count= put_bits_count(&s->pb2);
                     flush_put_bits(&s->pb2);
                     ff_copy_bits(&backup_s.pb2, bit_buf2[next_block^1], pb2_bits_count);
                     s->pb2= backup_s.pb2;
-                    
+
                     tex_pb_bits_count= put_bits_count(&s->tex_pb);
                     flush_put_bits(&s->tex_pb);
                     ff_copy_bits(&backup_s.tex_pb, bit_buf_tex[next_block^1], tex_pb_bits_count);
                     s->tex_pb= backup_s.tex_pb;
                 }
                 s->last_bits= put_bits_count(&s->pb);
-               
+
                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
                     ff_h263_update_motion_val(s);
-        
+
                 if(next_block==0){ //FIXME 16 vs linesize16
                     s->dsp.put_pixels_tab[0][0](s->dest[0], s->rd_scratchpad                     , s->linesize  ,16);
                     s->dsp.put_pixels_tab[1][0](s->dest[1], s->rd_scratchpad + 16*s->linesize    , s->uvlinesize, 8);
@@ -5079,7 +5224,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                 int motion_x, motion_y;
                 s->mv_type=MV_TYPE_16X16;
                 // only one MB-Type possible
-                
+
                 switch(mb_type){
                 case CANDIDATE_MB_TYPE_INTRA:
                     s->mv_dir = 0;
@@ -5191,10 +5336,10 @@ static int encode_thread(AVCodecContext *c, void *arg){
 
                 // RAL: Update last macroblock type
                 s->last_mv_dir = s->mv_dir;
-            
+
                 if (s->out_format == FMT_H263 && s->pict_type!=B_TYPE)
                     ff_h263_update_motion_val(s);
-		
+
                 MPV_decode_mb(s, s->block);
             }
 
@@ -5203,7 +5348,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
                 s->p_mv_table[xy][0]=0;
                 s->p_mv_table[xy][1]=0;
             }
-            
+
             if(s->flags&CODEC_FLAG_PSNR){
                 int w= 16;
                 int h= 16;
@@ -5211,13 +5356,13 @@ static int encode_thread(AVCodecContext *c, void *arg){
                 if(s->mb_x*16 + 16 > s->width ) w= s->width - s->mb_x*16;
                 if(s->mb_y*16 + 16 > s->height) h= s->height- s->mb_y*16;
 
-                s->current_picture_ptr->error[0] += sse(
+                s->current_picture.error[0] += sse(
                     s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*s->linesize*16,
                     s->dest[0], w, h, s->linesize);
-                s->current_picture_ptr->error[1] += sse(
+                s->current_picture.error[1] += sse(
                     s, s->new_picture.data[1] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                     s->dest[1], w>>1, h>>1, s->uvlinesize);
-                s->current_picture_ptr->error[2] += sse(
+                s->current_picture.error[2] += sse(
                     s, s->new_picture    .data[2] + s->mb_x*8  + s->mb_y*s->uvlinesize*8,
                     s->dest[2], w>>1, h>>1, s->uvlinesize);
             }
@@ -5238,7 +5383,7 @@ static int encode_thread(AVCodecContext *c, void *arg){
 
     write_slice_end(s);
 
-    /* Send the last GOB if RTP */    
+    /* Send the last GOB if RTP */
     if (s->avctx->rtp_callback) {
         int number_mb = (mb_y - s->resync_mb_y)*s->mb_width - s->resync_mb_x;
         pdif = pbBufPtr(&s->pb) - s->ptr_lastgob;
@@ -5272,6 +5417,9 @@ static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src)
     MERGE(misc_bits);
     MERGE(error_count);
     MERGE(padding_bug_score);
+    MERGE(current_picture.error[0]);
+    MERGE(current_picture.error[1]);
+    MERGE(current_picture.error[2]);
 
     if(dst->avctx->noise_reduction){
         for(i=0; i<64; i++){
@@ -5279,20 +5427,48 @@ static void merge_context_after_encode(MpegEncContext *dst, MpegEncContext *src)
             MERGE(dct_error_sum[1][i]);
         }
     }
-    
+
     assert(put_bits_count(&src->pb) % 8 ==0);
     assert(put_bits_count(&dst->pb) % 8 ==0);
     ff_copy_bits(&dst->pb, src->pb.buf, put_bits_count(&src->pb));
     flush_put_bits(&dst->pb);
 }
 
+static void estimate_qp(MpegEncContext *s, int dry_run){ /* choose picture quality and s->lambda, then call update_qscale() */
+    if (!s->fixed_qscale)
+        s->current_picture_ptr->quality=
+        s->current_picture.quality = ff_rate_estimate_qscale(s, dry_run);
+    /* dry_run is only forwarded to ff_rate_estimate_qscale(); with fixed_qscale set, both quality fields are left untouched here */
+    if(s->adaptive_quant){
+/* xine: do not need this for decode or MPEG-1 encoding modes, so the codec-specific qscale cleanup below is compiled out */
+#if 0
+        switch(s->codec_id){
+        case CODEC_ID_MPEG4:
+            ff_clean_mpeg4_qscales(s);
+            break;
+        case CODEC_ID_H263:
+        case CODEC_ID_H263P:
+        case CODEC_ID_FLV1:
+            ff_clean_h263_qscales(s);
+            break;
+        }
+#endif /* #if 0 */
+        /* adaptive quantization: lambda is taken from the first lambda_table entry only */
+        s->lambda= s->lambda_table[0];
+        //FIXME broken
+    }else
+        s->lambda= s->current_picture.quality; /* no adaptive quantization: lambda is the picture quality */
+//printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
+    update_qscale(s);
+}
+
 static void encode_picture(MpegEncContext *s, int picture_number)
 {
     int i;
     int bits;
 
     s->picture_number = picture_number;
-    
+
     /* Reset the average MB variance */
     s->me.mb_var_sum_temp    =
     s->me.mc_mb_var_sum_temp = 0;
@@ -5304,19 +5480,30 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     if (s->codec_id == CODEC_ID_MPEG1VIDEO || s->codec_id == CODEC_ID_MPEG2VIDEO || (s->h263_pred && !s->h263_msmpeg4))
         ff_set_mpeg4_time(s, s->picture_number);  //FIXME rename and use has_b_frames or similar
 #endif /* #if 0 */
-        
+
     s->me.scene_change_score=0;
-    
+
 //    s->lambda= s->current_picture_ptr->quality; //FIXME qscale / ... stuff for ME ratedistoration
-    
+
     if(s->pict_type==I_TYPE){
         if(s->msmpeg4_version >= 3) s->no_rounding=1;
         else                        s->no_rounding=0;
     }else if(s->pict_type!=B_TYPE){
         if(s->flipflop_rounding || s->codec_id == CODEC_ID_H263P || s->codec_id == CODEC_ID_MPEG4)
-            s->no_rounding ^= 1;          
+            s->no_rounding ^= 1;
     }
-    
+
+    if(s->flags & CODEC_FLAG_PASS2){
+        estimate_qp(s, 1);
+        ff_get_2pass_fcode(s);
+    }else if(!(s->flags & CODEC_FLAG_QSCALE)){
+        if(s->pict_type==B_TYPE)
+            s->lambda= s->last_lambda_for[s->pict_type];
+        else
+            s->lambda= s->last_lambda_for[s->last_non_b_pict_type];
+        update_qscale(s);
+    }
+
     s->mb_intra=0; //for the rate distortion & bit compare functions
     for(i=1; i<s->avctx->thread_count; i++){
         ff_update_duplicate_context(s->thread_context[i], s);
@@ -5343,7 +5530,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
         /* I-Frame */
         for(i=0; i<s->mb_stride*s->mb_height; i++)
             s->mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
-        
+
         if(!s->fixed_qscale){
             /* finding spatial complexity for I-frame rate control */
             s->avctx->execute(s->avctx, mb_var_thread, (void**)&(s->thread_context[0]), NULL, s->avctx->thread_count);
@@ -5375,14 +5562,14 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                 b= ff_get_best_fcode(s, s->p_field_mv_table[1][1], CANDIDATE_MB_TYPE_INTER_I);
                 s->f_code= FFMAX(s->f_code, FFMAX(a,b));
             }
-                    
+
             ff_fix_long_p_mvs(s);
             ff_fix_long_mvs(s, NULL, 0, s->p_mv_table, s->f_code, CANDIDATE_MB_TYPE_INTER, 0);
             if(s->flags & CODEC_FLAG_INTERLACED_ME){
                 int j;
                 for(i=0; i<2; i++){
                     for(j=0; j<2; j++)
-                        ff_fix_long_mvs(s, s->p_field_select_table[i], j, 
+                        ff_fix_long_mvs(s, s->p_field_select_table[i], j,
                                         s->p_field_mv_table[i][j], s->f_code, CANDIDATE_MB_TYPE_INTER_I, 0);
                 }
             }
@@ -5408,9 +5595,9 @@ static void encode_picture(MpegEncContext *s, int picture_number)
                 for(dir=0; dir<2; dir++){
                     for(i=0; i<2; i++){
                         for(j=0; j<2; j++){
-                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I) 
+                            int type= dir ? (CANDIDATE_MB_TYPE_BACKWARD_I|CANDIDATE_MB_TYPE_BIDIR_I)
                                           : (CANDIDATE_MB_TYPE_FORWARD_I |CANDIDATE_MB_TYPE_BIDIR_I);
-                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j, 
+                            ff_fix_long_mvs(s, s->b_field_select_table[dir][i], j,
                                             s->b_field_mv_table[dir][i][j], dir ? s->b_code : s->f_code, type, 1);
                         }
                     }
@@ -5420,34 +5607,11 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     }
 #endif /* #if 0 */
 
-    if (!s->fixed_qscale) 
-        s->current_picture.quality = ff_rate_estimate_qscale(s); //FIXME pic_ptr
+    estimate_qp(s, 0);
 
-    if(s->adaptive_quant){
-/* xine: do not need this for decode or MPEG-1 encoding modes */
-#if 0
-        switch(s->codec_id){
-        case CODEC_ID_MPEG4:
-            ff_clean_mpeg4_qscales(s);
-            break;
-        case CODEC_ID_H263:
-        case CODEC_ID_H263P:
-        case CODEC_ID_FLV1:
-            ff_clean_h263_qscales(s);
-            break;
-        }
-#endif /* #if 0 */
-
-        s->lambda= s->lambda_table[0];
-        //FIXME broken
-    }else
-        s->lambda= s->current_picture.quality;
-//printf("%d %d\n", s->avctx->global_quality, s->current_picture.quality);
-    update_qscale(s);
-    
-    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE)) 
+    if(s->qscale < 3 && s->max_qcoeff<=128 && s->pict_type==I_TYPE && !(s->flags & CODEC_FLAG_QSCALE))
         s->qscale= 3; //reduce clipping problems
-        
+
     if (s->out_format == FMT_MJPEG) {
         /* for mjpeg, we do include qscale in the matrix */
         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
@@ -5456,11 +5620,11 @@ static void encode_picture(MpegEncContext *s, int picture_number)
 
             s->intra_matrix[j] = clip_uint8((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3) & 0xFF;
         }
-        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16, 
+        convert_matrix(&s->dsp, s->q_intra_matrix, s->q_intra_matrix16,
                        s->intra_matrix, s->intra_quant_bias, 8, 8, 1);
         s->qscale= 8;
     }
-    
+
     //FIXME var duplication
     s->current_picture_ptr->key_frame=
     s->current_picture.key_frame= s->pict_type == I_TYPE; //FIXME pic_ptr
@@ -5483,18 +5647,18 @@ static void encode_picture(MpegEncContext *s, int picture_number)
         break;
 #endif
     case FMT_H263:
-        if (s->codec_id == CODEC_ID_WMV2) 
+        if (s->codec_id == CODEC_ID_WMV2)
             ff_wmv2_encode_picture_header(s, picture_number);
-        else if (s->h263_msmpeg4) 
+        else if (s->h263_msmpeg4)
             msmpeg4_encode_picture_header(s, picture_number);
         else if (s->h263_pred)
             mpeg4_encode_picture_header(s, picture_number);
 #ifdef CONFIG_RV10_ENCODER
-        else if (s->codec_id == CODEC_ID_RV10) 
+        else if (s->codec_id == CODEC_ID_RV10)
             rv10_encode_picture_header(s, picture_number);
 #endif
 #ifdef CONFIG_RV20_ENCODER
-        else if (s->codec_id == CODEC_ID_RV20) 
+        else if (s->codec_id == CODEC_ID_RV20)
             rv20_encode_picture_header(s, picture_number);
 #endif
         else if (s->codec_id == CODEC_ID_FLV1)
@@ -5516,7 +5680,7 @@ static void encode_picture(MpegEncContext *s, int picture_number)
     }
     bits= put_bits_count(&s->pb);
     s->header_bits= bits - s->last_bits;
-        
+
     for(i=1; i<s->avctx->thread_count; i++){
         update_duplicate_context_after_me(s->thread_context[i], s);
     }
@@ -5555,7 +5719,7 @@ static void  denoise_dct_c(MpegEncContext *s, DCTELEM *block){
 
 #ifdef CONFIG_ENCODERS
 
-static int dct_quantize_trellis_c(MpegEncContext *s, 
+static int dct_quantize_trellis_c(MpegEncContext *s,
                         DCTELEM *block, int n,
                         int qscale, int *overflow){
     const int *qmat;
@@ -5580,9 +5744,9 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
     uint8_t * length;
     uint8_t * last_length;
     const int lambda= s->lambda2 >> (FF_LAMBDA_SHIFT - 6);
-        
+
     s->dsp.fdct (block);
-    
+
     if(s->dct_error_sum)
         s->denoise_dct(s, block);
     qmul= qscale*16;
@@ -5601,7 +5765,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
             q = 1 << 3;
             qadd=0;
         }
-            
+
         /* note: block[0] is assumed to be positive */
         block[0] = (block[0] + (q >> 1)) / q;
         start_i = 1;
@@ -5659,9 +5823,9 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
             coeff_count[i]= 1;
         }
     }
-    
+
     *overflow= s->max_qcoeff < max; //overflow might have happened
-    
+
     if(last_non_zero < start_i){
         memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
         return last_non_zero;
@@ -5670,7 +5834,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
     score_tab[start_i]= 0;
     survivor[0]= start_i;
     survivor_count= 1;
-    
+
     for(i=start_i; i<=last_non_zero; i++){
         int level_index, j;
         const int dct_coeff= ABS(block[ scantable[i] ]);
@@ -5681,7 +5845,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
             int level= coeff[level_index][i];
             const int alevel= ABS(level);
             int unquant_coeff;
-            
+
             assert(level);
 
             if(s->out_format == FMT_H263){
@@ -5705,7 +5869,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
                     int run= i - survivor[j];
                     int score= distoration + length[UNI_AC_ENC_INDEX(run, level)]*lambda;
                     score += score_tab[i-run];
-                    
+
                     if(score < best_score){
                         best_score= score;
                         run_tab[i+1]= run;
@@ -5731,7 +5895,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
                 for(j=survivor_count-1; j>=0; j--){
                     int run= i - survivor[j];
                     int score= distoration + score_tab[i-run];
-                    
+
                     if(score < best_score){
                         best_score= score;
                         run_tab[i+1]= run;
@@ -5753,7 +5917,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
                 }
             }
         }
-        
+
         score_tab[i+1]= best_score;
 
         //Note: there is a vlc code in mpeg4 which is 1 bit shorter then another one with a shorter run and the same level
@@ -5788,18 +5952,18 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
     }
 
     s->coded_score[n] = last_score;
-    
+
     dc= ABS(block[0]);
     last_non_zero= last_i - 1;
     memset(block + start_i, 0, (64-start_i)*sizeof(DCTELEM));
-    
+
     if(last_non_zero < start_i)
         return last_non_zero;
 
     if(last_non_zero == 0 && start_i == 0){
         int best_level= 0;
         int best_score= dc * dc;
-        
+
         for(i=0; i<coeff_count[0]; i++){
             int level= coeff[i][0];
             int alevel= ABS(level);
@@ -5835,7 +5999,7 @@ static int dct_quantize_trellis_c(MpegEncContext *s,
 
     block[ perm_scantable[last_non_zero] ]= last_level;
     i -= last_run + 1;
-    
+
     for(; i>start_i; i -= run_tab[i] + 1){
         block[ perm_scantable[i-1] ]= level_tab[i];
     }
@@ -5869,7 +6033,7 @@ static int dct_quantize_refine(MpegEncContext *s, //FIXME breaks denoise?
                         DCTELEM *block, int16_t *weight, DCTELEM *orig,
                         int n, int qscale){
     int16_t rem[64];
-    DCTELEM d1[64] __align16;
+    DECLARE_ALIGNED_16(DCTELEM, d1[64]);
     const int *qmat;
     const uint8_t *scantable= s->intra_scantable.scantable;
     const uint8_t *perm_scantable= s->intra_scantable.permutated;
@@ -5895,7 +6059,7 @@ static int messed_sign=0;
 
     if(basis[0][0] == 0)
         build_basis(s->dsp.idct_permutation);
-    
+
     qmul= qscale*2;
     qadd= (qscale-1)|1;
     if (s->mb_intra) {
@@ -5949,7 +6113,7 @@ STOP_TIMER("memset rem[]")}
 
         weight[i] = w;
 //        w=weight[i] = (63*qns + (w/2)) / w;
-         
+
         assert(w>0);
         assert(w<(1<<6));
         sum += w*w;
@@ -5964,7 +6128,7 @@ STOP_TIMER("memset rem[]")}
         int j= perm_scantable[i];
         const int level= block[j];
         int coeff;
-        
+
         if(level){
             if(level<0) coeff= qmul*level - qadd;
             else        coeff= qmul*level + qadd;
@@ -6000,7 +6164,7 @@ STOP_TIMER("init rem[]")
 #endif
             for(i=0; i<64; i++){
                 int w= weight[i];
-            
+
                 d1[i] = (rem[i]*w*w + (1<<(RECON_SHIFT+12-1)))>>(RECON_SHIFT+12);
             }
 #ifdef REFINE_STATS
@@ -6018,13 +6182,13 @@ STOP_TIMER("dct")}
             int change, old_coeff;
 
             assert(s->mb_intra);
-            
+
             old_coeff= q*level;
-            
+
             for(change=-1; change<=1; change+=2){
                 int new_level= level + change;
                 int score, new_coeff;
-                
+
                 new_coeff= q*new_level;
                 if(new_coeff >= 2048 || new_coeff < 0)
                     continue;
@@ -6038,7 +6202,7 @@ STOP_TIMER("dct")}
                 }
             }
         }
-        
+
         run=0;
         rle_index=0;
         run2= run_tab[rle_index++];
@@ -6062,11 +6226,11 @@ STOP_TIMER("dct")}
                 run2--;
                 assert(run2>=0 || i >= last_non_zero );
             }
-            
+
             for(change=-1; change<=1; change+=2){
                 int new_level= level + change;
                 int score, new_coeff, unquant_change;
-                
+
                 score=0;
                 if(s->avctx->quantizer_noise_shaping < 2 && ABS(new_level) > ABS(level))
                    continue;
@@ -6077,7 +6241,7 @@ STOP_TIMER("dct")}
                     if(new_coeff >= 2048 || new_coeff <= -2048)
                         continue;
                     //FIXME check for overflow
-                    
+
                     if(level){
                         if(level < 63 && level > -63){
                             if(i < last_non_zero)
@@ -6089,7 +6253,7 @@ STOP_TIMER("dct")}
                         }
                     }else{
                         assert(ABS(new_level)==1);
-                        
+
                         if(analyze_gradient){
                             int g= d1[ scantable[i] ];
                             if(g && (g^new_level) >= 0)
@@ -6099,7 +6263,7 @@ STOP_TIMER("dct")}
                         if(i < last_non_zero){
                             int next_i= i + run2 + 1;
                             int next_level= block[ perm_scantable[next_i] ] + 64;
-                            
+
                             if(next_level&(~127))
                                 next_level= 0;
 
@@ -6126,7 +6290,7 @@ STOP_TIMER("dct")}
                     if(i < last_non_zero){
                         int next_i= i + run2 + 1;
                         int next_level= block[ perm_scantable[next_i] ] + 64;
-                            
+
                         if(next_level&(~127))
                             next_level= 0;
 
@@ -6146,12 +6310,12 @@ STOP_TIMER("dct")}
                         }
                     }
                 }
-                
+
                 score *= lambda;
 
                 unquant_change= new_coeff - old_coeff;
                 assert((score < 100*lambda && score > -100*lambda) || lambda==0);
-                
+
                 score+= s->dsp.try_8x8basis(rem, weight, basis[j], unquant_change);
                 if(score<best_score){
                     best_score= score;
@@ -6176,9 +6340,9 @@ STOP_TIMER("iterative step")}
 
         if(best_change){
             int j= perm_scantable[ best_coeff ];
-            
+
             block[j] += best_change;
-            
+
             if(best_coeff > last_non_zero){
                 last_non_zero= best_coeff;
                 assert(block[j]);
@@ -6217,7 +6381,7 @@ if(256*256*256*64 % count == 0){
             for(i=start_i; i<=last_non_zero; i++){
                 int j= perm_scantable[i];
                 const int level= block[j];
-        
+
                  if(level){
                      run_tab[rle_index++]=run;
                      run=0;
@@ -6225,7 +6389,7 @@ if(256*256*256*64 % count == 0){
                      run++;
                  }
             }
-            
+
             s->dsp.add_8x8basis(rem, basis[j], best_unquant_change);
         }else{
             break;
@@ -6241,7 +6405,7 @@ STOP_TIMER("iterative search")
     return last_non_zero;
 }
 
-static int dct_quantize_c(MpegEncContext *s, 
+static int dct_quantize_c(MpegEncContext *s,
                         DCTELEM *block, int n,
                         int qscale, int *overflow)
 {
@@ -6267,7 +6431,7 @@ static int dct_quantize_c(MpegEncContext *s,
         } else
             /* For AIC we skip quant/dequant of INTRADC */
             q = 1 << 3;
-            
+
         /* note: block[0] is assumed to be positive */
         block[0] = (block[0] + (q >> 1)) / q;
         start_i = 1;
@@ -6313,25 +6477,25 @@ static int dct_quantize_c(MpegEncContext *s,
         }
     }
     *overflow= s->max_qcoeff < max; //overflow might have happened
-    
+
     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
     if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
-	ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
+        ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
 
     return last_non_zero;
 }
 
 #endif //CONFIG_ENCODERS
 
-static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, 
+static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale)
 {
     int i, level, nCoeffs;
     const uint16_t *quant_matrix;
 
     nCoeffs= s->block_last_index[n];
-    
-    if (n < 4) 
+
+    if (n < 4)
         block[0] = block[0] * s->y_dc_scale;
     else
         block[0] = block[0] * s->c_dc_scale;
@@ -6355,14 +6519,14 @@ static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s,
     }
 }
 
-static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, 
+static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale)
 {
     int i, level, nCoeffs;
     const uint16_t *quant_matrix;
 
     nCoeffs= s->block_last_index[n];
-    
+
     quant_matrix = s->inter_matrix;
     for(i=0; i<=nCoeffs; i++) {
         int j= s->intra_scantable.permutated[i];
@@ -6384,7 +6548,7 @@ static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s,
     }
 }
 
-static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, 
+static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale)
 {
     int i, level, nCoeffs;
@@ -6392,8 +6556,8 @@ static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
 
     if(s->alternate_scan) nCoeffs= 63;
     else nCoeffs= s->block_last_index[n];
-    
-    if (n < 4) 
+
+    if (n < 4)
         block[0] = block[0] * s->y_dc_scale;
     else
         block[0] = block[0] * s->c_dc_scale;
@@ -6414,7 +6578,7 @@ static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s,
     }
 }
 
-static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, 
+static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
                                    DCTELEM *block, int n, int qscale)
 {
     int i, level, nCoeffs;
@@ -6423,7 +6587,7 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
 
     if(s->alternate_scan) nCoeffs= 63;
     else nCoeffs= s->block_last_index[n];
-    
+
     quant_matrix = s->inter_matrix;
     for(i=0; i<=nCoeffs; i++) {
         int j= s->intra_scantable.permutated[i];
@@ -6445,18 +6609,18 @@ static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s,
     block[63]^=sum&1;
 }
 
-static void dct_unquantize_h263_intra_c(MpegEncContext *s, 
+static void dct_unquantize_h263_intra_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale)
 {
     int i, level, qmul, qadd;
     int nCoeffs;
-    
+
     assert(s->block_last_index[n]>=0);
-    
+
     qmul = qscale << 1;
-    
+
     if (!s->h263_aic) {
-        if (n < 4) 
+        if (n < 4)
             block[0] = block[0] * s->y_dc_scale;
         else
             block[0] = block[0] * s->c_dc_scale;
@@ -6482,17 +6646,17 @@ static void dct_unquantize_h263_intra_c(MpegEncContext *s,
     }
 }
 
-static void dct_unquantize_h263_inter_c(MpegEncContext *s, 
+static void dct_unquantize_h263_inter_c(MpegEncContext *s,
                                   DCTELEM *block, int n, int qscale)
 {
     int i, level, qmul, qadd;
     int nCoeffs;
-    
+
     assert(s->block_last_index[n]>=0);
-    
+
     qadd = (qscale - 1) | 1;
     qmul = qscale << 1;
-    
+
     nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ];
 
     for(i=0; i<=nCoeffs; i++) {
diff --git a/src/libffmpeg/libavcodec/mpegvideo.h b/src/libffmpeg/libavcodec/mpegvideo.h
index 9e02fdca3..888b0b608 100644
--- a/src/libffmpeg/libavcodec/mpegvideo.h
+++ b/src/libffmpeg/libavcodec/mpegvideo.h
@@ -15,14 +15,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file mpegvideo.h
  * mpegvideo header.
  */
- 
+
 #ifndef AVCODEC_MPEGVIDEO_H
 #define AVCODEC_MPEGVIDEO_H
 
@@ -35,7 +35,7 @@ enum OutputFormat {
     FMT_MPEG1,
     FMT_H261,
     FMT_H263,
-    FMT_MJPEG, 
+    FMT_MJPEG,
     FMT_H264,
 };
 
@@ -70,6 +70,8 @@ enum OutputFormat {
 
 #define MAX_MB_BYTES (30*16*16*3/8 + 120)
 
+#define INPLACE_OFFSET 16
+
 typedef struct Predictor{
     double coeff;
     double count;
@@ -83,12 +85,14 @@ typedef struct RateControlEntry{
     int i_tex_bits;
     int p_tex_bits;
     int misc_bits;
+    int header_bits;
     uint64_t expected_bits;
     int new_pict_type;
     float new_qscale;
     int mc_mb_var_sum;
     int mb_var_sum;
     int i_count;
+    int skip_count;
     int f_code;
     int b_code;
 }RateControlEntry;
@@ -98,16 +102,16 @@ typedef struct RateControlEntry{
  */
 typedef struct RateControlContext{
     FILE *stats_file;
-    int num_entries;              ///< number of RateControlEntries 
+    int num_entries;              ///< number of RateControlEntries
     RateControlEntry *entry;
-    double buffer_index;          ///< amount of bits in the video/audio buffer 
+    double buffer_index;          ///< amount of bits in the video/audio buffer
     Predictor pred[5];
-    double short_term_qsum;       ///< sum of recent qscales 
-    double short_term_qcount;     ///< count of recent qscales 
-    double pass1_rc_eq_output_sum;///< sum of the output of the rc equation, this is used for normalization  
-    double pass1_wanted_bits;     ///< bits which should have been outputed by the pass1 code (including complexity init) 
+    double short_term_qsum;       ///< sum of recent qscales
+    double short_term_qcount;     ///< count of recent qscales
+    double pass1_rc_eq_output_sum;///< sum of the output of the rc equation, this is used for normalization
+    double pass1_wanted_bits;     ///< bits which should have been outputed by the pass1 code (including complexity init)
     double last_qscale;
-    double last_qscale_for[5];    ///< last qscale for a specific pict type, used for max_diff & ipb factor stuff 
+    double last_qscale_for[5];    ///< last qscale for a specific pict type, used for max_diff & ipb factor stuff
     int last_mc_mb_var_sum;
     int last_mb_var_sum;
     uint64_t i_cplx_sum[5];
@@ -116,6 +120,9 @@ typedef struct RateControlContext{
     uint64_t qscale_sum[5];
     int frame_count[5];
     int last_non_b_pict_type;
+
+    void *non_lavc_opaque;        ///< context for non lavc rc code (for example xvid)
+    float dry_run_qscale;         ///< for xvid rc
 }RateControlContext;
 
 /**
@@ -126,8 +133,8 @@ typedef struct ScanTable{
     uint8_t permutated[64];
     uint8_t raster_end[64];
 #ifdef ARCH_POWERPC
-		/** Used by dct_quantise_alitvec to find last-non-zero */
-    uint8_t __align8 inverse[64];
+                /** Used by dct_quantise_alitvec to find last-non-zero */
+    DECLARE_ALIGNED_8(uint8_t, inverse[64]);
 #endif
 } ScanTable;
 
@@ -176,12 +183,12 @@ typedef struct Picture{
     int ref_poc[2][16];         ///< h264 POCs of the frames used as reference
     int ref_count[2];           ///< number of entries in ref_poc
 
-    int mb_var_sum;             ///< sum of MB variance for current frame 
-    int mc_mb_var_sum;          ///< motion compensated MB variance for current frame 
-    uint16_t *mb_var;           ///< Table for MB variances 
-    uint16_t *mc_mb_var;        ///< Table for motion compensated MB variances 
-    uint8_t *mb_mean;           ///< Table for MB luminance 
-    int32_t *mb_cmp_score;	///< Table for MB cmp scores, for mb decision FIXME remove
+    int mb_var_sum;             ///< sum of MB variance for current frame
+    int mc_mb_var_sum;          ///< motion compensated MB variance for current frame
+    uint16_t *mb_var;           ///< Table for MB variances
+    uint16_t *mc_mb_var;        ///< Table for motion compensated MB variances
+    uint8_t *mb_mean;           ///< Table for MB luminance
+    int32_t *mb_cmp_score;      ///< Table for MB cmp scores, for mb decision FIXME remove
     int b_frame_score;          /* */
 } Picture;
 
@@ -189,7 +196,7 @@ typedef struct ParseContext{
     uint8_t *buffer;
     int index;
     int last_index;
-    int buffer_size;
+    unsigned int buffer_size;
     uint32_t state;             ///< contains the last few bytes in MSB order
     int frame_start_found;
     int overread;               ///< the number of bytes which where irreversibly read from the next frame
@@ -203,17 +210,17 @@ struct MpegEncContext;
  */
 typedef struct MotionEstContext{
     AVCodecContext *avctx;
-    int skip;                          ///< set if ME is skipped for the current MB 
-    int co_located_mv[4][2];           ///< mv from last p frame for direct mode ME 
+    int skip;                          ///< set if ME is skipped for the current MB
+    int co_located_mv[4][2];           ///< mv from last p frame for direct mode ME
     int direct_basis_mv[4][2];
-    uint8_t *scratchpad;               ///< data area for the me algo, so that the ME doesnt need to malloc/free 
+    uint8_t *scratchpad;               ///< data area for the me algo, so that the ME doesnt need to malloc/free
     uint8_t *best_mb;
     uint8_t *temp_mb[2];
     uint8_t *temp;
     int best_bits;
-    uint32_t *map;                     ///< map to avoid duplicate evaluations 
-    uint32_t *score_map;               ///< map to store the scores 
-    int map_generation;  
+    uint32_t *map;                     ///< map to avoid duplicate evaluations
+    uint32_t *score_map;               ///< map to store the scores
+    int map_generation;
     int pre_penalty_factor;
     int penalty_factor;
     int sub_penalty_factor;
@@ -221,7 +228,7 @@ typedef struct MotionEstContext{
     int flags;
     int sub_flags;
     int mb_flags;
-    int pre_pass;                      ///< = 1 for the pre pass 
+    int pre_pass;                      ///< = 1 for the pre pass
     int dia_size;
     int xmin;
     int xmax;
@@ -242,10 +249,10 @@ typedef struct MotionEstContext{
     op_pixels_func (*hpel_avg)[4];
     qpel_mc_func (*qpel_put)[16];
     qpel_mc_func (*qpel_avg)[16];
-    uint8_t (*mv_penalty)[MAX_MV*2+1];  ///< amount of bits needed to encode a MV 
+    uint8_t (*mv_penalty)[MAX_MV*2+1];  ///< amount of bits needed to encode a MV
     uint8_t *current_mv_penalty;
     int (*sub_motion_search)(struct MpegEncContext * s,
-				  int *mx_ptr, int *my_ptr, int dmin,
+                                  int *mx_ptr, int *my_ptr, int dmin,
                                   int src_index, int ref_index,
                                   int size, int h);
 }MotionEstContext;
@@ -256,28 +263,28 @@ typedef struct MotionEstContext{
 typedef struct MpegEncContext {
     struct AVCodecContext *avctx;
     /* the following parameters must be initialized before encoding */
-    int width, height;///< picture size. must be a multiple of 16 
+    int width, height;///< picture size. must be a multiple of 16
     int gop_size;
-    int intra_only;   ///< if true, only intra pictures are generated 
-    int bit_rate;     ///< wanted bit rate 
-    enum OutputFormat out_format; ///< output format 
-    int h263_pred;    ///< use mpeg4/h263 ac/dc predictions 
+    int intra_only;   ///< if true, only intra pictures are generated
+    int bit_rate;     ///< wanted bit rate
+    enum OutputFormat out_format; ///< output format
+    int h263_pred;    ///< use mpeg4/h263 ac/dc predictions
 
 /* the following codec id fields are deprecated in favor of codec_id */
-    int h263_plus;    ///< h263 plus headers 
+    int h263_plus;    ///< h263 plus headers
     int h263_msmpeg4; ///< generate MSMPEG4 compatible stream (deprecated, use msmpeg4_version instead)
-    int h263_flv;     ///< use flv h263 header 
-    
+    int h263_flv;     ///< use flv h263 header
+
     enum CodecID codec_id;     /* see CODEC_ID_xxx */
-    int fixed_qscale; ///< fixed qscale if non zero 
-    int encoding;     ///< true if we are encoding (vs decoding) 
-    int flags;        ///< AVCodecContext.flags (HQ, MV4, ...) 
+    int fixed_qscale; ///< fixed qscale if non zero
+    int encoding;     ///< true if we are encoding (vs decoding)
+    int flags;        ///< AVCodecContext.flags (HQ, MV4, ...)
     int flags2;       ///< AVCodecContext.flags2
-    int max_b_frames; ///< max number of b-frames for encoding 
+    int max_b_frames; ///< max number of b-frames for encoding
     int luma_elim_threshold;
     int chroma_elim_threshold;
-    int strict_std_compliance; ///< strictly follow the std (MPEG4, ...) 
-    int workaround_bugs;       ///< workaround bugs in encoders which cannot be detected automatically 
+    int strict_std_compliance; ///< strictly follow the std (MPEG4, ...)
+    int workaround_bugs;       ///< workaround bugs in encoders which cannot be detected automatically
     /* the following fields are managed internally by the encoder */
 
     /** bit output */
@@ -288,148 +295,149 @@ typedef struct MpegEncContext {
     int input_picture_number;  ///< used to set pic->display_picture_number, shouldnt be used for/by anything else
     int coded_picture_number;  ///< used to set pic->coded_picture_number, shouldnt be used for/by anything else
     int picture_number;       //FIXME remove, unclear definition
-    int picture_in_gop_number; ///< 0-> first pic in gop, ... 
-    int b_frames_since_non_b;  ///< used for encoding, relative to not yet reordered input 
+    int picture_in_gop_number; ///< 0-> first pic in gop, ...
+    int b_frames_since_non_b;  ///< used for encoding, relative to not yet reordered input
     int64_t user_specified_pts;///< last non zero pts from AVFrame which was passed into avcodec_encode_video()
-    int mb_width, mb_height;   ///< number of MBs horizontally & vertically 
+    int mb_width, mb_height;   ///< number of MBs horizontally & vertically
     int mb_stride;             ///< mb_width+1 used for some arrays to allow simple addressing of left & top MBs without sig11
     int b8_stride;             ///< 2*mb_width+1 used for some 8x8 block arrays to allow simple addressing
     int b4_stride;             ///< 4*mb_width+1 used for some 4x4 block arrays to allow simple addressing
     int h_edge_pos, v_edge_pos;///< horizontal / vertical position of the right/bottom edge (pixel replication)
-    int mb_num;                ///< number of MBs of a picture 
-    int linesize;              ///< line size, in bytes, may be different from width 
-    int uvlinesize;            ///< line size, for chroma in bytes, may be different from width 
-    Picture *picture;          ///< main picture buffer 
+    int mb_num;                ///< number of MBs of a picture
+    int linesize;              ///< line size, in bytes, may be different from width
+    int uvlinesize;            ///< line size, for chroma in bytes, may be different from width
+    Picture *picture;          ///< main picture buffer
     Picture **input_picture;   ///< next pictures on display order for encoding
     Picture **reordered_input_picture; ///< pointer to the next pictures in codedorder for encoding
-    
+
     int start_mb_y;            ///< start mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y)
     int end_mb_y;              ///< end   mb_y of this thread (so current thread should process start_mb_y <= row < end_mb_y)
     struct MpegEncContext *thread_context[MAX_THREADS];
-    
-    /** 
+
+    /**
      * copy of the previous picture structure.
      * note, linesize & data, might not match the previous picture (for field pictures)
      */
-    Picture last_picture;       
-    
-    /** 
+    Picture last_picture;
+
+    /**
      * copy of the next picture structure.
      * note, linesize & data, might not match the next picture (for field pictures)
      */
     Picture next_picture;
-    
-    /** 
+
+    /**
      * copy of the source picture structure for encoding.
      * note, linesize & data, might not match the source picture (for field pictures)
      */
     Picture new_picture;
-    
-    /** 
+
+    /**
      * copy of the current picture structure.
      * note, linesize & data, might not match the current picture (for field pictures)
      */
-    Picture current_picture;    ///< buffer to store the decompressed current picture 
-    
+    Picture current_picture;    ///< buffer to store the decompressed current picture
+
     Picture *last_picture_ptr;     ///< pointer to the previous picture.
-    Picture *next_picture_ptr;     ///< pointer to the next picture (for bidir pred) 
+    Picture *next_picture_ptr;     ///< pointer to the next picture (for bidir pred)
     Picture *current_picture_ptr;  ///< pointer to the current picture
     uint8_t *visualization_buffer[3]; //< temporary buffer vor MV visualization
-    int last_dc[3];                ///< last DC values for MPEG1 
+    int last_dc[3];                ///< last DC values for MPEG1
     int16_t *dc_val_base;
-    int16_t *dc_val[3];            ///< used for mpeg4 DC prediction, all 3 arrays must be continuous 
+    int16_t *dc_val[3];            ///< used for mpeg4 DC prediction, all 3 arrays must be continuous
     int16_t dc_cache[4*5];
     int y_dc_scale, c_dc_scale;
-    const uint8_t *y_dc_scale_table;     ///< qscale -> y_dc_scale table 
-    const uint8_t *c_dc_scale_table;     ///< qscale -> c_dc_scale table 
+    const uint8_t *y_dc_scale_table;     ///< qscale -> y_dc_scale table
+    const uint8_t *c_dc_scale_table;     ///< qscale -> c_dc_scale table
     const uint8_t *chroma_qscale_table;  ///< qscale -> chroma_qscale (h263)
     uint8_t *coded_block_base;
     uint8_t *coded_block;          ///< used for coded block pattern prediction (msmpeg4v3, wmv1)
     int16_t (*ac_val_base)[16];
-    int16_t (*ac_val[3])[16];      ///< used for for mpeg4 AC prediction, all 3 arrays must be continuous 
+    int16_t (*ac_val[3])[16];      ///< used for mpeg4 AC prediction, all 3 arrays must be continuous
     int ac_pred;
-    uint8_t *prev_pict_types;     ///< previous picture types in bitstream order, used for mb skip 
+    uint8_t *prev_pict_types;     ///< previous picture types in bitstream order, used for mb skip
 #define PREV_PICT_TYPES_BUFFER_SIZE 256
-    int mb_skipped;                ///< MUST BE SET only during DECODING 
-    uint8_t *mbskip_table;        /**< used to avoid copy if macroblock skipped (for black regions for example) 
+    int mb_skipped;                ///< MUST BE SET only during DECODING
+    uint8_t *mbskip_table;        /**< used to avoid copy if macroblock skipped (for black regions for example)
                                    and used for b-frame encoding & decoding (contains skip table of next P Frame) */
-    uint8_t *mbintra_table;       ///< used to avoid setting {ac, dc, cbp}-pred stuff to zero on inter MB decoding 
-    uint8_t *cbp_table;           ///< used to store cbp, ac_pred for partitioned decoding 
-    uint8_t *pred_dir_table;      ///< used to store pred_dir for partitioned decoding 
+    uint8_t *mbintra_table;       ///< used to avoid setting {ac, dc, cbp}-pred stuff to zero on inter MB decoding
+    uint8_t *cbp_table;           ///< used to store cbp, ac_pred for partitioned decoding
+    uint8_t *pred_dir_table;      ///< used to store pred_dir for partitioned decoding
     uint8_t *allocated_edge_emu_buffer;
     uint8_t *edge_emu_buffer;     ///< points into the middle of allocated_edge_emu_buffer
     uint8_t *rd_scratchpad;       ///< scratchpad for rate distortion mb decision
     uint8_t *obmc_scratchpad;
     uint8_t *b_scratchpad;        ///< scratchpad used for writing into write only buffers
 
-    int qscale;                 ///< QP 
-    int chroma_qscale;          ///< chroma QP 
+    int qscale;                 ///< QP
+    int chroma_qscale;          ///< chroma QP
     int lambda;                 ///< lagrange multipler used in rate distortion
-    int lambda2;                ///< (lambda*lambda) >> FF_LAMBDA_SHIFT 
+    int lambda2;                ///< (lambda*lambda) >> FF_LAMBDA_SHIFT
     int *lambda_table;
-    int adaptive_quant;         ///< use adaptive quantization 
-    int dquant;                 ///< qscale difference to prev qscale  
-    int pict_type;              ///< I_TYPE, P_TYPE, B_TYPE, ... 
+    int adaptive_quant;         ///< use adaptive quantization
+    int dquant;                 ///< qscale difference to prev qscale
+    int pict_type;              ///< I_TYPE, P_TYPE, B_TYPE, ...
     int last_pict_type; //FIXME removes
-    int last_non_b_pict_type;   ///< used for mpeg4 gmc b-frames & ratecontrol 
+    int last_non_b_pict_type;   ///< used for mpeg4 gmc b-frames & ratecontrol
     int dropable;
     int frame_rate_index;
+    int last_lambda_for[5];     ///< last lambda for a specific pict type
 
     /* motion compensation */
-    int unrestricted_mv;        ///< mv can point outside of the coded picture 
-    int h263_long_vectors;      ///< use horrible h263v1 long vector mode 
+    int unrestricted_mv;        ///< mv can point outside of the coded picture
+    int h263_long_vectors;      ///< use horrible h263v1 long vector mode
     int decode;                 ///< if 0 then decoding will be skipped (for encoding b frames for example)
 
     DSPContext dsp;             ///< pointers for accelerated dsp functions
-    int f_code;                 ///< forward MV resolution 
-    int b_code;                 ///< backward MV resolution for B Frames (mpeg4) 
+    int f_code;                 ///< forward MV resolution
+    int b_code;                 ///< backward MV resolution for B Frames (mpeg4)
     int16_t (*p_mv_table_base)[2];
     int16_t (*b_forw_mv_table_base)[2];
     int16_t (*b_back_mv_table_base)[2];
-    int16_t (*b_bidir_forw_mv_table_base)[2]; 
-    int16_t (*b_bidir_back_mv_table_base)[2]; 
+    int16_t (*b_bidir_forw_mv_table_base)[2];
+    int16_t (*b_bidir_back_mv_table_base)[2];
     int16_t (*b_direct_mv_table_base)[2];
     int16_t (*p_field_mv_table_base[2][2])[2];
     int16_t (*b_field_mv_table_base[2][2][2])[2];
-    int16_t (*p_mv_table)[2];            ///< MV table (1MV per MB) p-frame encoding 
-    int16_t (*b_forw_mv_table)[2];       ///< MV table (1MV per MB) forward mode b-frame encoding 
-    int16_t (*b_back_mv_table)[2];       ///< MV table (1MV per MB) backward mode b-frame encoding 
-    int16_t (*b_bidir_forw_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding 
-    int16_t (*b_bidir_back_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding 
-    int16_t (*b_direct_mv_table)[2];     ///< MV table (1MV per MB) direct mode b-frame encoding 
+    int16_t (*p_mv_table)[2];            ///< MV table (1MV per MB) p-frame encoding
+    int16_t (*b_forw_mv_table)[2];       ///< MV table (1MV per MB) forward mode b-frame encoding
+    int16_t (*b_back_mv_table)[2];       ///< MV table (1MV per MB) backward mode b-frame encoding
+    int16_t (*b_bidir_forw_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding
+    int16_t (*b_bidir_back_mv_table)[2]; ///< MV table (1MV per MB) bidir mode b-frame encoding
+    int16_t (*b_direct_mv_table)[2];     ///< MV table (1MV per MB) direct mode b-frame encoding
     int16_t (*p_field_mv_table[2][2])[2];   ///< MV table (2MV per MB) interlaced p-frame encoding
     int16_t (*b_field_mv_table[2][2][2])[2];///< MV table (4MV per MB) interlaced b-frame encoding
     uint8_t (*p_field_select_table[2]);
     uint8_t (*b_field_select_table[2][2]);
-    int me_method;                       ///< ME algorithm 
+    int me_method;                       ///< ME algorithm
     int mv_dir;
 #define MV_DIR_BACKWARD  1
 #define MV_DIR_FORWARD   2
 #define MV_DIRECT        4 ///< bidirectional mode where the difference equals the MV of the last P/S/I-Frame (mpeg4)
     int mv_type;
-#define MV_TYPE_16X16       0   ///< 1 vector for the whole mb 
-#define MV_TYPE_8X8         1   ///< 4 vectors (h263, mpeg4 4MV) 
-#define MV_TYPE_16X8        2   ///< 2 vectors, one per 16x8 block  
-#define MV_TYPE_FIELD       3   ///< 2 vectors, one per field  
-#define MV_TYPE_DMV         4   ///< 2 vectors, special mpeg2 Dual Prime Vectors 
-    /**motion vectors for a macroblock 
+#define MV_TYPE_16X16       0   ///< 1 vector for the whole mb
+#define MV_TYPE_8X8         1   ///< 4 vectors (h263, mpeg4 4MV)
+#define MV_TYPE_16X8        2   ///< 2 vectors, one per 16x8 block
+#define MV_TYPE_FIELD       3   ///< 2 vectors, one per field
+#define MV_TYPE_DMV         4   ///< 2 vectors, special mpeg2 Dual Prime Vectors
+    /**motion vectors for a macroblock
        first coordinate : 0 = forward 1 = backward
        second "         : depend on type
        third  "         : 0 = x, 1 = y
     */
     int mv[2][4][2];
     int field_select[2][2];
-    int last_mv[2][2][2];             ///< last MV, used for MV prediction in MPEG1 & B-frame MPEG4 
-    uint8_t *fcode_tab;               ///< smallest fcode needed for each MV 
-    
+    int last_mv[2][2][2];             ///< last MV, used for MV prediction in MPEG1 & B-frame MPEG4
+    uint8_t *fcode_tab;               ///< smallest fcode needed for each MV
+
     MotionEstContext me;
 
-    int no_rounding;  /**< apply no rounding to motion compensation (MPEG4, msmpeg4, ...) 
+    int no_rounding;  /**< apply no rounding to motion compensation (MPEG4, msmpeg4, ...)
                         for b-frames rounding mode is allways 0 */
 
     int hurry_up;     /**< when set to 1 during decoding, b frames will be skipped
                          when set to 2 idct/dequant will be skipped too */
-                        
+
     /* macroblock layer */
     int mb_x, mb_y;
     int mb_skip_run;
@@ -454,7 +462,7 @@ typedef struct MpegEncContext {
     int block_index[6]; ///< index to current MB in block based arrays with edges
     int block_wrap[6];
     uint8_t *dest[3];
-    
+
     int *mb_index2xy;        ///< mb_index -> mb_x + mb_y*mb_stride
 
     /** matrix transmitted in the bitstream */
@@ -463,11 +471,11 @@ typedef struct MpegEncContext {
     uint16_t inter_matrix[64];
     uint16_t chroma_inter_matrix[64];
 #define QUANT_BIAS_SHIFT 8
-    int intra_quant_bias;    ///< bias for the quantizer 
-    int inter_quant_bias;    ///< bias for the quantizer 
-    int min_qcoeff;          ///< minimum encodable coefficient 
-    int max_qcoeff;          ///< maximum encodable coefficient 
-    int ac_esc_length;       ///< num of bits needed to encode the longest esc 
+    int intra_quant_bias;    ///< bias for the quantizer
+    int inter_quant_bias;    ///< bias for the quantizer
+    int min_qcoeff;          ///< minimum encodable coefficient
+    int max_qcoeff;          ///< maximum encodable coefficient
+    int ac_esc_length;       ///< num of bits needed to encode the longest esc
     uint8_t *intra_ac_vlc_length;
     uint8_t *intra_ac_vlc_last_length;
     uint8_t *inter_ac_vlc_length;
@@ -486,11 +494,11 @@ typedef struct MpegEncContext {
     uint16_t (*q_inter_matrix16)[2][64];
     int block_last_index[12];  ///< last non zero coefficient in block
     /* scantables */
-    ScanTable __align8 intra_scantable;
+    DECLARE_ALIGNED_8(ScanTable, intra_scantable);
     ScanTable intra_h_scantable;
     ScanTable intra_v_scantable;
     ScanTable inter_scantable; ///< if inter == intra then intra should be used to reduce tha cache usage
-    
+
     /* noise reduction */
     int (*dct_error_sum)[64];
     int dct_count[2];
@@ -501,7 +509,7 @@ typedef struct MpegEncContext {
     /* bit rate control */
     int64_t wanted_bits;
     int64_t total_bits;
-    int frame_bits;                ///< bits used for the current frame 
+    int frame_bits;                ///< bits used for the current frame
     RateControlContext rc_context; ///< contains stuff only accessed in ratecontrol.c
 
     /* statistics, used for 2-pass encoding */
@@ -515,11 +523,11 @@ typedef struct MpegEncContext {
     int skip_count;
     int misc_bits; ///< cbp, mb_type
     int last_bits; ///< temp var used for calculating the above vars
-    
+
     /* error concealment / resync */
     int error_count;
-    uint8_t *error_status_table;       ///< table of the error status of each MB  
-#define VP_START            1          ///< current MB is the first after a resync marker 
+    uint8_t *error_status_table;       ///< table of the error status of each MB
+#define VP_START            1          ///< current MB is the first after a resync marker
 #define AC_ERROR            2
 #define DC_ERROR            4
 #define MV_ERROR            8
@@ -527,40 +535,40 @@ typedef struct MpegEncContext {
 #define DC_END              32
 #define MV_END              64
 //FIXME some prefix?
-    
-    int resync_mb_x;                 ///< x position of last resync marker 
-    int resync_mb_y;                 ///< y position of last resync marker 
-    GetBitContext last_resync_gb;    ///< used to search for the next resync marker 
+
+    int resync_mb_x;                 ///< x position of last resync marker
+    int resync_mb_y;                 ///< y position of last resync marker
+    GetBitContext last_resync_gb;    ///< used to search for the next resync marker
     int mb_num_left;                 ///< number of MBs left in this video packet (for partitioned Slices only)
-    int next_p_frame_damaged;        ///< set if the next p frame is damaged, to avoid showing trashed b frames 
+    int next_p_frame_damaged;        ///< set if the next p frame is damaged, to avoid showing trashed b frames
     int error_resilience;
-    
+
     ParseContext parse_context;
 
     /* H.263 specific */
     int gob_index;
     int obmc;                       ///< overlapped block motion compensation
-        
+
     /* H.263+ specific */
-    int umvplus;                    ///< == H263+ && unrestricted_mv 
-    int h263_aic;                   ///< Advanded INTRA Coding (AIC) 
+    int umvplus;                    ///< == H263+ && unrestricted_mv
+    int h263_aic;                   ///< Advanced INTRA Coding (AIC)
     int h263_aic_dir;               ///< AIC direction: 0 = left, 1 = top
     int h263_slice_structured;
     int alt_inter_vlc;              ///< alternative inter vlc
     int modified_quant;
-    int loop_filter;    
+    int loop_filter;
     int custom_pcf;
-    
+
     /* mpeg4 specific */
-    int time_increment_bits;        ///< number of bits to represent the fractional part of time 
+    int time_increment_bits;        ///< number of bits to represent the fractional part of time
     int last_time_base;
-    int time_base;                  ///< time in seconds of last I,P,S Frame 
-    int64_t time;                   ///< time of current frame  
+    int time_base;                  ///< time in seconds of last I,P,S Frame
+    int64_t time;                   ///< time of current frame
     int64_t last_non_b_time;
-    uint16_t pp_time;               ///< time distance between the last 2 p,s,i frames 
-    uint16_t pb_time;               ///< time distance between the last b and p,s,i frame 
+    uint16_t pp_time;               ///< time distance between the last 2 p,s,i frames
+    uint16_t pb_time;               ///< time distance between the last b and p,s,i frame
     uint16_t pp_field_time;
-    uint16_t pb_field_time;         ///< like above, just for interlaced 
+    uint16_t pb_field_time;         ///< like above, just for interlaced
     int shape;
     int vol_sprite_usage;
     int sprite_width;
@@ -570,12 +578,12 @@ typedef struct MpegEncContext {
     int sprite_brightness_change;
     int num_sprite_warping_points;
     int real_sprite_warping_points;
-    int sprite_offset[2][2];         ///< sprite offset[isChroma][isMVY] 
-    int sprite_delta[2][2];          ///< sprite_delta [isY][isMVY]  
-    int sprite_shift[2];             ///< sprite shift [isChroma] 
+    int sprite_offset[2][2];         ///< sprite offset[isChroma][isMVY]
+    int sprite_delta[2][2];          ///< sprite_delta [isY][isMVY]
+    int sprite_shift[2];             ///< sprite shift [isChroma]
     int mcsel;
     int quant_precision;
-    int quarter_sample;              ///< 1->qpel, 0->half pel ME/MC  
+    int quarter_sample;              ///< 1->qpel, 0->half pel ME/MC
     int scalability;
     int hierachy_type;
     int enhancement_type;
@@ -584,19 +592,19 @@ typedef struct MpegEncContext {
     int aspect_ratio_info; //FIXME remove
     int sprite_warping_accuracy;
     int low_latency_sprite;
-    int data_partitioning;           ///< data partitioning flag from header 
-    int partitioned_frame;           ///< is current frame partitioned 
-    int rvlc;                        ///< reversible vlc 
+    int data_partitioning;           ///< data partitioning flag from header
+    int partitioned_frame;           ///< is current frame partitioned
+    int rvlc;                        ///< reversible vlc
     int resync_marker;               ///< could this stream contain resync markers
-    int low_delay;                   ///< no reordering needed / has no b-frames 
+    int low_delay;                   ///< no reordering needed / has no b-frames
     int vo_type;
-    int vol_control_parameters;      ///< does the stream contain the low_delay flag, used to workaround buggy encoders 
-    int intra_dc_threshold;          ///< QP above whch the ac VLC should be used for intra dc 
-    PutBitContext tex_pb;            ///< used for data partitioned VOPs 
-    PutBitContext pb2;               ///< used for data partitioned VOPs 
+    int vol_control_parameters;      ///< does the stream contain the low_delay flag, used to workaround buggy encoders
+    int intra_dc_threshold;          ///< QP above which the ac VLC should be used for intra dc
+    PutBitContext tex_pb;            ///< used for data partitioned VOPs
+    PutBitContext pb2;               ///< used for data partitioned VOPs
     int mpeg_quant;
-    int t_frame;                       ///< time distance of first I -> B, used for interlaced b frames 
-    int padding_bug_score;             ///< used to detect the VERY common padding bug in MPEG4 
+    int t_frame;                       ///< time distance of first I -> B, used for interlaced b frames
+    int padding_bug_score;             ///< used to detect the VERY common padding bug in MPEG4
 
     /* divx specific, used to workaround (many) bugs in divx5 */
     int divx_version;
@@ -604,23 +612,23 @@ typedef struct MpegEncContext {
     int divx_packed;
     uint8_t *bitstream_buffer; //Divx 5.01 puts several frames in a single one, this is used to reorder them
     int bitstream_buffer_size;
-    int allocated_bitstream_buffer_size;
-    
+    unsigned int allocated_bitstream_buffer_size;
+
     int xvid_build;
-    
+
     /* lavc specific stuff, used to workaround bugs in libavcodec */
     int lavc_build;
-    
+
     /* RV10 specific */
-    int rv10_version; ///< RV10 version: 0 or 3 
+    int rv10_version; ///< RV10 version: 0 or 3
     int rv10_first_dc_coded[3];
-    
+
     /* MJPEG specific */
     struct MJpegContext *mjpeg_ctx;
-    int mjpeg_vsample[3];       ///< vertical sampling factors, default = {2, 1, 1} 
-    int mjpeg_hsample[3];       ///< horizontal sampling factors, default = {2, 1, 1} 
-    int mjpeg_write_tables;     ///< do we want to have quantisation- and huffmantables in the jpeg file ? 
-    int mjpeg_data_only_frames; ///< frames only with SOI, SOS and EOI markers 
+    int mjpeg_vsample[3];       ///< vertical sampling factors, default = {2, 1, 1}
+    int mjpeg_hsample[3];       ///< horizontal sampling factors, default = {2, 1, 1}
+    int mjpeg_write_tables;     ///< do we want to have quantisation- and huffmantables in the jpeg file ?
+    int mjpeg_data_only_frames; ///< frames only with SOI, SOS and EOI markers
 
     /* MSMPEG4 specific */
     int mv_table_index;
@@ -628,8 +636,8 @@ typedef struct MpegEncContext {
     int rl_chroma_table_index;
     int dc_table_index;
     int use_skip_mb_code;
-    int slice_height;      ///< in macroblocks 
-    int first_slice_line;  ///< used in mpeg4 too to handle resync markers 
+    int slice_height;      ///< in macroblocks
+    int first_slice_line;  ///< used in mpeg4 too to handle resync markers
     int flipflop_rounding;
     int msmpeg4_version;   ///< 0=not msmpeg4, 1=mp41, 2=mp42, 3=mp43/divx3 4=wmv1/7 5=wmv2/8
     int per_mb_rl_table;
@@ -644,11 +652,11 @@ typedef struct MpegEncContext {
     GetBitContext gb;
 
     /* Mpeg1 specific */
-    int gop_picture_number;  ///< index of the first picture of a GOP based on fake_pic_num & mpeg1 specific 
-    int last_mv_dir;         ///< last mv_dir, used for b frame encoding 
+    int gop_picture_number;  ///< index of the first picture of a GOP based on fake_pic_num & mpeg1 specific
+    int last_mv_dir;         ///< last mv_dir, used for b frame encoding
     int broken_link;         ///< no_output_of_prior_pics_flag
-    uint8_t *vbv_delay_ptr;  ///< pointer to vbv_delay in the bitstream 
-    
+    uint8_t *vbv_delay_ptr;  ///< pointer to vbv_delay in the bitstream
+
     /* MPEG2 specific - I wish I had not to support this mess. */
     int progressive_sequence;
     int mpeg_f_code[2][2];
@@ -682,34 +690,34 @@ typedef struct MpegEncContext {
 
     /* RTP specific */
     int rtp_mode;
-    
+
     uint8_t *ptr_lastgob;
     int swap_uv;//vcr2 codec is mpeg2 varint with UV swaped
     short * pblocks[12];
-    
-    DCTELEM (*block)[64]; ///< points to one of the following blocks 
+
+    DCTELEM (*block)[64]; ///< points to one of the following blocks
     DCTELEM (*blocks)[6][64]; // for HQ mode we need to keep the best block
     int (*decode_mb)(struct MpegEncContext *s, DCTELEM block[6][64]); // used by some codecs to avoid a switch()
 #define SLICE_OK         0
 #define SLICE_ERROR     -1
 #define SLICE_END       -2 ///<end marker found
 #define SLICE_NOEND     -3 ///<no end marker or error found but mb count exceeded
-    
-    void (*dct_unquantize_mpeg1_intra)(struct MpegEncContext *s, 
+
+    void (*dct_unquantize_mpeg1_intra)(struct MpegEncContext *s,
                            DCTELEM *block/*align 16*/, int n, int qscale);
-    void (*dct_unquantize_mpeg1_inter)(struct MpegEncContext *s, 
+    void (*dct_unquantize_mpeg1_inter)(struct MpegEncContext *s,
                            DCTELEM *block/*align 16*/, int n, int qscale);
-    void (*dct_unquantize_mpeg2_intra)(struct MpegEncContext *s, 
+    void (*dct_unquantize_mpeg2_intra)(struct MpegEncContext *s,
                            DCTELEM *block/*align 16*/, int n, int qscale);
-    void (*dct_unquantize_mpeg2_inter)(struct MpegEncContext *s, 
+    void (*dct_unquantize_mpeg2_inter)(struct MpegEncContext *s,
                            DCTELEM *block/*align 16*/, int n, int qscale);
-    void (*dct_unquantize_h263_intra)(struct MpegEncContext *s, 
+    void (*dct_unquantize_h263_intra)(struct MpegEncContext *s,
                            DCTELEM *block/*align 16*/, int n, int qscale);
-    void (*dct_unquantize_h263_inter)(struct MpegEncContext *s, 
+    void (*dct_unquantize_h263_inter)(struct MpegEncContext *s,
                            DCTELEM *block/*align 16*/, int n, int qscale);
-    void (*dct_unquantize_h261_intra)(struct MpegEncContext *s, 
+    void (*dct_unquantize_h261_intra)(struct MpegEncContext *s,
                            DCTELEM *block/*align 16*/, int n, int qscale);
-    void (*dct_unquantize_h261_inter)(struct MpegEncContext *s, 
+    void (*dct_unquantize_h261_inter)(struct MpegEncContext *s,
                            DCTELEM *block/*align 16*/, int n, int qscale);
     void (*dct_unquantize_intra)(struct MpegEncContext *s, // unquantizer to use (mpeg4 can use both)
                            DCTELEM *block/*align 16*/, int n, int qscale);
@@ -754,7 +762,7 @@ void ff_copy_bits(PutBitContext *pb, uint8_t *src, int length);
 void ff_clean_intra_table_entries(MpegEncContext *s);
 void ff_init_scantable(uint8_t *, ScanTable *st, const uint8_t *src_scantable);
 void ff_draw_horiz_band(MpegEncContext *s, int y, int h);
-void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h, 
+void ff_emulated_edge_mc(uint8_t *buf, uint8_t *src, int linesize, int block_w, int block_h,
                                     int src_x, int src_y, int w, int h);
 #define END_NOT_FOUND -100
 int ff_combine_frame(ParseContext *pc, int next, uint8_t **buf, int *buf_size);
@@ -765,6 +773,7 @@ void ff_write_quant_matrix(PutBitContext *pb, int16_t *matrix);
 int ff_find_unused_picture(MpegEncContext *s, int shared);
 void ff_denoise_dct(MpegEncContext *s, DCTELEM *block);
 void ff_update_duplicate_context(MpegEncContext *dst, MpegEncContext *src);
+const uint8_t *ff_find_start_code(const uint8_t *p, const uint8_t *end, uint32_t *state);
 
 void ff_er_frame_start(MpegEncContext *s);
 void ff_er_frame_end(MpegEncContext *s);
@@ -810,8 +819,8 @@ void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_
 void ff_init_me(MpegEncContext *s);
 int ff_pre_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y);
 inline int ff_epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
-                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2], 
-                             int ref_mv_scale, int size, int h);                             
+                             int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
+                             int ref_mv_scale, int size, int h);
 int inline ff_get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
                                int ref_index, int size, int h, int add_rate);
 
@@ -832,16 +841,16 @@ int ff_mpeg1_find_frame_end(ParseContext *pc, const uint8_t *buf, int buf_size);
 
 /** RLTable. */
 typedef struct RLTable {
-    int n;                         ///< number of entries of table_vlc minus 1 
-    int last;                      ///< number of values for last = 0 
+    int n;                         ///< number of entries of table_vlc minus 1
+    int last;                      ///< number of values for last = 0
     const uint16_t (*table_vlc)[2];
     const int8_t *table_run;
     const int8_t *table_level;
-    uint8_t *index_run[2];         ///< encoding only 
-    int8_t *max_level[2];          ///< encoding & decoding 
-    int8_t *max_run[2];            ///< encoding & decoding 
+    uint8_t *index_run[2];         ///< encoding only
+    int8_t *max_level[2];          ///< encoding & decoding
+    int8_t *max_run[2];            ///< encoding & decoding
     VLC vlc;                       ///< decoding only deprected FIXME remove
-    RL_VLC_ELEM *rl_vlc[32];       ///< decoding only 
+    RL_VLC_ELEM *rl_vlc[32];       ///< decoding only
 } RLTable;
 
 void init_rl(RLTable *rl, int use_static);
@@ -878,14 +887,14 @@ void ff_h261_encode_init(MpegEncContext *s);
 
 /* h263.c, h263dec.c */
 int ff_h263_decode_init(AVCodecContext *avctx);
-int ff_h263_decode_frame(AVCodecContext *avctx, 
+int ff_h263_decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
                              uint8_t *buf, int buf_size);
 int ff_h263_decode_end(AVCodecContext *avctx);
-void h263_encode_mb(MpegEncContext *s, 
+void h263_encode_mb(MpegEncContext *s,
                     DCTELEM block[6][64],
                     int motion_x, int motion_y);
-void mpeg4_encode_mb(MpegEncContext *s, 
+void mpeg4_encode_mb(MpegEncContext *s,
                     DCTELEM block[6][64],
                     int motion_x, int motion_y);
 void h263_encode_picture_header(MpegEncContext *s, int picture_number);
@@ -893,7 +902,7 @@ void ff_flv_encode_picture_header(MpegEncContext *s, int picture_number);
 void h263_encode_gob_header(MpegEncContext * s, int mb_line);
 int16_t *h263_pred_motion(MpegEncContext * s, int block, int dir,
                         int *px, int *py);
-void mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n, 
+void mpeg4_pred_ac(MpegEncContext * s, DCTELEM *block, int n,
                    int dir);
 void ff_set_mpeg4_time(MpegEncContext * s, int picture_number);
 void mpeg4_encode_picture_header(MpegEncContext *s, int picture_number);
@@ -941,7 +950,7 @@ void rv20_encode_picture_header(MpegEncContext *s, int picture_number);
 /* msmpeg4.c */
 void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number);
 void msmpeg4_encode_ext_header(MpegEncContext * s);
-void msmpeg4_encode_mb(MpegEncContext * s, 
+void msmpeg4_encode_mb(MpegEncContext * s,
                        DCTELEM block[6][64],
                        int motion_x, int motion_y);
 int msmpeg4_decode_picture_header(MpegEncContext * s);
@@ -956,14 +965,14 @@ void ff_mspel_motion(MpegEncContext *s,
                                uint8_t **ref_picture, op_pixels_func (*pix_op)[4],
                                int motion_x, int motion_y, int h);
 int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number);
-void ff_wmv2_encode_mb(MpegEncContext * s, 
+void ff_wmv2_encode_mb(MpegEncContext * s,
                        DCTELEM block[6][64],
                        int motion_x, int motion_y);
 
 /* mjpeg.c */
 int mjpeg_init(MpegEncContext *s);
 void mjpeg_close(MpegEncContext *s);
-void mjpeg_encode_mb(MpegEncContext *s, 
+void mjpeg_encode_mb(MpegEncContext *s,
                      DCTELEM block[6][64]);
 void mjpeg_picture_header(MpegEncContext *s);
 void mjpeg_picture_trailer(MpegEncContext *s);
@@ -972,7 +981,7 @@ void ff_mjpeg_stuffing(PutBitContext * pbc);
 
 /* rate control */
 int ff_rate_control_init(MpegEncContext *s);
-float ff_rate_estimate_qscale(MpegEncContext *s);
+float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run);
 void ff_write_pass1_stats(MpegEncContext *s);
 void ff_rate_control_uninit(MpegEncContext *s);
 double ff_eval(char *s, double *const_value, const char **const_name,
@@ -980,6 +989,10 @@ double ff_eval(char *s, double *const_value, const char **const_name,
                double (**func2)(void *, double, double), char **func2_name,
                void *opaque);
 int ff_vbv_update(MpegEncContext *s, int frame_size);
+void ff_get_2pass_fcode(MpegEncContext *s);
 
+int ff_xvid_rate_control_init(MpegEncContext *s);
+void ff_xvid_rate_control_uninit(MpegEncContext *s);
+float ff_xvid_rate_estimate_qscale(MpegEncContext *s, int dry_run);
 
 #endif /* AVCODEC_MPEGVIDEO_H */
diff --git a/src/libffmpeg/libavcodec/msmpeg4.c b/src/libffmpeg/libavcodec/msmpeg4.c
index 81f147918..5bb7158e6 100644
--- a/src/libffmpeg/libavcodec/msmpeg4.c
+++ b/src/libffmpeg/libavcodec/msmpeg4.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * msmpeg4v1 & v2 stuff by Michael Niedermayer <michaelni@gmx.at>
  */
@@ -30,11 +30,11 @@
 #include "mpegvideo.h"
 
 /*
- * You can also call this codec : MPEG4 with a twist ! 
+ * You can also call this codec : MPEG4 with a twist !
  *
- * TODO: 
+ * TODO:
  *        - (encoding) select best mv table (two choices)
- *        - (encoding) select best vlc/dc table 
+ *        - (encoding) select best vlc/dc table
  */
 //#define DEBUG
 
@@ -63,7 +63,7 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
 static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                                        int n, int coded, const uint8_t *scantable);
 static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr);
-static int msmpeg4_decode_motion(MpegEncContext * s, 
+static int msmpeg4_decode_motion(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr);
 static void msmpeg4v2_encode_motion(MpegEncContext * s, int val);
 static void init_h263_dc_for_msmpeg4(void);
@@ -119,14 +119,14 @@ void print_stats(void)
     if (total == 0)
         total = 1;
     for(i=0;i<ST_NB;i++) {
-        printf("%-10s : %10.1f %5.1f%%\n", 
-               st_names[i], 
-               (double)st_bit_counts[i] / 8.0, 
+        printf("%-10s : %10.1f %5.1f%%\n",
+               st_names[i],
+               (double)st_bit_counts[i] / 8.0,
                (double)st_bit_counts[i] * 100.0 / total);
     }
     printf("%-10s : %10.1f %5.1f%%\n",
-           "total", 
-           (double)total / 8.0, 
+           "total",
+           (double)total / 8.0,
            100.0);
 
     printf("Output:\n");
@@ -136,14 +136,14 @@ void print_stats(void)
     if (total == 0)
         total = 1;
     for(i=0;i<ST_NB;i++) {
-        printf("%-10s : %10.1f %5.1f%%\n", 
-               st_names[i], 
-               (double)st_out_bit_counts[i] / 8.0, 
+        printf("%-10s : %10.1f %5.1f%%\n",
+               st_names[i],
+               (double)st_out_bit_counts[i] / 8.0,
                (double)st_out_bit_counts[i] * 100.0 / total);
     }
     printf("%-10s : %10.1f %5.1f%%\n",
-           "total", 
-           (double)total / 8.0, 
+           "total",
+           (double)total / 8.0,
            100.0);
 }
 
@@ -156,7 +156,7 @@ void print_stats(void)
 static void common_init(MpegEncContext * s)
 {
     static int inited=0;
-    
+
     switch(s->msmpeg4_version){
     case 1:
     case 2:
@@ -186,7 +186,7 @@ static void common_init(MpegEncContext * s)
 
     }
 
-    
+
     if(s->msmpeg4_version>=4){
         ff_init_scantable(s->dsp.idct_permutation, &s->intra_scantable  , wmv1_scantable[1]);
         ff_init_scantable(s->dsp.idct_permutation, &s->intra_h_scantable, wmv1_scantable[2]);
@@ -194,7 +194,7 @@ static void common_init(MpegEncContext * s)
         ff_init_scantable(s->dsp.idct_permutation, &s->inter_scantable  , wmv1_scantable[0]);
     }
     //Note the default tables are set in common_init in mpegvideo.c
-    
+
     if(!inited){
         inited=1;
 
@@ -213,7 +213,7 @@ static void init_mv_table(MVTable *tab)
     /* mark all entries as not used */
     for(i=0;i<4096;i++)
         tab->table_mv_index[i] = tab->n;
-    
+
     for(i=0;i<tab->n;i++) {
         x = tab->table_mvx[i];
         y = tab->table_mvy[i];
@@ -269,14 +269,14 @@ static int get_size_of_code(MpegEncContext * s, RLTable *rl, int last, int run,
     int size=0;
     int code;
     int run_diff= intra ? 0 : 1;
-    
+
     code = get_rl_index(rl, last, run, level);
     size+= rl->table_vlc[code][1];
     if (code == rl->n) {
         int level1, run1;
 
         level1 = level - rl->max_level[last][run];
-        if (level1 < 1) 
+        if (level1 < 1)
             goto esc2;
         code = get_rl_index(rl, last, run, level1);
         if (code == rl->n) {
@@ -318,7 +318,7 @@ static void find_best_tables(MpegEncContext * s)
         int size=0;
 
         if(i>0){// ;)
-            size++; 
+            size++;
             chroma_size++;
         }
         for(level=0; level<=MAX_LEVEL; level++){
@@ -330,7 +330,7 @@ static void find_best_tables(MpegEncContext * s)
                     int inter_count       = s->ac_stats[0][0][level][run][last] + s->ac_stats[0][1][level][run][last];
                     int intra_luma_count  = s->ac_stats[1][0][level][run][last];
                     int intra_chroma_count= s->ac_stats[1][1][level][run][last];
-                    
+
                     if(s->pict_type==I_TYPE){
                         size       += intra_luma_count  *rl_length[i  ][level][run][last];
                         chroma_size+= intra_chroma_count*rl_length[i+3][level][run][last];
@@ -338,7 +338,7 @@ static void find_best_tables(MpegEncContext * s)
                         size+=        intra_luma_count  *rl_length[i  ][level][run][last]
                                      +intra_chroma_count*rl_length[i+3][level][run][last]
                                      +inter_count       *rl_length[i+3][level][run][last];
-                    }                   
+                    }
                 }
                 if(last_size == size+chroma_size) break;
             }
@@ -353,16 +353,16 @@ static void find_best_tables(MpegEncContext * s)
         }
     }
 
-//    printf("type:%d, best:%d, qp:%d, var:%d, mcvar:%d, size:%d //\n", 
+//    printf("type:%d, best:%d, qp:%d, var:%d, mcvar:%d, size:%d //\n",
 //           s->pict_type, best, s->qscale, s->mb_var_sum, s->mc_mb_var_sum, best_size);
-           
+
     if(s->pict_type==P_TYPE) chroma_best= best;
 
     memset(s->ac_stats, 0, sizeof(int)*(MAX_LEVEL+1)*(MAX_RUN+1)*2*2*2);
 
     s->rl_table_index       =        best;
     s->rl_chroma_table_index= chroma_best;
-    
+
     if(s->pict_type != s->last_non_b_pict_type){
         s->rl_table_index= 2;
         if(s->pict_type==I_TYPE)
@@ -398,7 +398,7 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
     if (s->pict_type == I_TYPE) {
         s->slice_height= s->mb_height/1;
         put_bits(&s->pb, 5, 0x16 + s->mb_height/s->slice_height);
-        
+
         if(s->msmpeg4_version==4){
             msmpeg4_encode_ext_header(s);
             if(s->bit_rate>MBAC_BITRATE)
@@ -415,7 +415,7 @@ void msmpeg4_encode_picture_header(MpegEncContext * s, int picture_number)
         }
     } else {
         put_bits(&s->pb, 1, s->use_skip_mb_code);
-        
+
         if(s->msmpeg4_version==4 && s->bit_rate>MBAC_BITRATE)
             put_bits(&s->pb, 1, s->per_mb_rl_table);
 
@@ -461,18 +461,18 @@ static inline int coded_block_pred(MpegEncContext * s, int n, uint8_t **coded_bl
     wrap = s->b8_stride;
 
     /* B C
-     * A X 
+     * A X
      */
     a = s->coded_block[xy - 1       ];
     b = s->coded_block[xy - 1 - wrap];
     c = s->coded_block[xy     - wrap];
-    
+
     if (b == c) {
         pred = a;
     } else {
         pred = c;
     }
-    
+
     /* store value */
     *coded_block_ptr = &s->coded_block[xy];
 
@@ -481,7 +481,7 @@ static inline int coded_block_pred(MpegEncContext * s, int n, uint8_t **coded_bl
 
 #ifdef CONFIG_ENCODERS
 
-static void msmpeg4_encode_motion(MpegEncContext * s, 
+static void msmpeg4_encode_motion(MpegEncContext * s,
                                   int mx, int my)
 {
     int code;
@@ -498,20 +498,20 @@ static void msmpeg4_encode_motion(MpegEncContext * s,
         my += 64;
     else if (my >= 64)
         my -= 64;
-    
+
     mx += 32;
     my += 32;
 #if 0
     if ((unsigned)mx >= 64 ||
-        (unsigned)my >= 64) 
+        (unsigned)my >= 64)
         fprintf(stderr, "error mx=%d my=%d\n", mx, my);
 #endif
     mv = &mv_tables[s->mv_table_index];
 
     code = mv->table_mv_index[(mx << 6) | my];
     set_stat(ST_MV);
-    put_bits(&s->pb, 
-             mv->table_mv_bits[code], 
+    put_bits(&s->pb,
+             mv->table_mv_bits[code],
              mv->table_mv_code[code]);
     if (code == mv->n) {
         /* escape : code litterally */
@@ -528,12 +528,12 @@ static inline void handle_slices(MpegEncContext *s){
             }
             s->first_slice_line = 1;
         } else {
-            s->first_slice_line = 0; 
+            s->first_slice_line = 0;
         }
     }
 }
 
-void msmpeg4_encode_mb(MpegEncContext * s, 
+void msmpeg4_encode_mb(MpegEncContext * s,
                        DCTELEM block[6][64],
                        int motion_x, int motion_y)
 {
@@ -542,36 +542,36 @@ void msmpeg4_encode_mb(MpegEncContext * s,
     uint8_t *coded_block;
 
     handle_slices(s);
-    
+
     if (!s->mb_intra) {
-	/* compute cbp */
+        /* compute cbp */
         set_stat(ST_INTER_MB);
-	cbp = 0;
-	for (i = 0; i < 6; i++) {
-	    if (s->block_last_index[i] >= 0)
-		cbp |= 1 << (5 - i);
-	}
-	if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) {
-	    /* skip macroblock */
-	    put_bits(&s->pb, 1, 1);
+        cbp = 0;
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0)
+                cbp |= 1 << (5 - i);
+        }
+        if (s->use_skip_mb_code && (cbp | motion_x | motion_y) == 0) {
+            /* skip macroblock */
+            put_bits(&s->pb, 1, 1);
             s->last_bits++;
-	    s->misc_bits++;
+            s->misc_bits++;
             s->skip_count++;
 
-	    return;
-	}
+            return;
+        }
         if (s->use_skip_mb_code)
-            put_bits(&s->pb, 1, 0);	/* mb coded */
-        
+            put_bits(&s->pb, 1, 0);     /* mb coded */
+
         if(s->msmpeg4_version<=2){
-            put_bits(&s->pb, 
-                     v2_mb_type[cbp&3][1], 
+            put_bits(&s->pb,
+                     v2_mb_type[cbp&3][1],
                      v2_mb_type[cbp&3][0]);
             if((cbp&3) != 3) coded_cbp= cbp ^ 0x3C;
             else             coded_cbp= cbp;
 
-            put_bits(&s->pb, 
-                     cbpy_tab[coded_cbp>>2][1], 
+            put_bits(&s->pb,
+                     cbpy_tab[coded_cbp>>2][1],
                      cbpy_tab[coded_cbp>>2][0]);
 
             s->misc_bits += get_bits_diff(s);
@@ -580,15 +580,15 @@ void msmpeg4_encode_mb(MpegEncContext * s,
             msmpeg4v2_encode_motion(s, motion_x - pred_x);
             msmpeg4v2_encode_motion(s, motion_y - pred_y);
         }else{
-            put_bits(&s->pb, 
-                     table_mb_non_intra[cbp + 64][1], 
+            put_bits(&s->pb,
+                     table_mb_non_intra[cbp + 64][1],
                      table_mb_non_intra[cbp + 64][0]);
 
             s->misc_bits += get_bits_diff(s);
 
             /* motion vector */
             h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
-            msmpeg4_encode_motion(s, motion_x - pred_x, 
+            msmpeg4_encode_motion(s, motion_x - pred_x,
                                   motion_y - pred_y);
         }
 
@@ -599,10 +599,10 @@ void msmpeg4_encode_mb(MpegEncContext * s,
         }
         s->p_tex_bits += get_bits_diff(s);
     } else {
-	/* compute cbp */
-	cbp = 0;
+        /* compute cbp */
+        cbp = 0;
         coded_cbp = 0;
-	for (i = 0; i < 6; i++) {
+        for (i = 0; i < 6; i++) {
             int val, pred;
             val = (s->block_last_index[i] >= 1);
             cbp |= val << (5 - i);
@@ -613,7 +613,7 @@ void msmpeg4_encode_mb(MpegEncContext * s,
                 val = val ^ pred;
             }
             coded_cbp |= val << (5 - i);
-	}
+        }
 #if 0
         if (coded_cbp)
             printf("cbp=%x %x\n", cbp, coded_cbp);
@@ -621,33 +621,33 @@ void msmpeg4_encode_mb(MpegEncContext * s,
 
         if(s->msmpeg4_version<=2){
             if (s->pict_type == I_TYPE) {
-                put_bits(&s->pb, 
+                put_bits(&s->pb,
                          v2_intra_cbpc[cbp&3][1], v2_intra_cbpc[cbp&3][0]);
             } else {
                 if (s->use_skip_mb_code)
-                    put_bits(&s->pb, 1, 0);	/* mb coded */
-                put_bits(&s->pb, 
-                         v2_mb_type[(cbp&3) + 4][1], 
+                    put_bits(&s->pb, 1, 0);     /* mb coded */
+                put_bits(&s->pb,
+                         v2_mb_type[(cbp&3) + 4][1],
                          v2_mb_type[(cbp&3) + 4][0]);
             }
-            put_bits(&s->pb, 1, 0);	/* no AC prediction yet */
-            put_bits(&s->pb, 
-                     cbpy_tab[cbp>>2][1], 
+            put_bits(&s->pb, 1, 0);             /* no AC prediction yet */
+            put_bits(&s->pb,
+                     cbpy_tab[cbp>>2][1],
                      cbpy_tab[cbp>>2][0]);
         }else{
             if (s->pict_type == I_TYPE) {
                 set_stat(ST_INTRA_MB);
-                put_bits(&s->pb, 
+                put_bits(&s->pb,
                          ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]);
             } else {
                 if (s->use_skip_mb_code)
-                    put_bits(&s->pb, 1, 0);	/* mb coded */
-                put_bits(&s->pb, 
-                         table_mb_non_intra[cbp][1], 
+                    put_bits(&s->pb, 1, 0);     /* mb coded */
+                put_bits(&s->pb,
+                         table_mb_non_intra[cbp][1],
                          table_mb_non_intra[cbp][0]);
             }
             set_stat(ST_INTRA_MB);
-            put_bits(&s->pb, 1, 0);	/* no AC prediction yet */
+            put_bits(&s->pb, 1, 0);             /* no AC prediction yet */
             if(s->inter_intra_pred){
                 s->h263_aic_dir=0;
                 put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]);
@@ -665,7 +665,7 @@ void msmpeg4_encode_mb(MpegEncContext * s,
 
 #endif //CONFIG_ENCODERS
 
-static inline int msmpeg4v1_pred_dc(MpegEncContext * s, int n, 
+static inline int msmpeg4v1_pred_dc(MpegEncContext * s, int n,
                                     int32_t **dc_val_ptr)
 {
     int i;
@@ -675,9 +675,9 @@ static inline int msmpeg4v1_pred_dc(MpegEncContext * s, int n,
     } else {
         i= n-3;
     }
-    
+
     *dc_val_ptr= &s->last_dc[i];
-    return s->last_dc[i]; 
+    return s->last_dc[i];
 }
 
 static int get_dc(uint8_t *src, int stride, int scale)
@@ -694,7 +694,7 @@ static int get_dc(uint8_t *src, int stride, int scale)
 }
 
 /* dir = 0: left, dir = 1: top prediction */
-static inline int msmpeg4_pred_dc(MpegEncContext * s, int n, 
+static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
                              uint16_t **dc_val_ptr, int *dir_ptr)
 {
     int a, b, c, wrap, pred, scale;
@@ -702,21 +702,21 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
 
     /* find prediction */
     if (n < 4) {
-	scale = s->y_dc_scale;
+        scale = s->y_dc_scale;
     } else {
-	scale = s->c_dc_scale;
+        scale = s->c_dc_scale;
     }
-    
+
     wrap = s->block_wrap[n];
     dc_val= s->dc_val[0] + s->block_index[n];
 
     /* B C
-     * A X 
+     * A X
      */
     a = dc_val[ - 1];
     b = dc_val[ - 1 - wrap];
     c = dc_val[ - wrap];
-    
+
     if(s->first_slice_line && (n&2)==0 && s->msmpeg4_version<4){
         b=c=1024;
     }
@@ -727,22 +727,22 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
        to problems if Q could vary !) */
 #if (defined(ARCH_X86) || defined(ARCH_X86_64)) && !defined PIC
     asm volatile(
-        "movl %3, %%eax		\n\t"
-	"shrl $1, %%eax		\n\t"
-	"addl %%eax, %2		\n\t"
-	"addl %%eax, %1		\n\t"
-	"addl %0, %%eax		\n\t"
-	"mull %4		\n\t"
-	"movl %%edx, %0		\n\t"
-	"movl %1, %%eax		\n\t"
-	"mull %4		\n\t"
-	"movl %%edx, %1		\n\t"
-	"movl %2, %%eax		\n\t"
-	"mull %4		\n\t"
-	"movl %%edx, %2		\n\t"
-	: "+b" (a), "+c" (b), "+D" (c)
-	: "g" (scale), "S" (inverse[scale])
-	: "%eax", "%edx"
+        "movl %3, %%eax         \n\t"
+        "shrl $1, %%eax         \n\t"
+        "addl %%eax, %2         \n\t"
+        "addl %%eax, %1         \n\t"
+        "addl %0, %%eax         \n\t"
+        "mull %4                \n\t"
+        "movl %%edx, %0         \n\t"
+        "movl %1, %%eax         \n\t"
+        "mull %4                \n\t"
+        "movl %%edx, %1         \n\t"
+        "movl %2, %%eax         \n\t"
+        "mull %4                \n\t"
+        "movl %%edx, %2         \n\t"
+        : "+b" (a), "+c" (b), "+D" (c)
+        : "g" (scale), "S" (inverse[scale])
+        : "%eax", "%edx"
     );
 #else
     /* #elif defined (ARCH_ALPHA) */
@@ -750,13 +750,13 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
        common case. But they are costly everywhere...
      */
     if (scale == 8) {
-	a = (a + (8 >> 1)) / 8;
-	b = (b + (8 >> 1)) / 8;
-	c = (c + (8 >> 1)) / 8;
+        a = (a + (8 >> 1)) / 8;
+        b = (b + (8 >> 1)) / 8;
+        c = (c + (8 >> 1)) / 8;
     } else {
-	a = FASTDIV((a + (scale >> 1)), scale);
-	b = FASTDIV((b + (scale >> 1)), scale);
-	c = FASTDIV((c + (scale >> 1)), scale);
+        a = FASTDIV((a + (scale >> 1)), scale);
+        b = FASTDIV((b + (scale >> 1)), scale);
+        c = FASTDIV((c + (scale >> 1)), scale);
     }
 #endif
     /* XXX: WARNING: they did not choose the same test as MPEG4. This
@@ -765,7 +765,7 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
         if(s->inter_intra_pred){
             uint8_t *dest;
             int wrap;
-            
+
             if(n==1){
                 pred=a;
                 *dir_ptr = 0;
@@ -792,7 +792,7 @@ static inline int msmpeg4_pred_dc(MpegEncContext * s, int n,
                 else           a= get_dc(dest-8, wrap, scale*8);
                 if(s->mb_y==0) c= (1024 + (scale>>1))/scale;
                 else           c= get_dc(dest-8*wrap, wrap, scale*8);
-                
+
                 if (s->h263_aic_dir==0) {
                     pred= a;
                     *dir_ptr = 0;
@@ -851,7 +851,7 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr
     if(s->msmpeg4_version==1){
         int32_t *dc_val;
         pred = msmpeg4v1_pred_dc(s, n, &dc_val);
-        
+
         /* update predictor */
         *dc_val= level;
     }else{
@@ -871,11 +871,11 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr
 
     if(s->msmpeg4_version<=2){
         if (n < 4) {
-            put_bits(&s->pb, 
+            put_bits(&s->pb,
                      v2_dc_lum_table[level+256][1],
                      v2_dc_lum_table[level+256][0]);
         }else{
-            put_bits(&s->pb, 
+            put_bits(&s->pb,
                      v2_dc_chroma_table[level+256][1],
                      v2_dc_chroma_table[level+256][0]);
         }
@@ -886,7 +886,7 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr
             sign = 1;
         }
         code = level;
-        if (code > DC_MAX) 
+        if (code > DC_MAX)
             code = DC_MAX;
 
         if (s->dc_table_index == 0) {
@@ -902,10 +902,10 @@ static void msmpeg4_encode_dc(MpegEncContext * s, int level, int n, int *dir_ptr
                 put_bits(&s->pb, ff_table1_dc_chroma[code][1], ff_table1_dc_chroma[code][0]);
             }
         }
-            
+
         if (code == DC_MAX)
             put_bits(&s->pb, 8, level);
-            
+
         if (level != 0) {
             put_bits(&s->pb, 1, sign);
         }
@@ -957,17 +957,17 @@ static inline void msmpeg4_encode_block(MpegEncContext * s, DCTELEM * block, int
     /* AC coefs */
     last_non_zero = i - 1;
     for (; i <= last_index; i++) {
-	j = scantable[i];
-	level = block[j];
-	if (level) {
-	    run = i - last_non_zero - 1;
-	    last = (i == last_index);
-	    sign = 0;
-	    slevel = level;
-	    if (level < 0) {
-		sign = 1;
-		level = -level;
-	    }
+        j = scantable[i];
+        level = block[j];
+        if (level) {
+            run = i - last_non_zero - 1;
+            last = (i == last_index);
+            sign = 0;
+            slevel = level;
+            if (level < 0) {
+                sign = 1;
+                level = -level;
+            }
 
             if(level<=MAX_LEVEL && run<=MAX_RUN){
                 s->ac_stats[s->mb_intra][n>3][level][run][last]++;
@@ -982,7 +982,7 @@ else
                 int level1, run1;
 
                 level1 = level - rl->max_level[last][run];
-                if (level1 < 1) 
+                if (level1 < 1)
                     goto esc2;
                 code = get_rl_index(rl, last, run, level1);
                 if (code == rl->n) {
@@ -1030,8 +1030,8 @@ else
             } else {
                 put_bits(&s->pb, 1, sign);
             }
-	    last_non_zero = i;
-	}
+            last_non_zero = i;
+        }
     }
 }
 
@@ -1064,7 +1064,7 @@ static void init_h263_dc_for_msmpeg4(void)
             v = abs(level);
             while (v) {
                 v >>= 1;
-		    size++;
+                    size++;
             }
 
             if (level < 0)
@@ -1092,7 +1092,7 @@ static void init_h263_dc_for_msmpeg4(void)
             uni_code= DCtab_chrom[size][0];
             uni_len = DCtab_chrom[size][1];
             uni_code ^= (1<<uni_len)-1; //M$ doesnt like compatibility
-            
+
             if (size > 0) {
                 uni_code<<=size; uni_code|=l;
                 uni_len+=size;
@@ -1125,31 +1125,31 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
         }
         for(i=0;i<2;i++) {
             mv = &mv_tables[i];
-            init_vlc(&mv->vlc, MV_VLC_BITS, mv->n + 1, 
+            init_vlc(&mv->vlc, MV_VLC_BITS, mv->n + 1,
                      mv->table_mv_bits, 1, 1,
                      mv->table_mv_code, 2, 2, 1);
         }
 
-        init_vlc(&ff_msmp4_dc_luma_vlc[0], DC_VLC_BITS, 120, 
+        init_vlc(&ff_msmp4_dc_luma_vlc[0], DC_VLC_BITS, 120,
                  &ff_table0_dc_lum[0][1], 8, 4,
                  &ff_table0_dc_lum[0][0], 8, 4, 1);
-        init_vlc(&ff_msmp4_dc_chroma_vlc[0], DC_VLC_BITS, 120, 
+        init_vlc(&ff_msmp4_dc_chroma_vlc[0], DC_VLC_BITS, 120,
                  &ff_table0_dc_chroma[0][1], 8, 4,
                  &ff_table0_dc_chroma[0][0], 8, 4, 1);
-        init_vlc(&ff_msmp4_dc_luma_vlc[1], DC_VLC_BITS, 120, 
+        init_vlc(&ff_msmp4_dc_luma_vlc[1], DC_VLC_BITS, 120,
                  &ff_table1_dc_lum[0][1], 8, 4,
                  &ff_table1_dc_lum[0][0], 8, 4, 1);
-        init_vlc(&ff_msmp4_dc_chroma_vlc[1], DC_VLC_BITS, 120, 
+        init_vlc(&ff_msmp4_dc_chroma_vlc[1], DC_VLC_BITS, 120,
                  &ff_table1_dc_chroma[0][1], 8, 4,
                  &ff_table1_dc_chroma[0][0], 8, 4, 1);
-    
-        init_vlc(&v2_dc_lum_vlc, DC_VLC_BITS, 512, 
+
+        init_vlc(&v2_dc_lum_vlc, DC_VLC_BITS, 512,
                  &v2_dc_lum_table[0][1], 8, 4,
                  &v2_dc_lum_table[0][0], 8, 4, 1);
-        init_vlc(&v2_dc_chroma_vlc, DC_VLC_BITS, 512, 
+        init_vlc(&v2_dc_chroma_vlc, DC_VLC_BITS, 512,
                  &v2_dc_chroma_table[0][1], 8, 4,
                  &v2_dc_chroma_table[0][0], 8, 4, 1);
-    
+
         init_vlc(&cbpy_vlc, CBPY_VLC_BITS, 16,
                  &cbpy_tab[0][1], 2, 1,
                  &cbpy_tab[0][0], 2, 1, 1);
@@ -1164,27 +1164,27 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
                  &mvtab[0][0], 2, 1, 1);
 
         for(i=0; i<4; i++){
-            init_vlc(&mb_non_intra_vlc[i], MB_NON_INTRA_VLC_BITS, 128, 
+            init_vlc(&mb_non_intra_vlc[i], MB_NON_INTRA_VLC_BITS, 128,
                      &wmv2_inter_table[i][0][1], 8, 4,
                      &wmv2_inter_table[i][0][0], 8, 4, 1); //FIXME name?
         }
-        
-        init_vlc(&ff_msmp4_mb_i_vlc, MB_INTRA_VLC_BITS, 64, 
+
+        init_vlc(&ff_msmp4_mb_i_vlc, MB_INTRA_VLC_BITS, 64,
                  &ff_msmp4_mb_i_table[0][1], 4, 2,
                  &ff_msmp4_mb_i_table[0][0], 4, 2, 1);
-        
-        init_vlc(&v1_intra_cbpc_vlc, V1_INTRA_CBPC_VLC_BITS, 8, 
+
+        init_vlc(&v1_intra_cbpc_vlc, V1_INTRA_CBPC_VLC_BITS, 8,
                  intra_MCBPC_bits, 1, 1,
                  intra_MCBPC_code, 1, 1, 1);
-        init_vlc(&v1_inter_cbpc_vlc, V1_INTER_CBPC_VLC_BITS, 25, 
+        init_vlc(&v1_inter_cbpc_vlc, V1_INTER_CBPC_VLC_BITS, 25,
                  inter_MCBPC_bits, 1, 1,
                  inter_MCBPC_code, 1, 1, 1);
-        
-        init_vlc(&inter_intra_vlc, INTER_INTRA_VLC_BITS, 4, 
+
+        init_vlc(&inter_intra_vlc, INTER_INTRA_VLC_BITS, 4,
                  &table_inter_intra[0][1], 2, 1,
                  &table_inter_intra[0][0], 2, 1, 1);
     }
-    
+
     switch(s->msmpeg4_version){
     case 1:
     case 2:
@@ -1200,9 +1200,9 @@ int ff_msmpeg4_decode_init(MpegEncContext *s)
         //FIXME + TODO VC9 decode mb
         break;
     }
-    
+
     s->slice_height= s->mb_height; //to avoid 1/0 if the first frame isnt a keyframe
-    
+
     return 0;
 }
 
@@ -1252,7 +1252,7 @@ return -1;
     }
 
     if (s->pict_type == I_TYPE) {
-        code = get_bits(&s->gb, 5); 
+        code = get_bits(&s->gb, 5);
         if(s->msmpeg4_version==1){
             if(code==0 || code>s->mb_height){
                 av_log(s->avctx, AV_LOG_ERROR, "invalid slice height %d\n", code);
@@ -1289,7 +1289,7 @@ return -1;
 
             if(s->bit_rate > MBAC_BITRATE) s->per_mb_rl_table= get_bits1(&s->gb);
             else                           s->per_mb_rl_table= 0;
-            
+
             if(!s->per_mb_rl_table){
                 s->rl_chroma_table_index = decode012(&s->gb);
                 s->rl_table_index = decode012(&s->gb);
@@ -1301,11 +1301,11 @@ return -1;
         }
         s->no_rounding = 1;
         if(s->avctx->debug&FF_DEBUG_PICT_INFO)
-	    av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d   \n", 
-		s->qscale,
-		s->rl_chroma_table_index,
-		s->rl_table_index, 
-		s->dc_table_index,
+            av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d slice:%d   \n",
+                s->qscale,
+                s->rl_chroma_table_index,
+                s->rl_table_index,
+                s->dc_table_index,
                 s->per_mb_rl_table,
                 s->slice_height);
     } else {
@@ -1347,22 +1347,22 @@ return -1;
             s->inter_intra_pred= (s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
             break;
         }
-        
+
         if(s->avctx->debug&FF_DEBUG_PICT_INFO)
-	    av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d   \n", 
-		s->use_skip_mb_code, 
-		s->rl_table_index, 
-		s->rl_chroma_table_index, 
-		s->dc_table_index,
-		s->mv_table_index,
+            av_log(s->avctx, AV_LOG_DEBUG, "skip:%d rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d   \n",
+                s->use_skip_mb_code,
+                s->rl_table_index,
+                s->rl_chroma_table_index,
+                s->dc_table_index,
+                s->mv_table_index,
                 s->per_mb_rl_table,
                 s->qscale);
 
-	if(s->flipflop_rounding){
-	    s->no_rounding ^= 1;
-	}else{
-	    s->no_rounding = 0;
-	}
+        if(s->flipflop_rounding){
+            s->no_rounding ^= 1;
+        }else{
+            s->no_rounding = 0;
+        }
     }
 //printf("%d %d %d %d %d\n", s->pict_type, s->bit_rate, s->inter_intra_pred, s->width, s->height);
 
@@ -1440,7 +1440,7 @@ static void msmpeg4v2_encode_motion(MpegEncContext * s, int val)
         code = (val >> bit_size) + 1;
         bits = val & (range - 1);
 
-        put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign); 
+        put_bits(&s->pb, mvtab[code][1] + 1, (mvtab[code][0] << 1) | sign);
         if (bit_size > 0) {
             put_bits(&s->pb, bit_size, bits);
         }
@@ -1482,7 +1482,7 @@ static int msmpeg4v2_decode_motion(MpegEncContext * s, int pred, int f_code)
 static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
 {
     int cbp, code, i;
-    
+
     if (s->pict_type == P_TYPE) {
         if (s->use_skip_mb_code) {
             if (get_bits1(&s->gb)) {
@@ -1509,7 +1509,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
         }
 
         s->mb_intra = code >>2;
-    
+
         cbp = code & 0x3;
     } else {
         s->mb_intra = 1;
@@ -1525,7 +1525,7 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
 
     if (!s->mb_intra) {
         int mx, my, cbpy;
-        
+
         cbpy= get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
         if(cbpy<0){
             av_log(s->avctx, AV_LOG_ERROR, "cbpy %d invalid at %d %d\n", cbp, s->mb_x, s->mb_y);
@@ -1534,11 +1534,11 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
 
         cbp|= cbpy<<2;
         if(s->msmpeg4_version==1 || (cbp&3) != 3) cbp^= 0x3C;
-        
+
         h263_pred_motion(s, 0, 0, &mx, &my);
         mx= msmpeg4v2_decode_motion(s, mx, 1);
         my= msmpeg4v2_decode_motion(s, my, 1);
-        
+
         s->mv_dir = MV_DIR_FORWARD;
         s->mv_type = MV_TYPE_16X16;
         s->mv[0][0][0] = mx;
@@ -1557,10 +1557,10 @@ static int msmpeg4v12_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
     s->dsp.clear_blocks(s->block[0]);
     for (i = 0; i < 6; i++) {
         if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
-	{
+        {
              av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
              return -1;
-	}
+        }
     }
     return 0;
 }
@@ -1589,13 +1589,13 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
                 return 0;
             }
         }
-        
+
         code = get_vlc2(&s->gb, mb_non_intra_vlc[DEFAULT_INTER_INDEX].table, MB_NON_INTRA_VLC_BITS, 3);
         if (code < 0)
             return -1;
-	//s->mb_intra = (code & 0x40) ? 0 : 1;
-	s->mb_intra = (~code & 0x40) >> 6;
-            
+        //s->mb_intra = (code & 0x40) ? 0 : 1;
+        s->mb_intra = (~code & 0x40) >> 6;
+
         cbp = code & 0x3f;
     } else {
         set_stat(ST_INTRA_MB);
@@ -1650,12 +1650,12 @@ static int msmpeg4v34_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
     s->dsp.clear_blocks(s->block[0]);
     for (i = 0; i < 6; i++) {
         if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
-	{
-	    av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
-	    return -1;
-	}
+        {
+            av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
+            return -1;
+        }
     }
-    
+
     return 0;
 }
 //#define ERROR_DETAILS
@@ -1672,10 +1672,10 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
         qmul=1;
         qadd=0;
 
-	/* DC coef */
+        /* DC coef */
         set_stat(ST_DC);
         level = msmpeg4_decode_dc(s, n, &dc_pred_dir);
-        
+
         if (level < 0){
             av_log(s->avctx, AV_LOG_ERROR, "dc overflow- block: %d qscale: %d//\n", n, s->qscale);
             if(s->inter_intra_pred) level=0;
@@ -1702,7 +1702,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
             goto not_coded;
         }
         if (s->ac_pred) {
-            if (dc_pred_dir == 0) 
+            if (dc_pred_dir == 0)
                 scan_table = s->intra_v_scantable.permutated; /* left */
             else
                 scan_table = s->intra_h_scantable.permutated; /* top */
@@ -1750,7 +1750,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                         run=   SHOW_UBITS(re, &s->gb, 6); SKIP_CACHE(re, &s->gb, 6);
                         level= SHOW_SBITS(re, &s->gb, 8); LAST_SKIP_CACHE(re, &s->gb, 8);
                         SKIP_COUNTER(re, &s->gb, 1+6+8);
-                    }else{                        
+                    }else{
                         int sign;
                         last=  SHOW_UBITS(re, &s->gb, 1); SKIP_BITS(re, &s->gb, 1);
                         if(!s->esc3_level_length){
@@ -1777,13 +1777,13 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
 //printf("level length:%d, run length: %d\n", ll, s->esc3_run_length);
                             UPDATE_CACHE(re, &s->gb);
                         }
-                        run=   SHOW_UBITS(re, &s->gb, s->esc3_run_length); 
+                        run=   SHOW_UBITS(re, &s->gb, s->esc3_run_length);
                         SKIP_BITS(re, &s->gb, s->esc3_run_length);
-                        
-                        sign=  SHOW_UBITS(re, &s->gb, 1); 
+
+                        sign=  SHOW_UBITS(re, &s->gb, 1);
                         SKIP_BITS(re, &s->gb, 1);
-                        
-                        level= SHOW_UBITS(re, &s->gb, s->esc3_level_length); 
+
+                        level= SHOW_UBITS(re, &s->gb, s->esc3_level_length);
                         SKIP_BITS(re, &s->gb, s->esc3_level_length);
                         if(sign) level= -level;
                     }
@@ -1808,8 +1808,8 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                         }
                     }
 #endif
-		    //level = level * qmul + (level>0) * qadd - (level<=0) * qadd ;
-		    if (level>0) level= level * qmul + qadd;
+                    //level = level * qmul + (level>0) * qadd - (level<=0) * qadd ;
+                    if (level>0) level= level * qmul + qadd;
                     else         level= level * qmul - qadd;
 #if 0 // waste of time too :(
                     if(level>2048 || level<-2048){
@@ -1905,7 +1905,7 @@ static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
     }
     if(s->msmpeg4_version>=4 && i>0) i=63; //FIXME/XXX optimize
     s->block_last_index[n] = i;
-    
+
     return 0;
 }
 
@@ -1919,7 +1919,7 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
         } else {
             level = get_vlc2(&s->gb, v2_dc_chroma_vlc.table, DC_VLC_BITS, 3);
         }
-        if (level < 0) 
+        if (level < 0)
             return -1;
         level-=256;
     }else{  //FIXME optimize use unified tables & index
@@ -1947,7 +1947,7 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
         int32_t *dc_val;
         pred = msmpeg4v1_pred_dc(s, n, &dc_val);
         level += pred;
-        
+
         /* update predictor */
         *dc_val= level;
     }else{
@@ -1966,7 +1966,7 @@ static int msmpeg4_decode_dc(MpegEncContext * s, int n, int *dir_ptr)
     return level;
 }
 
-static int msmpeg4_decode_motion(MpegEncContext * s, 
+static int msmpeg4_decode_motion(MpegEncContext * s,
                                  int *mx_ptr, int *my_ptr)
 {
     MVTable *mv;
@@ -2007,6 +2007,6 @@ static int msmpeg4_decode_motion(MpegEncContext * s,
 
 /* cleanest way to support it
  * there is too much shared between versions so that we cant have 1 file per version & 1 common
- * as allmost everything would be in the common file 
+ * as allmost everything would be in the common file
  */
 #include "wmv2.c"
diff --git a/src/libffmpeg/libavcodec/msmpeg4data.h b/src/libffmpeg/libavcodec/msmpeg4data.h
index 3b6940695..1fbd8aadf 100644
--- a/src/libffmpeg/libavcodec/msmpeg4data.h
+++ b/src/libffmpeg/libavcodec/msmpeg4data.h
@@ -1,6 +1,6 @@
 /**
  * @file msmpeg4data.h
- * MSMPEG4 data tables. 
+ * MSMPEG4 data tables.
  */
 
 /* intra picture macro block coded block pattern */
@@ -576,11 +576,11 @@ extern const uint8_t DCtab_chrom[13][2];
 extern const uint8_t cbpy_tab[16][2];
 extern const uint8_t mvtab[33][2];
 
-extern const uint8_t intra_MCBPC_code[8];
-extern const uint8_t intra_MCBPC_bits[8];
+extern const uint8_t intra_MCBPC_code[9];
+extern const uint8_t intra_MCBPC_bits[9];
 
-extern const uint8_t inter_MCBPC_code[25];
-extern const uint8_t inter_MCBPC_bits[25];
+extern const uint8_t inter_MCBPC_code[28];
+extern const uint8_t inter_MCBPC_bits[28];
 
 #define NB_RL_TABLES  6
 
@@ -1821,44 +1821,44 @@ static const uint8_t old_ff_c_dc_scale_table[32]={
 #define WMV1_SCANTABLE_COUNT 4
 
 static const uint8_t wmv1_scantable00[64]= {
-0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, 
-0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, 
-0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05, 
-0x06, 0x0D, 0x14, 0x1B, 0x22, 0x31, 0x39, 0x3A, 
-0x32, 0x2A, 0x23, 0x1C, 0x15, 0x0E, 0x07, 0x0F, 
-0x16, 0x1D, 0x24, 0x2B, 0x33, 0x3B, 0x3C, 0x34, 
-0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35, 
-0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F, 
+0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
+0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
+0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05,
+0x06, 0x0D, 0x14, 0x1B, 0x22, 0x31, 0x39, 0x3A,
+0x32, 0x2A, 0x23, 0x1C, 0x15, 0x0E, 0x07, 0x0F,
+0x16, 0x1D, 0x24, 0x2B, 0x33, 0x3B, 0x3C, 0x34,
+0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35,
+0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F,
 };
 static const uint8_t wmv1_scantable01[64]= {
-0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, 
-0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, 
-0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D, 
-0x14, 0x1B, 0x22, 0x29, 0x38, 0x31, 0x39, 0x2A, 
-0x23, 0x1C, 0x15, 0x0E, 0x07, 0x0F, 0x16, 0x1D, 
-0x24, 0x2B, 0x32, 0x3A, 0x33, 0x3B, 0x2C, 0x25, 
-0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35, 
-0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F, 
+0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11,
+0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28,
+0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D,
+0x14, 0x1B, 0x22, 0x29, 0x38, 0x31, 0x39, 0x2A,
+0x23, 0x1C, 0x15, 0x0E, 0x07, 0x0F, 0x16, 0x1D,
+0x24, 0x2B, 0x32, 0x3A, 0x33, 0x3B, 0x2C, 0x25,
+0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35,
+0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F,
 };
 static const uint8_t wmv1_scantable02[64]= {
-0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18, 
-0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20, 
-0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07, 
-0x0D, 0x14, 0x1B, 0x22, 0x29, 0x38, 0x31, 0x39, 
-0x2A, 0x23, 0x1C, 0x15, 0x0E, 0x0F, 0x16, 0x1D, 
-0x24, 0x2B, 0x32, 0x3A, 0x33, 0x2C, 0x25, 0x1E, 
-0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35, 
-0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, 
+0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18,
+0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20,
+0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07,
+0x0D, 0x14, 0x1B, 0x22, 0x29, 0x38, 0x31, 0x39,
+0x2A, 0x23, 0x1C, 0x15, 0x0E, 0x0F, 0x16, 0x1D,
+0x24, 0x2B, 0x32, 0x3A, 0x33, 0x2C, 0x25, 0x1E,
+0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35,
+0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F,
 };
 static const uint8_t wmv1_scantable03[64]= {
-0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09, 
-0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29, 
-0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13, 
-0x1B, 0x22, 0x31, 0x39, 0x32, 0x2A, 0x23, 0x1C, 
-0x14, 0x0D, 0x06, 0x07, 0x0E, 0x15, 0x1D, 0x24, 
-0x2B, 0x33, 0x3A, 0x3B, 0x34, 0x2C, 0x25, 0x1E, 
-0x16, 0x0F, 0x17, 0x1F, 0x26, 0x2D, 0x3C, 0x35, 
-0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, 
+0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09,
+0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29,
+0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13,
+0x1B, 0x22, 0x31, 0x39, 0x32, 0x2A, 0x23, 0x1C,
+0x14, 0x0D, 0x06, 0x07, 0x0E, 0x15, 0x1D, 0x24,
+0x2B, 0x33, 0x3A, 0x3B, 0x34, 0x2C, 0x25, 0x1E,
+0x16, 0x0F, 0x17, 0x1F, 0x26, 0x2D, 0x3C, 0x35,
+0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F,
 };
 
 static const uint8_t *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={
@@ -1878,108 +1878,108 @@ static const uint8_t table_inter_intra[4][2]={
 #define WMV2_INTER_CBP_TABLE_COUNT 4
 
 static const uint32_t table_mb_non_intra2[128][2] = {
-{0x0000A7, 14}, {0x01B2B8, 18}, {0x01B28E, 18}, {0x036575, 19}, 
-{0x006CAC, 16}, {0x000A69, 18}, {0x002934, 20}, {0x00526B, 21}, 
-{0x006CA1, 16}, {0x01B2B9, 18}, {0x0029AD, 20}, {0x029353, 24}, 
-{0x006CA7, 16}, {0x006CAB, 16}, {0x01B2BB, 18}, {0x00029B, 16}, 
-{0x00D944, 17}, {0x000A6A, 18}, {0x0149A8, 23}, {0x03651F, 19}, 
-{0x006CAF, 16}, {0x000A4C, 18}, {0x03651E, 19}, {0x000A48, 18}, 
-{0x00299C, 20}, {0x00299F, 20}, {0x029352, 24}, {0x0029AC, 20}, 
-{0x000296, 16}, {0x00D946, 17}, {0x000A68, 18}, {0x000298, 16}, 
-{0x000527, 17}, {0x00D94D, 17}, {0x0014D7, 19}, {0x036574, 19}, 
-{0x000A5C, 18}, {0x01B299, 18}, {0x00299D, 20}, {0x00299E, 20}, 
-{0x000525, 17}, {0x000A66, 18}, {0x00A4D5, 22}, {0x00149B, 19}, 
-{0x000295, 16}, {0x006CAD, 16}, {0x000A49, 18}, {0x000521, 17}, 
-{0x006CAA, 16}, {0x00D945, 17}, {0x01B298, 18}, {0x00052F, 17}, 
-{0x003654, 15}, {0x006CA0, 16}, {0x000532, 17}, {0x000291, 16}, 
-{0x003652, 15}, {0x000520, 17}, {0x000A5D, 18}, {0x000294, 16}, 
-{0x00009B, 11}, {0x0006E2, 12}, {0x000028, 12}, {0x0001B0, 10}, 
-{0x000001,  3}, {0x000010,  8}, {0x00002F,  6}, {0x00004C, 10}, 
-{0x00000D,  4}, {0x000000, 10}, {0x000006,  9}, {0x000134, 12}, 
-{0x00000C,  4}, {0x000007, 10}, {0x000007,  9}, {0x0006E1, 12}, 
-{0x00000E,  5}, {0x0000DA,  9}, {0x000022,  9}, {0x000364, 11}, 
-{0x00000F,  4}, {0x000006, 10}, {0x00000F,  9}, {0x000135, 12}, 
-{0x000014,  5}, {0x0000DD,  9}, {0x000004,  9}, {0x000015, 11}, 
-{0x00001A,  6}, {0x0001B3, 10}, {0x000005, 10}, {0x0006E3, 12}, 
-{0x00000C,  5}, {0x0000B9,  8}, {0x000004,  8}, {0x0000DB,  9}, 
-{0x00000E,  4}, {0x00000B, 10}, {0x000023,  9}, {0x0006CB, 12}, 
-{0x000005,  6}, {0x0001B1, 10}, {0x000001, 10}, {0x0006E0, 12}, 
-{0x000011,  5}, {0x0000DF,  9}, {0x00000E,  9}, {0x000373, 11}, 
-{0x000003,  5}, {0x0000B8,  8}, {0x000006,  8}, {0x000175,  9}, 
-{0x000015,  5}, {0x000174,  9}, {0x000027,  9}, {0x000372, 11}, 
-{0x000010,  5}, {0x0000BB,  8}, {0x000005,  8}, {0x0000DE,  9}, 
-{0x00000F,  5}, {0x000001,  9}, {0x000012,  8}, {0x000004, 10}, 
-{0x000002,  3}, {0x000016,  5}, {0x000009,  4}, {0x000001,  5}, 
+{0x0000A7, 14}, {0x01B2B8, 18}, {0x01B28E, 18}, {0x036575, 19},
+{0x006CAC, 16}, {0x000A69, 18}, {0x002934, 20}, {0x00526B, 21},
+{0x006CA1, 16}, {0x01B2B9, 18}, {0x0029AD, 20}, {0x029353, 24},
+{0x006CA7, 16}, {0x006CAB, 16}, {0x01B2BB, 18}, {0x00029B, 16},
+{0x00D944, 17}, {0x000A6A, 18}, {0x0149A8, 23}, {0x03651F, 19},
+{0x006CAF, 16}, {0x000A4C, 18}, {0x03651E, 19}, {0x000A48, 18},
+{0x00299C, 20}, {0x00299F, 20}, {0x029352, 24}, {0x0029AC, 20},
+{0x000296, 16}, {0x00D946, 17}, {0x000A68, 18}, {0x000298, 16},
+{0x000527, 17}, {0x00D94D, 17}, {0x0014D7, 19}, {0x036574, 19},
+{0x000A5C, 18}, {0x01B299, 18}, {0x00299D, 20}, {0x00299E, 20},
+{0x000525, 17}, {0x000A66, 18}, {0x00A4D5, 22}, {0x00149B, 19},
+{0x000295, 16}, {0x006CAD, 16}, {0x000A49, 18}, {0x000521, 17},
+{0x006CAA, 16}, {0x00D945, 17}, {0x01B298, 18}, {0x00052F, 17},
+{0x003654, 15}, {0x006CA0, 16}, {0x000532, 17}, {0x000291, 16},
+{0x003652, 15}, {0x000520, 17}, {0x000A5D, 18}, {0x000294, 16},
+{0x00009B, 11}, {0x0006E2, 12}, {0x000028, 12}, {0x0001B0, 10},
+{0x000001,  3}, {0x000010,  8}, {0x00002F,  6}, {0x00004C, 10},
+{0x00000D,  4}, {0x000000, 10}, {0x000006,  9}, {0x000134, 12},
+{0x00000C,  4}, {0x000007, 10}, {0x000007,  9}, {0x0006E1, 12},
+{0x00000E,  5}, {0x0000DA,  9}, {0x000022,  9}, {0x000364, 11},
+{0x00000F,  4}, {0x000006, 10}, {0x00000F,  9}, {0x000135, 12},
+{0x000014,  5}, {0x0000DD,  9}, {0x000004,  9}, {0x000015, 11},
+{0x00001A,  6}, {0x0001B3, 10}, {0x000005, 10}, {0x0006E3, 12},
+{0x00000C,  5}, {0x0000B9,  8}, {0x000004,  8}, {0x0000DB,  9},
+{0x00000E,  4}, {0x00000B, 10}, {0x000023,  9}, {0x0006CB, 12},
+{0x000005,  6}, {0x0001B1, 10}, {0x000001, 10}, {0x0006E0, 12},
+{0x000011,  5}, {0x0000DF,  9}, {0x00000E,  9}, {0x000373, 11},
+{0x000003,  5}, {0x0000B8,  8}, {0x000006,  8}, {0x000175,  9},
+{0x000015,  5}, {0x000174,  9}, {0x000027,  9}, {0x000372, 11},
+{0x000010,  5}, {0x0000BB,  8}, {0x000005,  8}, {0x0000DE,  9},
+{0x00000F,  5}, {0x000001,  9}, {0x000012,  8}, {0x000004, 10},
+{0x000002,  3}, {0x000016,  5}, {0x000009,  4}, {0x000001,  5},
 };
 
 static const uint32_t table_mb_non_intra3[128][2] = {
-{0x0002A1, 10}, {0x005740, 15}, {0x01A0BF, 18}, {0x015D19, 17}, 
-{0x001514, 13}, {0x00461E, 15}, {0x015176, 17}, {0x015177, 17}, 
-{0x0011AD, 13}, {0x00682E, 16}, {0x0682F9, 20}, {0x03417D, 19}, 
-{0x001A36, 14}, {0x002A2D, 14}, {0x00D05E, 17}, {0x006824, 16}, 
-{0x001515, 13}, {0x00545C, 15}, {0x0230E9, 18}, {0x011AFA, 17}, 
-{0x0015D7, 13}, {0x005747, 15}, {0x008D79, 16}, {0x006825, 16}, 
-{0x002BA2, 14}, {0x00A8BA, 16}, {0x0235F6, 18}, {0x015D18, 17}, 
-{0x0011AE, 13}, {0x00346F, 15}, {0x008C3B, 16}, {0x00346E, 15}, 
-{0x000D1A, 13}, {0x00461F, 15}, {0x0682F8, 20}, {0x011875, 17}, 
-{0x002BA1, 14}, {0x008D61, 16}, {0x0235F7, 18}, {0x0230E8, 18}, 
-{0x001513, 13}, {0x008D7B, 16}, {0x011AF4, 17}, {0x011AF5, 17}, 
-{0x001185, 13}, {0x0046BF, 15}, {0x008D60, 16}, {0x008D7C, 16}, 
-{0x001512, 13}, {0x00461C, 15}, {0x00AE8D, 16}, {0x008D78, 16}, 
-{0x000D0E, 13}, {0x003413, 15}, {0x0046B1, 15}, {0x003416, 15}, 
-{0x000AEA, 12}, {0x002A2C, 14}, {0x005741, 15}, {0x002A2F, 14}, 
-{0x000158,  9}, {0x0008D2, 12}, {0x00054C, 11}, {0x000686, 12}, 
-{0x000000,  2}, {0x000069,  8}, {0x00006B,  8}, {0x00068C, 12}, 
-{0x000007,  3}, {0x00015E,  9}, {0x0002A3, 10}, {0x000AE9, 12}, 
-{0x000006,  3}, {0x000231, 10}, {0x0002B8, 10}, {0x001A08, 14}, 
-{0x000010,  5}, {0x0001A9, 10}, {0x000342, 11}, {0x000A88, 12}, 
-{0x000004,  4}, {0x0001A2, 10}, {0x0002A4, 10}, {0x001184, 13}, 
-{0x000012,  5}, {0x000232, 10}, {0x0002B2, 10}, {0x000680, 12}, 
-{0x00001B,  6}, {0x00046A, 11}, {0x00068E, 12}, {0x002359, 14}, 
-{0x000016,  5}, {0x00015F,  9}, {0x0002A0, 10}, {0x00054D, 11}, 
-{0x000005,  4}, {0x000233, 10}, {0x0002B9, 10}, {0x0015D6, 13}, 
-{0x000022,  6}, {0x000468, 11}, {0x000683, 12}, {0x001A0A, 14}, 
-{0x000013,  5}, {0x000236, 10}, {0x0002BB, 10}, {0x001186, 13}, 
-{0x000017,  5}, {0x0001AB, 10}, {0x0002A7, 10}, {0x0008D3, 12}, 
-{0x000014,  5}, {0x000237, 10}, {0x000460, 11}, {0x000D0F, 13}, 
-{0x000019,  6}, {0x0001AA, 10}, {0x0002B3, 10}, {0x000681, 12}, 
-{0x000018,  6}, {0x0001A8, 10}, {0x0002A5, 10}, {0x00068F, 12}, 
-{0x000007,  4}, {0x000055,  7}, {0x000047,  7}, {0x0000AD,  8}, 
+{0x0002A1, 10}, {0x005740, 15}, {0x01A0BF, 18}, {0x015D19, 17},
+{0x001514, 13}, {0x00461E, 15}, {0x015176, 17}, {0x015177, 17},
+{0x0011AD, 13}, {0x00682E, 16}, {0x0682F9, 20}, {0x03417D, 19},
+{0x001A36, 14}, {0x002A2D, 14}, {0x00D05E, 17}, {0x006824, 16},
+{0x001515, 13}, {0x00545C, 15}, {0x0230E9, 18}, {0x011AFA, 17},
+{0x0015D7, 13}, {0x005747, 15}, {0x008D79, 16}, {0x006825, 16},
+{0x002BA2, 14}, {0x00A8BA, 16}, {0x0235F6, 18}, {0x015D18, 17},
+{0x0011AE, 13}, {0x00346F, 15}, {0x008C3B, 16}, {0x00346E, 15},
+{0x000D1A, 13}, {0x00461F, 15}, {0x0682F8, 20}, {0x011875, 17},
+{0x002BA1, 14}, {0x008D61, 16}, {0x0235F7, 18}, {0x0230E8, 18},
+{0x001513, 13}, {0x008D7B, 16}, {0x011AF4, 17}, {0x011AF5, 17},
+{0x001185, 13}, {0x0046BF, 15}, {0x008D60, 16}, {0x008D7C, 16},
+{0x001512, 13}, {0x00461C, 15}, {0x00AE8D, 16}, {0x008D78, 16},
+{0x000D0E, 13}, {0x003413, 15}, {0x0046B1, 15}, {0x003416, 15},
+{0x000AEA, 12}, {0x002A2C, 14}, {0x005741, 15}, {0x002A2F, 14},
+{0x000158,  9}, {0x0008D2, 12}, {0x00054C, 11}, {0x000686, 12},
+{0x000000,  2}, {0x000069,  8}, {0x00006B,  8}, {0x00068C, 12},
+{0x000007,  3}, {0x00015E,  9}, {0x0002A3, 10}, {0x000AE9, 12},
+{0x000006,  3}, {0x000231, 10}, {0x0002B8, 10}, {0x001A08, 14},
+{0x000010,  5}, {0x0001A9, 10}, {0x000342, 11}, {0x000A88, 12},
+{0x000004,  4}, {0x0001A2, 10}, {0x0002A4, 10}, {0x001184, 13},
+{0x000012,  5}, {0x000232, 10}, {0x0002B2, 10}, {0x000680, 12},
+{0x00001B,  6}, {0x00046A, 11}, {0x00068E, 12}, {0x002359, 14},
+{0x000016,  5}, {0x00015F,  9}, {0x0002A0, 10}, {0x00054D, 11},
+{0x000005,  4}, {0x000233, 10}, {0x0002B9, 10}, {0x0015D6, 13},
+{0x000022,  6}, {0x000468, 11}, {0x000683, 12}, {0x001A0A, 14},
+{0x000013,  5}, {0x000236, 10}, {0x0002BB, 10}, {0x001186, 13},
+{0x000017,  5}, {0x0001AB, 10}, {0x0002A7, 10}, {0x0008D3, 12},
+{0x000014,  5}, {0x000237, 10}, {0x000460, 11}, {0x000D0F, 13},
+{0x000019,  6}, {0x0001AA, 10}, {0x0002B3, 10}, {0x000681, 12},
+{0x000018,  6}, {0x0001A8, 10}, {0x0002A5, 10}, {0x00068F, 12},
+{0x000007,  4}, {0x000055,  7}, {0x000047,  7}, {0x0000AD,  8},
 };
 
 static const uint32_t table_mb_non_intra4[128][2] = {
-{0x0000D4,  8}, {0x0021C5, 14}, {0x00F18A, 16}, {0x00D5BC, 16}, 
-{0x000879, 12}, {0x00354D, 14}, {0x010E3F, 17}, {0x010F54, 17}, 
-{0x000866, 12}, {0x00356E, 14}, {0x010F55, 17}, {0x010E3E, 17}, 
-{0x0010CE, 13}, {0x003C84, 14}, {0x00D5BD, 16}, {0x00F18B, 16}, 
-{0x000868, 12}, {0x00438C, 15}, {0x0087AB, 16}, {0x00790B, 15}, 
-{0x000F10, 12}, {0x00433D, 15}, {0x006AD3, 15}, {0x00790A, 15}, 
-{0x001AA7, 13}, {0x0043D4, 15}, {0x00871E, 16}, {0x006ADF, 15}, 
-{0x000D7C, 12}, {0x003C94, 14}, {0x00438D, 15}, {0x006AD2, 15}, 
-{0x0006BC, 11}, {0x0021E9, 14}, {0x006ADA, 15}, {0x006A99, 15}, 
-{0x0010F7, 13}, {0x004389, 15}, {0x006ADB, 15}, {0x0078C4, 15}, 
-{0x000D56, 12}, {0x0035F7, 14}, {0x00438E, 15}, {0x006A98, 15}, 
-{0x000D52, 12}, {0x003C95, 14}, {0x004388, 15}, {0x00433C, 15}, 
-{0x000D54, 12}, {0x001E4B, 13}, {0x003C63, 14}, {0x003C83, 14}, 
-{0x000861, 12}, {0x0021EB, 14}, {0x00356C, 14}, {0x0035F6, 14}, 
-{0x000863, 12}, {0x00219F, 14}, {0x003568, 14}, {0x003C82, 14}, 
-{0x0001AE,  9}, {0x0010C0, 13}, {0x000F11, 12}, {0x001AFA, 13}, 
-{0x000000,  1}, {0x0000F0,  8}, {0x0001AD,  9}, {0x0010C1, 13}, 
-{0x00000A,  4}, {0x0003C5, 10}, {0x000789, 11}, {0x001AB5, 13}, 
-{0x000009,  4}, {0x000435, 11}, {0x000793, 11}, {0x001E40, 13}, 
-{0x00001D,  5}, {0x0003CB, 10}, {0x000878, 12}, {0x001AAF, 13}, 
-{0x00000B,  4}, {0x0003C7, 10}, {0x000791, 11}, {0x001AAB, 13}, 
-{0x00001F,  5}, {0x000436, 11}, {0x0006BF, 11}, {0x000F19, 12}, 
-{0x00003D,  6}, {0x000D51, 12}, {0x0010C4, 13}, {0x0021E8, 14}, 
-{0x000036,  6}, {0x000437, 11}, {0x0006AF, 11}, {0x0010C5, 13}, 
-{0x00000C,  4}, {0x000432, 11}, {0x000794, 11}, {0x001E30, 13}, 
-{0x000042,  7}, {0x000870, 12}, {0x000F24, 12}, {0x001E43, 13}, 
-{0x000020,  6}, {0x00043E, 11}, {0x000795, 11}, {0x001AAA, 13}, 
-{0x000037,  6}, {0x0006AC, 11}, {0x0006AE, 11}, {0x0010F6, 13}, 
-{0x000034,  6}, {0x00043A, 11}, {0x000D50, 12}, {0x001AAE, 13}, 
-{0x000039,  6}, {0x00043F, 11}, {0x00078D, 11}, {0x0010D2, 13}, 
-{0x000038,  6}, {0x00043B, 11}, {0x0006BD, 11}, {0x0010D3, 13}, 
-{0x000011,  5}, {0x0001AC,  9}, {0x0000F3,  8}, {0x000439, 11}, 
+{0x0000D4,  8}, {0x0021C5, 14}, {0x00F18A, 16}, {0x00D5BC, 16},
+{0x000879, 12}, {0x00354D, 14}, {0x010E3F, 17}, {0x010F54, 17},
+{0x000866, 12}, {0x00356E, 14}, {0x010F55, 17}, {0x010E3E, 17},
+{0x0010CE, 13}, {0x003C84, 14}, {0x00D5BD, 16}, {0x00F18B, 16},
+{0x000868, 12}, {0x00438C, 15}, {0x0087AB, 16}, {0x00790B, 15},
+{0x000F10, 12}, {0x00433D, 15}, {0x006AD3, 15}, {0x00790A, 15},
+{0x001AA7, 13}, {0x0043D4, 15}, {0x00871E, 16}, {0x006ADF, 15},
+{0x000D7C, 12}, {0x003C94, 14}, {0x00438D, 15}, {0x006AD2, 15},
+{0x0006BC, 11}, {0x0021E9, 14}, {0x006ADA, 15}, {0x006A99, 15},
+{0x0010F7, 13}, {0x004389, 15}, {0x006ADB, 15}, {0x0078C4, 15},
+{0x000D56, 12}, {0x0035F7, 14}, {0x00438E, 15}, {0x006A98, 15},
+{0x000D52, 12}, {0x003C95, 14}, {0x004388, 15}, {0x00433C, 15},
+{0x000D54, 12}, {0x001E4B, 13}, {0x003C63, 14}, {0x003C83, 14},
+{0x000861, 12}, {0x0021EB, 14}, {0x00356C, 14}, {0x0035F6, 14},
+{0x000863, 12}, {0x00219F, 14}, {0x003568, 14}, {0x003C82, 14},
+{0x0001AE,  9}, {0x0010C0, 13}, {0x000F11, 12}, {0x001AFA, 13},
+{0x000000,  1}, {0x0000F0,  8}, {0x0001AD,  9}, {0x0010C1, 13},
+{0x00000A,  4}, {0x0003C5, 10}, {0x000789, 11}, {0x001AB5, 13},
+{0x000009,  4}, {0x000435, 11}, {0x000793, 11}, {0x001E40, 13},
+{0x00001D,  5}, {0x0003CB, 10}, {0x000878, 12}, {0x001AAF, 13},
+{0x00000B,  4}, {0x0003C7, 10}, {0x000791, 11}, {0x001AAB, 13},
+{0x00001F,  5}, {0x000436, 11}, {0x0006BF, 11}, {0x000F19, 12},
+{0x00003D,  6}, {0x000D51, 12}, {0x0010C4, 13}, {0x0021E8, 14},
+{0x000036,  6}, {0x000437, 11}, {0x0006AF, 11}, {0x0010C5, 13},
+{0x00000C,  4}, {0x000432, 11}, {0x000794, 11}, {0x001E30, 13},
+{0x000042,  7}, {0x000870, 12}, {0x000F24, 12}, {0x001E43, 13},
+{0x000020,  6}, {0x00043E, 11}, {0x000795, 11}, {0x001AAA, 13},
+{0x000037,  6}, {0x0006AC, 11}, {0x0006AE, 11}, {0x0010F6, 13},
+{0x000034,  6}, {0x00043A, 11}, {0x000D50, 12}, {0x001AAE, 13},
+{0x000039,  6}, {0x00043F, 11}, {0x00078D, 11}, {0x0010D2, 13},
+{0x000038,  6}, {0x00043B, 11}, {0x0006BD, 11}, {0x0010D3, 13},
+{0x000011,  5}, {0x0001AC,  9}, {0x0000F3,  8}, {0x000439, 11},
 };
 
 static const uint32_t (*wmv2_inter_table[WMV2_INTER_CBP_TABLE_COUNT])[2]={
@@ -1998,7 +1998,7 @@ static const uint8_t wmv2_scantableA[64]={
 
 static const uint8_t wmv2_scantableB[64]={
 0x00, 0x08, 0x01, 0x10, 0x09, 0x18, 0x11, 0x02,
-0x20, 0x0A, 0x19, 0x28, 0x12, 0x30, 0x21, 0x1A, 
-0x38, 0x29, 0x22, 0x03, 0x31, 0x39, 0x0B, 0x2A, 
+0x20, 0x0A, 0x19, 0x28, 0x12, 0x30, 0x21, 0x1A,
+0x38, 0x29, 0x22, 0x03, 0x31, 0x39, 0x0B, 0x2A,
 0x13, 0x32, 0x1B, 0x3A, 0x23, 0x2B, 0x33, 0x3B,
 };
diff --git a/src/libffmpeg/libavcodec/msrle.c b/src/libffmpeg/libavcodec/msrle.c
index d95e3f79b..7cdbf7c77 100644
--- a/src/libffmpeg/libavcodec/msrle.c
+++ b/src/libffmpeg/libavcodec/msrle.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
diff --git a/src/libffmpeg/libavcodec/msvideo1.c b/src/libffmpeg/libavcodec/msvideo1.c
index 518df0e52..e8524b32e 100644
--- a/src/libffmpeg/libavcodec/msvideo1.c
+++ b/src/libffmpeg/libavcodec/msvideo1.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -156,8 +156,8 @@ static void msvideo1_decode_8bit(Msvideo1Context *s)
 
                 for (pixel_y = 0; pixel_y < 4; pixel_y++) {
                     for (pixel_x = 0; pixel_x < 4; pixel_x++, flags >>= 1)
-                        pixels[pixel_ptr++] = 
-                            colors[((pixel_y & 0x2) << 1) + 
+                        pixels[pixel_ptr++] =
+                            colors[((pixel_y & 0x2) << 1) +
                                 (pixel_x & 0x2) + ((flags & 0x1) ^ 1)];
                     pixel_ptr -= row_dec;
                 }
@@ -266,8 +266,8 @@ static void msvideo1_decode_16bit(Msvideo1Context *s)
 
                     for (pixel_y = 0; pixel_y < 4; pixel_y++) {
                         for (pixel_x = 0; pixel_x < 4; pixel_x++, flags >>= 1)
-                            pixels[pixel_ptr++] = 
-                                colors[((pixel_y & 0x2) << 1) + 
+                            pixels[pixel_ptr++] =
+                                colors[((pixel_y & 0x2) << 1) +
                                     (pixel_x & 0x2) + ((flags & 0x1) ^ 1)];
                         pixel_ptr -= row_dec;
                     }
diff --git a/src/libffmpeg/libavcodec/opt.h b/src/libffmpeg/libavcodec/opt.h
index c84db00fe..058c6b63a 100644
--- a/src/libffmpeg/libavcodec/opt.h
+++ b/src/libffmpeg/libavcodec/opt.h
@@ -28,13 +28,13 @@ typedef struct AVOption {
      * @fixme what about other languages
      */
     const char *help;
-    int offset;             ///< offset to context structure where the parsed value should be stored 
+    int offset;             ///< offset to context structure where the parsed value should be stored
     enum AVOptionType type;
-    
+
     double default_val;
     double min;
     double max;
-    
+
     int flags;
 #define AV_OPT_FLAG_ENCODING_PARAM  1   ///< a generic parameter which can be set by the user for muxing or encoding
 #define AV_OPT_FLAG_DECODING_PARAM  2   ///< a generic parameter which can be set by the user for demuxing or decoding
diff --git a/src/libffmpeg/libavcodec/parser.c b/src/libffmpeg/libavcodec/parser.c
index 06cb7d177..412cd8359 100644
--- a/src/libffmpeg/libavcodec/parser.c
+++ b/src/libffmpeg/libavcodec/parser.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avcodec.h"
 #include "mpegvideo.h"
@@ -34,7 +34,7 @@ AVCodecParserContext *av_parser_init(int codec_id)
     AVCodecParserContext *s;
     AVCodecParser *parser;
     int ret;
-    
+
     if(codec_id == CODEC_ID_NONE)
         return NULL;
 
@@ -71,15 +71,15 @@ AVCodecParserContext *av_parser_init(int codec_id)
 
 /* NOTE: buf_size == 0 is used to signal EOF so that the last frame
    can be returned if necessary */
-int av_parser_parse(AVCodecParserContext *s, 
+int av_parser_parse(AVCodecParserContext *s,
                     AVCodecContext *avctx,
-                    uint8_t **poutbuf, int *poutbuf_size, 
+                    uint8_t **poutbuf, int *poutbuf_size,
                     const uint8_t *buf, int buf_size,
                     int64_t pts, int64_t dts)
 {
     int index, i, k;
     uint8_t dummy_buf[FF_INPUT_BUFFER_PADDING_SIZE];
-    
+
     if (buf_size == 0) {
         /* padding is always necessary even if EOF, so we add it here */
         memset(dummy_buf, 0, sizeof(dummy_buf));
@@ -111,7 +111,7 @@ int av_parser_parse(AVCodecParserContext *s,
         s->frame_offset = s->last_frame_offset;
         s->pts = s->last_pts;
         s->dts = s->last_dts;
-        
+
         /* offset of the next frame */
         s->last_frame_offset = s->cur_offset + index;
         /* find the packet in which the new frame starts. It
@@ -129,7 +129,7 @@ int av_parser_parse(AVCodecParserContext *s,
 
         s->last_pts = s->cur_frame_pts[k];
         s->last_dts = s->cur_frame_dts[k];
-        
+
         /* some parsers tell us the packet size even before seeing the first byte of the next packet,
            so the next pts/dts is in the next chunk */
         if(index == buf_size){
@@ -148,9 +148,9 @@ int av_parser_parse(AVCodecParserContext *s,
  */
 int av_parser_change(AVCodecParserContext *s,
                      AVCodecContext *avctx,
-                     uint8_t **poutbuf, int *poutbuf_size, 
+                     uint8_t **poutbuf, int *poutbuf_size,
                      const uint8_t *buf, int buf_size, int keyframe){
-   
+
     if(s && s->parser->split){
         if((avctx->flags & CODEC_FLAG_GLOBAL_HEADER) || (avctx->flags2 & CODEC_FLAG2_LOCAL_HEADER)){
             int i= s->parser->split(avctx, buf, buf_size);
@@ -169,7 +169,7 @@ int av_parser_change(AVCodecParserContext *s,
             int size= buf_size + avctx->extradata_size;
             *poutbuf_size= size;
             *poutbuf= av_malloc(size + FF_INPUT_BUFFER_PADDING_SIZE);
-            
+
             memcpy(*poutbuf, avctx->extradata, avctx->extradata_size);
             memcpy((*poutbuf) + avctx->extradata_size, buf, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
             return 1;
@@ -191,11 +191,11 @@ void av_parser_close(AVCodecParserContext *s)
 
 //#define END_NOT_FOUND (-100)
 
-#define PICTURE_START_CODE	0x00000100
-#define SEQ_START_CODE		0x000001b3
-#define EXT_START_CODE		0x000001b5
-#define SLICE_MIN_START_CODE	0x00000101
-#define SLICE_MAX_START_CODE	0x000001af
+#define PICTURE_START_CODE      0x00000100
+#define SEQ_START_CODE          0x000001b3
+#define EXT_START_CODE          0x000001b5
+#define SLICE_MIN_START_CODE    0x00000101
+#define SLICE_MAX_START_CODE    0x000001af
 
 typedef struct ParseContext1{
     ParseContext pc;
@@ -246,7 +246,7 @@ int ff_combine_frame(ParseContext *pc, int next, uint8_t **buf, int *buf_size)
 
     *buf_size=
     pc->overread_index= pc->index + next;
-    
+
     /* append to buffer */
     if(pc->index){
         pc->buffer= av_fast_realloc(pc->buffer, &pc->buffer_size, next + pc->index + FF_INPUT_BUFFER_PADDING_SIZE);
@@ -272,33 +272,11 @@ int ff_combine_frame(ParseContext *pc, int next, uint8_t **buf, int *buf_size)
     return 0;
 }
 
-static int find_start_code(const uint8_t **pbuf_ptr, const uint8_t *buf_end)
-{
-    const uint8_t *buf_ptr;
-    unsigned int state=0xFFFFFFFF, v;
-    int val;
-
-    buf_ptr = *pbuf_ptr;
-    while (buf_ptr < buf_end) {
-        v = *buf_ptr++;
-        if (state == 0x000001) {
-            state = ((state << 8) | v) & 0xffffff;
-            val = state;
-            goto found;
-        }
-        state = ((state << 8) | v) & 0xffffff;
-    }
-    val = -1;
- found:
-    *pbuf_ptr = buf_ptr;
-    return val;
-}
-
 /* XXX: merge with libavcodec ? */
 #define MPEG1_FRAME_RATE_BASE 1001
 
 static const int frame_rate_tab[16] = {
-        0,        
+        0,
     24000,
     24024,
     25025,
@@ -320,7 +298,7 @@ static const int frame_rate_tab[16] = {
 };
 
 //FIXME move into mpeg12.c
-static void mpegvideo_extract_headers(AVCodecParserContext *s, 
+static void mpegvideo_extract_headers(AVCodecParserContext *s,
                                       AVCodecContext *avctx,
                                       const uint8_t *buf, int buf_size)
 {
@@ -335,7 +313,8 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
     s->repeat_pict = 0;
     buf_end = buf + buf_size;
     while (buf < buf_end) {
-        start_code = find_start_code(&buf, buf_end);
+        start_code= -1;
+        buf= ff_find_start_code(buf, buf_end, &start_code);
         bytes_left = buf_end - buf;
         switch(start_code) {
         case PICTURE_START_CODE:
@@ -386,7 +365,7 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
                         top_field_first = buf[3] & (1 << 7);
                         repeat_first_field = buf[3] & (1 << 1);
                         progressive_frame = buf[4] & (1 << 7);
-                    
+
                         /* check if we must repeat the frame */
                         if (repeat_first_field) {
                             if (pc->progressive_sequence) {
@@ -398,8 +377,8 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
                                 s->repeat_pict = 1;
                             }
                         }
-                        
-                        /* the packet only represents half a frame 
+
+                        /* the packet only represents half a frame
                            XXX,FIXME maybe find a different solution */
                         if(picture_structure != 3)
                             s->repeat_pict = -1;
@@ -413,7 +392,7 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
         default:
             /* we stop parsing when we encounter a slice. It ensures
                that this function takes a negligible amount of time */
-            if (start_code >= SLICE_MIN_START_CODE && 
+            if (start_code >= SLICE_MIN_START_CODE &&
                 start_code <= SLICE_MAX_START_CODE)
                 goto the_end;
             break;
@@ -424,31 +403,31 @@ static void mpegvideo_extract_headers(AVCodecParserContext *s,
 
 static int mpegvideo_parse(AVCodecParserContext *s,
                            AVCodecContext *avctx,
-                           uint8_t **poutbuf, int *poutbuf_size, 
+                           uint8_t **poutbuf, int *poutbuf_size,
                            const uint8_t *buf, int buf_size)
 {
     ParseContext1 *pc1 = s->priv_data;
     ParseContext *pc= &pc1->pc;
     int next;
-   
+
     if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
         next= buf_size;
     }else{
         next= ff_mpeg1_find_frame_end(pc, buf, buf_size);
-        
+
         if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
             *poutbuf = NULL;
             *poutbuf_size = 0;
             return buf_size;
         }
-       
+
     }
     /* we have a full frame : we just parse the first few MPEG headers
        to have the full timing information. The time take by this
        function should be negligible for uncorrupted streams */
     mpegvideo_extract_headers(s, avctx, buf, buf_size);
 #if 0
-    printf("pict_type=%d frame_rate=%0.3f repeat_pict=%d\n", 
+    printf("pict_type=%d frame_rate=%0.3f repeat_pict=%d\n",
            s->pict_type, (double)avctx->time_base.den / avctx->time_base.num, s->repeat_pict);
 #endif
 
@@ -462,7 +441,7 @@ static int mpegvideo_split(AVCodecContext *avctx,
 {
     int i;
     uint32_t state= -1;
-    
+
     for(i=0; i<buf_size; i++){
         state= (state<<8) | buf[i];
         if(state != 0x1B3 && state != 0x1B5 && state < 0x200 && state >= 0x100)
@@ -490,7 +469,7 @@ static void parse1_close(AVCodecParserContext *s)
 
 /* used by parser */
 /* XXX: make it use less memory */
-static int av_mpeg4_decode_header(AVCodecParserContext *s1, 
+static int av_mpeg4_decode_header(AVCodecParserContext *s1,
                                   AVCodecContext *avctx,
                                   const uint8_t *buf, int buf_size)
 {
@@ -530,17 +509,17 @@ static int mpeg4video_parse_init(AVCodecParserContext *s)
 
 static int mpeg4video_parse(AVCodecParserContext *s,
                            AVCodecContext *avctx,
-                           uint8_t **poutbuf, int *poutbuf_size, 
+                           uint8_t **poutbuf, int *poutbuf_size,
                            const uint8_t *buf, int buf_size)
 {
     ParseContext *pc = s->priv_data;
     int next;
-    
+
     if(s->flags & PARSER_FLAG_COMPLETE_FRAMES){
         next= buf_size;
     }else{
         next= ff_mpeg4_find_frame_end(pc, buf, buf_size);
-    
+
         if (ff_combine_frame(pc, next, (uint8_t **)&buf, &buf_size) < 0) {
             *poutbuf = NULL;
             *poutbuf_size = 0;
@@ -559,7 +538,7 @@ static int mpeg4video_split(AVCodecContext *avctx,
 {
     int i;
     uint32_t state= -1;
-    
+
     for(i=0; i<buf_size; i++){
         state= (state<<8) | buf[i];
         if(state == 0x1B3 || state == 0x1B6)
@@ -571,7 +550,7 @@ static int mpeg4video_split(AVCodecContext *avctx,
 /*************************/
 
 typedef struct MpegAudioParseContext {
-    uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE];	/* input buffer */
+    uint8_t inbuf[MPA_MAX_CODED_FRAME_SIZE];    /* input buffer */
     uint8_t *inbuf_ptr;
     int frame_size;
     int free_format_frame_size;
@@ -596,7 +575,7 @@ static int mpegaudio_parse_init(AVCodecParserContext *s1)
 
 static int mpegaudio_parse(AVCodecParserContext *s1,
                            AVCodecContext *avctx,
-                           uint8_t **poutbuf, int *poutbuf_size, 
+                           uint8_t **poutbuf, int *poutbuf_size,
                            const uint8_t *buf, int buf_size)
 {
     MpegAudioParseContext *s = s1->priv_data;
@@ -608,8 +587,8 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
     *poutbuf_size = 0;
     buf_ptr = buf;
     while (buf_size > 0) {
-	len = s->inbuf_ptr - s->inbuf;
-	if (s->frame_size == 0) {
+        len = s->inbuf_ptr - s->inbuf;
+        if (s->frame_size == 0) {
             /* special case for next header for first frame in free
                format case (XXX: find a simpler method) */
             if (s->free_format_next_header != 0) {
@@ -621,62 +600,62 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
                 s->free_format_next_header = 0;
                 goto got_header;
             }
-	    /* no header seen : find one. We need at least MPA_HEADER_SIZE
+            /* no header seen : find one. We need at least MPA_HEADER_SIZE
                bytes to parse it */
-	    len = MPA_HEADER_SIZE - len;
-	    if (len > buf_size)
-		len = buf_size;
-	    if (len > 0) {
-		memcpy(s->inbuf_ptr, buf_ptr, len);
-		buf_ptr += len;
-		buf_size -= len;
-		s->inbuf_ptr += len;
-	    }
-	    if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) {
+            len = MPA_HEADER_SIZE - len;
+            if (len > buf_size)
+                len = buf_size;
+            if (len > 0) {
+                memcpy(s->inbuf_ptr, buf_ptr, len);
+                buf_ptr += len;
+                buf_size -= len;
+                s->inbuf_ptr += len;
+            }
+            if ((s->inbuf_ptr - s->inbuf) >= MPA_HEADER_SIZE) {
             got_header:
                 sr= avctx->sample_rate;
-		header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
-		    (s->inbuf[2] << 8) | s->inbuf[3];
+                header = (s->inbuf[0] << 24) | (s->inbuf[1] << 16) |
+                    (s->inbuf[2] << 8) | s->inbuf[3];
 
                 ret = mpa_decode_header(avctx, header);
                 if (ret < 0) {
                     s->header_count= -2;
-		    /* no sync found : move by one byte (inefficient, but simple!) */
-		    memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
-		    s->inbuf_ptr--;
+                    /* no sync found : move by one byte (inefficient, but simple!) */
+                    memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
+                    s->inbuf_ptr--;
                     dprintf("skip %x\n", header);
                     /* reset free format frame size to give a chance
                        to get a new bitrate */
                     s->free_format_frame_size = 0;
-		} else {
+                } else {
                     if((header&SAME_HEADER_MASK) != (s->header&SAME_HEADER_MASK) && s->header)
                         s->header_count= -3;
                     s->header= header;
                     s->header_count++;
                     s->frame_size = ret;
-                    
+
 #if 0
                     /* free format: prepare to compute frame size */
-		    if (decode_header(s, header) == 1) {
-			s->frame_size = -1;
+                    if (decode_header(s, header) == 1) {
+                        s->frame_size = -1;
                     }
 #endif
-		}
+                }
                 if(s->header_count <= 0)
                     avctx->sample_rate= sr; //FIXME ugly
-	    }
-        } else 
+            }
+        } else
 #if 0
         if (s->frame_size == -1) {
             /* free format : find next sync to compute frame size */
-	    len = MPA_MAX_CODED_FRAME_SIZE - len;
-	    if (len > buf_size)
-		len = buf_size;
+            len = MPA_MAX_CODED_FRAME_SIZE - len;
+            if (len > buf_size)
+                len = buf_size;
             if (len == 0) {
-		/* frame too long: resync */
+                /* frame too long: resync */
                 s->frame_size = 0;
-		memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
-		s->inbuf_ptr--;
+                memmove(s->inbuf, s->inbuf + 1, s->inbuf_ptr - s->inbuf - 1);
+                s->inbuf_ptr--;
             } else {
                 uint8_t *p, *pend;
                 uint32_t header1;
@@ -708,7 +687,7 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
                             s->free_format_frame_size -= padding * 4;
                         else
                             s->free_format_frame_size -= padding;
-                        dprintf("free frame size=%d padding=%d\n", 
+                        dprintf("free frame size=%d padding=%d\n",
                                 s->free_format_frame_size, padding);
                         decode_header(s, header1);
                         goto next_data;
@@ -720,52 +699,134 @@ static int mpegaudio_parse(AVCodecParserContext *s1,
                 s->inbuf_ptr += len;
                 buf_size -= len;
             }
-	} else 
+        } else
 #endif
         if (len < s->frame_size) {
             if (s->frame_size > MPA_MAX_CODED_FRAME_SIZE)
                 s->frame_size = MPA_MAX_CODED_FRAME_SIZE;
-	    len = s->frame_size - len;
-	    if (len > buf_size)
-		len = buf_size;
-	    memcpy(s->inbuf_ptr, buf_ptr, len);
-	    buf_ptr += len;
-	    s->inbuf_ptr += len;
-	    buf_size -= len;
-	}
+            len = s->frame_size - len;
+            if (len > buf_size)
+                len = buf_size;
+            memcpy(s->inbuf_ptr, buf_ptr, len);
+            buf_ptr += len;
+            s->inbuf_ptr += len;
+            buf_size -= len;
+        }
         //    next_data:
-        if (s->frame_size > 0 && 
+        if (s->frame_size > 0 &&
             (s->inbuf_ptr - s->inbuf) >= s->frame_size) {
             if(s->header_count > 0){
                 *poutbuf = s->inbuf;
                 *poutbuf_size = s->inbuf_ptr - s->inbuf;
             }
-	    s->inbuf_ptr = s->inbuf;
-	    s->frame_size = 0;
-	    break;
-	}
+            s->inbuf_ptr = s->inbuf;
+            s->frame_size = 0;
+            break;
+        }
     }
     return buf_ptr - buf;
 }
 
-#ifdef CONFIG_AC3
-#ifdef CONFIG_A52BIN
-extern int ff_a52_syncinfo (AVCodecContext * avctx, const uint8_t * buf,
-                       int * flags, int * sample_rate, int * bit_rate);
-#else
-extern int a52_syncinfo (const uint8_t * buf, int * flags,
-                         int * sample_rate, int * bit_rate);
-#endif
-
 typedef struct AC3ParseContext {
     uint8_t inbuf[4096]; /* input buffer */
     uint8_t *inbuf_ptr;
     int frame_size;
-    int flags;
 } AC3ParseContext;
 
 #define AC3_HEADER_SIZE 7
-#define A52_LFE 16
+
+static const int ac3_sample_rates[4] = {
+    48000, 44100, 32000, 0
+};
+
+static const int ac3_frame_sizes[64][3] = {
+    { 64,   69,   96   },
+    { 64,   70,   96   },
+    { 80,   87,   120  },
+    { 80,   88,   120  },
+    { 96,   104,  144  },
+    { 96,   105,  144  },
+    { 112,  121,  168  },
+    { 112,  122,  168  },
+    { 128,  139,  192  },
+    { 128,  140,  192  },
+    { 160,  174,  240  },
+    { 160,  175,  240  },
+    { 192,  208,  288  },
+    { 192,  209,  288  },
+    { 224,  243,  336  },
+    { 224,  244,  336  },
+    { 256,  278,  384  },
+    { 256,  279,  384  },
+    { 320,  348,  480  },
+    { 320,  349,  480  },
+    { 384,  417,  576  },
+    { 384,  418,  576  },
+    { 448,  487,  672  },
+    { 448,  488,  672  },
+    { 512,  557,  768  },
+    { 512,  558,  768  },
+    { 640,  696,  960  },
+    { 640,  697,  960  },
+    { 768,  835,  1152 },
+    { 768,  836,  1152 },
+    { 896,  975,  1344 },
+    { 896,  976,  1344 },
+    { 1024, 1114, 1536 },
+    { 1024, 1115, 1536 },
+    { 1152, 1253, 1728 },
+    { 1152, 1254, 1728 },
+    { 1280, 1393, 1920 },
+    { 1280, 1394, 1920 },
+};
+
+static const int ac3_bitrates[64] = {
+    32, 32, 40, 40, 48, 48, 56, 56, 64, 64, 80, 80, 96, 96, 112, 112,
+    128, 128, 160, 160, 192, 192, 224, 224, 256, 256, 320, 320, 384,
+    384, 448, 448, 512, 512, 576, 576, 640, 640,
+};
+
+static const int ac3_channels[8] = {
+    2, 1, 2, 3, 3, 4, 4, 5
+};
+
+/* Parse the 7-byte AC-3 header at buf (syncinfo plus the start of bsi) and
+ * store channel count, sample rate and bit rate. Returns the frame size in
+ * bytes, or 0 if the sync word, fscod, bsid or frmsizecod is not accepted. */
+static int ac3_sync(const uint8_t *buf, int *channels, int *sample_rate,
+                    int *bit_rate)
+{
+    unsigned int fscod, frmsizecod, acmod, bsid, lfeon;
+    GetBitContext bits;
+
+    init_get_bits(&bits, buf, AC3_HEADER_SIZE * 8);
+    if(get_bits(&bits, 16) != 0x0b77)
+        return 0;
+
+    get_bits(&bits, 16);    /* crc */
+    fscod = get_bits(&bits, 2);
+    frmsizecod = get_bits(&bits, 6);
+    if(!ac3_sample_rates[fscod])
+        return 0;
+
+    bsid = get_bits(&bits, 5);
+    if(bsid > 8)
+        return 0;
+    get_bits(&bits, 3);     /* bsmod */
+    acmod = get_bits(&bits, 3);
+    if(acmod & 1 && acmod != 1)
+        get_bits(&bits, 2); /* cmixlev */
+    if(acmod & 4)
+        get_bits(&bits, 2); /* surmixlev */
+    if(acmod == 2)          /* A/52: dsurmod is only coded in 2/0 mode */
+        get_bits(&bits, 2); /* dsurmod */
+    lfeon = get_bits(&bits, 1);
+
+    *sample_rate = ac3_sample_rates[fscod];
+    *bit_rate = ac3_bitrates[frmsizecod] * 1000;
+    *channels = ac3_channels[acmod] + lfeon;
+    return ac3_frame_sizes[frmsizecod][fscod] * 2;
+}
 
 static int ac3_parse_init(AVCodecParserContext *s1)
 {
@@ -776,15 +837,12 @@ static int ac3_parse_init(AVCodecParserContext *s1)
 
 static int ac3_parse(AVCodecParserContext *s1,
                      AVCodecContext *avctx,
-                     uint8_t **poutbuf, int *poutbuf_size, 
+                     uint8_t **poutbuf, int *poutbuf_size,
                      const uint8_t *buf, int buf_size)
 {
     AC3ParseContext *s = s1->priv_data;
     const uint8_t *buf_ptr;
-    int len, sample_rate, bit_rate;
-    static const int ac3_channels[8] = {
-	2, 1, 2, 3, 3, 4, 4, 5
-    };
+    int len, sample_rate, bit_rate, channels;
 
     *poutbuf = NULL;
     *poutbuf_size = 0;
@@ -794,57 +852,49 @@ static int ac3_parse(AVCodecParserContext *s1,
         len = s->inbuf_ptr - s->inbuf;
         if (s->frame_size == 0) {
             /* no header seen : find one. We need at least 7 bytes to parse it */
-            len = AC3_HEADER_SIZE - len;
-            if (len > buf_size)
-                len = buf_size;
+            len = FFMIN(AC3_HEADER_SIZE - len, buf_size);
+
             memcpy(s->inbuf_ptr, buf_ptr, len);
             buf_ptr += len;
             s->inbuf_ptr += len;
             buf_size -= len;
             if ((s->inbuf_ptr - s->inbuf) == AC3_HEADER_SIZE) {
-#ifdef CONFIG_A52BIN
-                len = ff_a52_syncinfo(avctx, s->inbuf, &s->flags, &sample_rate, &bit_rate);
-#else
-                len = a52_syncinfo(s->inbuf, &s->flags, &sample_rate, &bit_rate);
-#endif
+                len = ac3_sync(s->inbuf, &channels, &sample_rate, &bit_rate);
                 if (len == 0) {
                     /* no sync found : move by one byte (inefficient, but simple!) */
                     memmove(s->inbuf, s->inbuf + 1, AC3_HEADER_SIZE - 1);
                     s->inbuf_ptr--;
                 } else {
-		    s->frame_size = len;
+                    s->frame_size = len;
                     /* update codec info */
                     avctx->sample_rate = sample_rate;
                     /* set channels,except if the user explicitly requests 1 or 2 channels, XXX/FIXME this is a bit ugly */
                     if(avctx->channels!=1 && avctx->channels!=2){
-                        avctx->channels = ac3_channels[s->flags & 7];
-                        if (s->flags & A52_LFE)
-                            avctx->channels++;
+                        avctx->channels = channels;
                     }
-		    avctx->bit_rate = bit_rate;
+                    avctx->bit_rate = bit_rate;
                     avctx->frame_size = 6 * 256;
                 }
             }
-        } else if (len < s->frame_size) {
-            len = s->frame_size - len;
-            if (len > buf_size)
-                len = buf_size;
+        } else {
+            len = FFMIN(s->frame_size - len, buf_size);
 
             memcpy(s->inbuf_ptr, buf_ptr, len);
             buf_ptr += len;
             s->inbuf_ptr += len;
             buf_size -= len;
-        } else {
-            *poutbuf = s->inbuf;
-            *poutbuf_size = s->frame_size;
-            s->inbuf_ptr = s->inbuf;
-            s->frame_size = 0;
-            break;
+
+            if(s->inbuf_ptr - s->inbuf == s->frame_size){
+                *poutbuf = s->inbuf;
+                *poutbuf_size = s->frame_size;
+                s->inbuf_ptr = s->inbuf;
+                s->frame_size = 0;
+                break;
+            }
         }
     }
     return buf_ptr - buf;
 }
-#endif
 
 AVCodecParser mpegvideo_parser = {
     { CODEC_ID_MPEG1VIDEO, CODEC_ID_MPEG2VIDEO },
@@ -872,7 +922,6 @@ AVCodecParser mpegaudio_parser = {
     NULL,
 };
 
-#ifdef CONFIG_AC3
 AVCodecParser ac3_parser = {
     { CODEC_ID_AC3 },
     sizeof(AC3ParseContext),
@@ -880,4 +929,3 @@ AVCodecParser ac3_parser = {
     ac3_parse,
     NULL,
 };
-#endif
diff --git a/src/libffmpeg/libavcodec/pcm.c b/src/libffmpeg/libavcodec/pcm.c
index e3a81a590..0b4dd1c86 100644
--- a/src/libffmpeg/libavcodec/pcm.c
+++ b/src/libffmpeg/libavcodec/pcm.c
@@ -14,61 +14,61 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file pcm.c
  * PCM codecs
  */
- 
+
 #include "avcodec.h"
 #include "bitstream.h" // for ff_reverse
 
 /* from g711.c by SUN microsystems (unrestricted use) */
 
-#define	SIGN_BIT	(0x80)		/* Sign bit for a A-law byte. */
-#define	QUANT_MASK	(0xf)		/* Quantization field mask. */
-#define	NSEGS		(8)		/* Number of A-law segments. */
-#define	SEG_SHIFT	(4)		/* Left shift for segment number. */
-#define	SEG_MASK	(0x70)		/* Segment field mask. */
+#define         SIGN_BIT        (0x80)      /* Sign bit for a A-law byte. */
+#define         QUANT_MASK      (0xf)       /* Quantization field mask. */
+#define         NSEGS           (8)         /* Number of A-law segments. */
+#define         SEG_SHIFT       (4)         /* Left shift for segment number. */
+#define         SEG_MASK        (0x70)      /* Segment field mask. */
 
-#define	BIAS		(0x84)		/* Bias for linear code. */
+#define         BIAS            (0x84)      /* Bias for linear code. */
 
 /*
  * alaw2linear() - Convert an A-law value to 16-bit linear PCM
  *
  */
-static int alaw2linear(unsigned char	a_val)
+static int alaw2linear(unsigned char a_val)
 {
-	int		t;
-	int		seg;
+        int t;
+        int seg;
 
-	a_val ^= 0x55;
+        a_val ^= 0x55;
 
-	t = a_val & QUANT_MASK;
-	seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT;
-	if(seg) t= (t + t + 1 + 32) << (seg + 2);
-	else    t= (t + t + 1     ) << 3;
+        t = a_val & QUANT_MASK;
+        seg = ((unsigned)a_val & SEG_MASK) >> SEG_SHIFT;
+        if(seg) t= (t + t + 1 + 32) << (seg + 2);
+        else    t= (t + t + 1     ) << 3;
 
-	return ((a_val & SIGN_BIT) ? t : -t);
+        return ((a_val & SIGN_BIT) ? t : -t);
 }
 
-static int ulaw2linear(unsigned char	u_val)
+static int ulaw2linear(unsigned char u_val)
 {
-	int		t;
+        int t;
 
-	/* Complement to obtain normal u-law value. */
-	u_val = ~u_val;
+        /* Complement to obtain normal u-law value. */
+        u_val = ~u_val;
 
-	/*
-	 * Extract and bias the quantization bits. Then
-	 * shift up by the segment number and subtract out the bias.
-	 */
-	t = ((u_val & QUANT_MASK) << 3) + BIAS;
-	t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT;
+        /*
+         * Extract and bias the quantization bits. Then
+         * shift up by the segment number and subtract out the bias.
+         */
+        t = ((u_val & QUANT_MASK) << 3) + BIAS;
+        t <<= ((unsigned)u_val & SEG_MASK) >> SEG_SHIFT;
 
-	return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS));
+        return ((u_val & SIGN_BIT) ? (BIAS - t) : (t - BIAS));
 }
 
 /* 16384 entries per table */
@@ -78,9 +78,9 @@ static int linear_to_alaw_ref = 0;
 static uint8_t *linear_to_ulaw = NULL;
 static int linear_to_ulaw_ref = 0;
 
-static void build_xlaw_table(uint8_t *linear_to_xlaw, 
+static void build_xlaw_table(uint8_t *linear_to_xlaw,
                              int (*xlaw2linear)(unsigned char),
-                             int mask) 
+                             int mask)
 {
     int i, j, v, v1, v2;
 
@@ -127,7 +127,7 @@ static int pcm_encode_init(AVCodecContext *avctx)
     default:
         break;
     }
-    
+
     switch(avctx->codec->id) {
     case CODEC_ID_PCM_S32LE:
     case CODEC_ID_PCM_S32BE:
@@ -160,7 +160,7 @@ static int pcm_encode_init(AVCodecContext *avctx)
 
     avctx->coded_frame= avcodec_alloc_frame();
     avctx->coded_frame->key_frame= 1;
-    
+
     return 0;
 }
 
@@ -209,7 +209,7 @@ static inline void encode_from16(int bps, int le, int us,
 }
 
 static int pcm_encode_frame(AVCodecContext *avctx,
-			    unsigned char *frame, int buf_size, void *data)
+                            unsigned char *frame, int buf_size, void *data)
 {
     int n, sample_size, v;
     short *samples;
@@ -397,8 +397,8 @@ static inline void decode_to16(int bps, int le, int us,
 }
 
 static int pcm_decode_frame(AVCodecContext *avctx,
-			    void *data, int *data_size,
-			    uint8_t *buf, int buf_size)
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
 {
     PCMDecode *s = avctx->priv_data;
     int n;
@@ -509,9 +509,9 @@ AVCodec name ## _encoder = {                    \
     CODEC_TYPE_AUDIO,                           \
     id,                                         \
     0,                                          \
-    pcm_encode_init,				\
-    pcm_encode_frame,				\
-    pcm_encode_close,				\
+    pcm_encode_init,                            \
+    pcm_encode_frame,                           \
+    pcm_encode_close,                           \
     NULL,                                       \
 };                                              \
 AVCodec name ## _decoder = {                    \
@@ -519,7 +519,7 @@ AVCodec name ## _decoder = {                    \
     CODEC_TYPE_AUDIO,                           \
     id,                                         \
     sizeof(PCMDecode),                          \
-    pcm_decode_init,				\
+    pcm_decode_init,                            \
     NULL,                                       \
     NULL,                                       \
     pcm_decode_frame,                           \
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c
index 57b687dfd..31464fb7a 100644
--- a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.c
@@ -15,9 +15,9 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 #include "../dsputil.h"
 
 #include "gcc_fixes.h"
@@ -44,7 +44,7 @@ static void sigill_handler (int sig)
         signal (sig, SIG_DFL);
         raise (sig);
     }
-    
+
     canjump = 0;
     siglongjmp (jmpbuf, 1);
 }
@@ -67,11 +67,11 @@ int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h
         /*
            Read unaligned pixels into our vectors. The vectors are as follows:
            pix1v: pix1[0]-pix1[15]
-           pix2v: pix2[0]-pix2[15]	pix2iv: pix2[1]-pix2[16]
+           pix2v: pix2[0]-pix2[15]      pix2iv: pix2[1]-pix2[16]
         */
         tv = (vector unsigned char *) pix1;
         pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
-        
+
         tv = (vector unsigned char *) &pix2[0];
         pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
 
@@ -86,7 +86,7 @@ int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h
 
         /* Add each 4 pixel group together and put 4 results into sad */
         sad = vec_sum4s(t5, sad);
-        
+
         pix1 += line_size;
         pix2 += line_size;
     }
@@ -123,7 +123,7 @@ int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h
     */
     tv = (vector unsigned char *) &pix2[0];
     pix2v = vec_perm(tv[0], tv[1], vec_lvsl(0, &pix2[0]));
-    
+
     for(i=0;i<h;i++) {
         /*
            Read unaligned pixels into our vectors. The vectors are as follows:
@@ -144,18 +144,18 @@ int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h
 
         /* Add each 4 pixel group together and put 4 results into sad */
         sad = vec_sum4s(t5, sad);
-        
+
         pix1 += line_size;
         pix2v = pix3v;
         pix3 += line_size;
-        
+
     }
-    
+
     /* Sum up the four partial sums, and put the result into s */
     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
     sumdiffs = vec_splat(sumdiffs, 3);
     vec_ste(sumdiffs, 0, &s);
-    return s;    
+    return s;
 }
 
 int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
@@ -175,7 +175,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int
     vector signed int sumdiffs;
 
     sad = (vector unsigned int)vec_splat_u32(0);
-    
+
     s = 0;
 
     /*
@@ -184,7 +184,7 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int
        fact to avoid a potentially expensive unaligned read, as well
        as some splitting, and vector addition each time around the loop.
        Read unaligned pixels into our vectors. The vectors are as follows:
-       pix2v: pix2[0]-pix2[15]	pix2iv: pix2[1]-pix2[16]
+       pix2v: pix2[0]-pix2[15]  pix2iv: pix2[1]-pix2[16]
        Split the pixel vectors into shorts
     */
     tv = (vector unsigned char *) &pix2[0];
@@ -199,12 +199,12 @@ int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int
     pix2ilv = (vector unsigned short) vec_mergel(zero, pix2iv);
     t1 = vec_add(pix2hv, pix2ihv);
     t2 = vec_add(pix2lv, pix2ilv);
-    
+
     for(i=0;i<h;i++) {
         /*
            Read unaligned pixels into our vectors. The vectors are as follows:
            pix1v: pix1[0]-pix1[15]
-           pix3v: pix3[0]-pix3[15]	pix3iv: pix3[1]-pix3[16]
+           pix3v: pix3[0]-pix3[15]      pix3iv: pix3[1]-pix3[16]
         */
         tv = (vector unsigned char *) pix1;
         pix1v = vec_perm(tv[0], tv[1], vec_lvsl(0, pix1));
@@ -268,25 +268,25 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
     vector unsigned char t1, t2, t3,t4, t5;
     vector unsigned int sad;
     vector signed int sumdiffs;
-    
+
     sad = (vector unsigned int)vec_splat_u32(0);
 
 
     for(i=0;i<h;i++) {
-	/* Read potentially unaligned pixels into t1 and t2 */
+        /* Read potentially unaligned pixels into t1 and t2 */
         perm1 = vec_lvsl(0, pix1);
         pix1v = (vector unsigned char *) pix1;
         perm2 = vec_lvsl(0, pix2);
         pix2v = (vector unsigned char *) pix2;
         t1 = vec_perm(pix1v[0], pix1v[1], perm1);
         t2 = vec_perm(pix2v[0], pix2v[1], perm2);
-       
-	/* Calculate a sum of abs differences vector */ 
+
+        /* Calculate a sum of abs differences vector */
         t3 = vec_max(t1, t2);
         t4 = vec_min(t1, t2);
         t5 = vec_sub(t3, t4);
-	
-	/* Add each 4 pixel group together and put 4 results into sad */
+
+        /* Add each 4 pixel group together and put 4 results into sad */
         sad = vec_sum4s(t5, sad);
 
         pix1 += line_size;
@@ -297,7 +297,7 @@ int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
     sumdiffs = vec_splat(sumdiffs, 3);
     vec_ste(sumdiffs, 0, &s);
-    
+
     return s;
 }
 
@@ -316,9 +316,9 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
     permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
 
     for(i=0;i<h;i++) {
-	/* Read potentially unaligned pixels into t1 and t2
-	   Since we're reading 16 pixels, and actually only want 8,
-	   mask out the last 8 pixels. The 0s don't change the sum. */
+        /* Read potentially unaligned pixels into t1 and t2
+           Since we're reading 16 pixels, and actually only want 8,
+           mask out the last 8 pixels. The 0s don't change the sum. */
         perm1 = vec_lvsl(0, pix1);
         pix1v = (vector unsigned char *) pix1;
         perm2 = vec_lvsl(0, pix2);
@@ -326,12 +326,12 @@ int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
         t1 = vec_and(vec_perm(pix1v[0], pix1v[1], perm1), permclear);
         t2 = vec_and(vec_perm(pix2v[0], pix2v[1], perm2), permclear);
 
-	/* Calculate a sum of abs differences vector */ 
+        /* Calculate a sum of abs differences vector */
         t3 = vec_max(t1, t2);
         t4 = vec_min(t1, t2);
         t5 = vec_sub(t3, t4);
 
-	/* Add each 4 pixel group together and put 4 results into sad */
+        /* Add each 4 pixel group together and put 4 results into sad */
         sad = vec_sum4s(t5, sad);
 
         pix1 += line_size;
@@ -355,9 +355,9 @@ int pix_norm1_altivec(uint8_t *pix, int line_size)
     vector unsigned char pixv;
     vector unsigned int sv;
     vector signed int sum;
-    
+
     sv = (vector unsigned int)vec_splat_u32(0);
-    
+
     s = 0;
     for (i = 0; i < 16; i++) {
         /* Read in the potentially unaligned pixels */
@@ -391,16 +391,16 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
     vector unsigned char t1, t2, t3,t4, t5;
     vector unsigned int sum;
     vector signed int sumsqr;
-    
+
     sum = (vector unsigned int)vec_splat_u32(0);
 
     permclear = (vector unsigned char)AVV(255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0);
 
-    
+
     for(i=0;i<h;i++) {
-	/* Read potentially unaligned pixels into t1 and t2
-	   Since we're reading 16 pixels, and actually only want 8,
-	   mask out the last 8 pixels. The 0s don't change the sum. */
+        /* Read potentially unaligned pixels into t1 and t2
+           Since we're reading 16 pixels, and actually only want 8,
+           mask out the last 8 pixels. The 0s don't change the sum. */
         perm1 = vec_lvsl(0, pix1);
         pix1v = (vector unsigned char *) pix1;
         perm2 = vec_lvsl(0, pix2);
@@ -412,24 +412,24 @@ int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
           Since we want to use unsigned chars, we can take advantage
           of the fact that abs(a-b)^2 = (a-b)^2.
         */
-        
-	/* Calculate abs differences vector */ 
+
+        /* Calculate abs differences vector */
         t3 = vec_max(t1, t2);
         t4 = vec_min(t1, t2);
         t5 = vec_sub(t3, t4);
-        
+
         /* Square the values and add them to our sum */
         sum = vec_msum(t5, t5, sum);
-        
+
         pix1 += line_size;
         pix2 += line_size;
     }
-    
+
     /* Sum up the four partial sums, and put the result into s */
     sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
     sumsqr = vec_splat(sumsqr, 3);
     vec_ste(sumsqr, 0, &s);
-    
+
     return s;
 }
 
@@ -447,11 +447,11 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
     vector unsigned char t1, t2, t3,t4, t5;
     vector unsigned int sum;
     vector signed int sumsqr;
-    
+
     sum = (vector unsigned int)vec_splat_u32(0);
-    
+
     for(i=0;i<h;i++) {
-	/* Read potentially unaligned pixels into t1 and t2 */
+        /* Read potentially unaligned pixels into t1 and t2 */
         perm1 = vec_lvsl(0, pix1);
         pix1v = (vector unsigned char *) pix1;
         perm2 = vec_lvsl(0, pix2);
@@ -463,24 +463,24 @@ int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
           Since we want to use unsigned chars, we can take advantage
           of the fact that abs(a-b)^2 = (a-b)^2.
         */
-        
-	/* Calculate abs differences vector */ 
+
+        /* Calculate abs differences vector */
         t3 = vec_max(t1, t2);
         t4 = vec_min(t1, t2);
         t5 = vec_sub(t3, t4);
-        
+
         /* Square the values and add them to our sum */
         sum = vec_msum(t5, t5, sum);
-        
+
         pix1 += line_size;
         pix2 += line_size;
     }
-    
+
     /* Sum up the four partial sums, and put the result into s */
     sumsqr = vec_sums((vector signed int) sum, (vector signed int) zero);
     sumsqr = vec_splat(sumsqr, 3);
     vec_ste(sumsqr, 0, &s);
-    
+
     return s;
 }
 
@@ -494,26 +494,26 @@ int pix_sum_altivec(uint8_t * pix, int line_size)
 
     int i;
     int s __attribute__((aligned(16)));
-    
+
     sad = (vector unsigned int)vec_splat_u32(0);
-    
+
     for (i = 0; i < 16; i++) {
-	/* Read the potentially unaligned 16 pixels into t1 */
+        /* Read the potentially unaligned 16 pixels into t1 */
         perm = vec_lvsl(0, pix);
         pixv = (vector unsigned char *) pix;
         t1 = vec_perm(pixv[0], pixv[1], perm);
 
-	/* Add each 4 pixel group together and put 4 results into sad */
+        /* Add each 4 pixel group together and put 4 results into sad */
         sad = vec_sum4s(t1, sad);
-        
+
         pix += line_size;
     }
-    
+
     /* Sum up the four partial sums, and put the result into s */
     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
     sumdiffs = vec_splat(sumdiffs, 3);
     vec_ste(sumdiffs, 0, &s);
-    
+
     return s;
 }
 
@@ -633,7 +633,7 @@ void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
     register int i;
     register vector unsigned char vdst, vsrc;
-    
+
     /* dst and src are 16 bytes-aligned (guaranteed) */
     for(i = 0 ; (i + 15) < w ; i++)
     {
@@ -799,19 +799,19 @@ POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
     int i;
 
 POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
- 
+
    for (i = 0; i < h; i++) {
      /*
        block is 8 bytes-aligned, so we're either in the
        left block (16 bytes-aligned) or in the right block (not)
      */
      int rightside = ((unsigned long)block & 0x0000000F);
-     
+
      blockv = vec_ld(0, block);
      pixelsv1 = vec_ld(0, (unsigned char*)pixels);
      pixelsv2 = vec_ld(16, (unsigned char*)pixels);
      pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));
-     
+
      if (rightside)
      {
        pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1));
@@ -820,17 +820,17 @@ POWERPC_PERF_START_COUNT(altivec_avg_pixels8_num, 1);
      {
        pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3));
      }
-     
+
      blockv = vec_avg(blockv, pixelsv);
 
      vec_st(blockv, 0, block);
-     
+
      pixels += line_size;
      block += line_size;
    }
-   
+
 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_num, 1);
- 
+
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
 
@@ -886,7 +886,7 @@ POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
      pixelssum1, pixelssum2, temp3;
    register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
    register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
-   
+
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
@@ -903,8 +903,8 @@ POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);
-   
-POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1); 
+
+POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
    for (i = 0; i < h ; i++) {
      int rightside = ((unsigned long)block & 0x0000000F);
      blockv = vec_ld(0, block);
@@ -929,7 +929,7 @@ POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
      temp3 = vec_sra(temp3, vctwo);
      pixelssum1 = vec_add(pixelssum2, vctwo);
      pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
-     
+
      if (rightside)
      {
        blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
@@ -938,13 +938,13 @@ POWERPC_PERF_START_COUNT(altivec_put_pixels8_xy2_num, 1);
      {
        blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
      }
-     
+
      vec_st(blockv, 0, block);
-     
+
      block += line_size;
      pixels += line_size;
    }
-   
+
 POWERPC_PERF_STOP_COUNT(altivec_put_pixels8_xy2_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
@@ -987,7 +987,7 @@ POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
       } pixels += 4 - line_size * (h + 1);
       block += 4 - line_size * h;
     }
-    
+
 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
 
 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
@@ -1002,7 +1002,7 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
    register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
    register const_vector unsigned short vcone = (const_vector unsigned short)vec_splat_u16(1);
    register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
-   
+
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
@@ -1019,8 +1019,8 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);
-   
-POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1); 
+
+POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
    for (i = 0; i < h ; i++) {
      int rightside = ((unsigned long)block & 0x0000000F);
      blockv = vec_ld(0, block);
@@ -1045,7 +1045,7 @@ POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
      temp3 = vec_sra(temp3, vctwo);
      pixelssum1 = vec_add(pixelssum2, vcone);
      pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
-     
+
      if (rightside)
      {
        blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
@@ -1054,13 +1054,13 @@ POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
      {
        blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
      }
-     
+
      vec_st(blockv, 0, block);
-     
+
      block += line_size;
      pixels += line_size;
    }
-   
+
 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels8_xy2_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
@@ -1119,7 +1119,7 @@ POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
    register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
 
 POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
- 
+
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
@@ -1141,7 +1141,7 @@ POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);
-   
+
    for (i = 0; i < h ; i++) {
      blockv = vec_ld(0, block);
 
@@ -1161,7 +1161,7 @@ POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
      pixelsv4 = vec_mergel(vczero, pixelsv2);
      pixelsv1 = vec_mergeh(vczero, pixelsv1);
      pixelsv2 = vec_mergeh(vczero, pixelsv2);
-     
+
      pixelssum4 = vec_add((vector unsigned short)pixelsv3,
                           (vector unsigned short)pixelsv4);
      pixelssum2 = vec_add((vector unsigned short)pixelsv1,
@@ -1175,13 +1175,13 @@ POWERPC_PERF_START_COUNT(altivec_put_pixels16_xy2_num, 1);
      pixelssum1 = vec_add(pixelssum2, vctwo);
 
      blockv = vec_packsu(temp3, temp4);
-     
+
      vec_st(blockv, 0, block);
-     
+
      block += line_size;
      pixels += line_size;
    }
-   
+
 POWERPC_PERF_STOP_COUNT(altivec_put_pixels16_xy2_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
@@ -1241,7 +1241,7 @@ POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
    register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
 
 POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
- 
+
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
@@ -1263,7 +1263,7 @@ POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);
-   
+
    for (i = 0; i < h ; i++) {
      blockv = vec_ld(0, block);
 
@@ -1283,7 +1283,7 @@ POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
      pixelsv4 = vec_mergel(vczero, pixelsv2);
      pixelsv1 = vec_mergeh(vczero, pixelsv1);
      pixelsv2 = vec_mergeh(vczero, pixelsv2);
-     
+
      pixelssum4 = vec_add((vector unsigned short)pixelsv3,
                           (vector unsigned short)pixelsv4);
      pixelssum2 = vec_add((vector unsigned short)pixelsv1,
@@ -1297,13 +1297,13 @@ POWERPC_PERF_START_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
      pixelssum1 = vec_add(pixelssum2, vcone);
 
      blockv = vec_packsu(temp3, temp4);
-     
+
      vec_st(blockv, 0, block);
-     
+
      block += line_size;
      pixels += line_size;
    }
-   
+
 POWERPC_PERF_STOP_COUNT(altivec_put_no_rnd_pixels16_xy2_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
@@ -1335,32 +1335,32 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
        0x00, 0x01, 0x02, 0x03,
        0x04, 0x05, 0x06, 0x07);
 
-#define ONEITERBUTTERFLY(i, res)					\
-    {									\
-      register vector unsigned char src1, src2, srcO;		       	\
-      register vector unsigned char dst1, dst2, dstO;		       	\
-      src1 = vec_ld(stride * i, src);					\
-      if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8)	\
-	src2 = vec_ld((stride * i) + 16, src);				\
-      srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src));		\
-      dst1 = vec_ld(stride * i, dst);					\
-      if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8)	\
-	dst2 = vec_ld((stride * i) + 16, dst);				\
-      dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst));		\
-      /* promote the unsigned chars to signed shorts */			\
-      /* we're in the 8x8 function, we only care for the first 8 */	\
-      register vector signed short srcV =			       	\
-	(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
-      register vector signed short dstV =			       	\
-	(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
-      /* substractions inside the first butterfly */			\
-      register vector signed short but0 = vec_sub(srcV, dstV);	       	\
-      register vector signed short op1 = vec_perm(but0, but0, perm1);  	\
-      register vector signed short but1 = vec_mladd(but0, vprod1, op1);	\
-      register vector signed short op2 = vec_perm(but1, but1, perm2);  	\
-      register vector signed short but2 = vec_mladd(but1, vprod2, op2);	\
-      register vector signed short op3 = vec_perm(but2, but2, perm3);  	\
-      res = vec_mladd(but2, vprod3, op3);				\
+#define ONEITERBUTTERFLY(i, res)                                        \
+    {                                                                   \
+      register vector unsigned char src1, src2, srcO;                   \
+      register vector unsigned char dst1, dst2, dstO;                   \
+      src1 = vec_ld(stride * i, src);                                   \
+      if ((((stride * i) + (unsigned long)src) & 0x0000000F) > 8)       \
+        src2 = vec_ld((stride * i) + 16, src);                          \
+      srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src));           \
+      dst1 = vec_ld(stride * i, dst);                                   \
+      if ((((stride * i) + (unsigned long)dst) & 0x0000000F) > 8)       \
+        dst2 = vec_ld((stride * i) + 16, dst);                          \
+      dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst));           \
+      /* promote the unsigned chars to signed shorts */                 \
+      /* we're in the 8x8 function, we only care for the first 8 */     \
+      register vector signed short srcV =                               \
+        (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
+      register vector signed short dstV =                               \
+        (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
+      /* substractions inside the first butterfly */                    \
+      register vector signed short but0 = vec_sub(srcV, dstV);          \
+      register vector signed short op1 = vec_perm(but0, but0, perm1);   \
+      register vector signed short but1 = vec_mladd(but0, vprod1, op1); \
+      register vector signed short op2 = vec_perm(but1, but1, perm2);   \
+      register vector signed short but2 = vec_mladd(but1, vprod2, op2); \
+      register vector signed short op3 = vec_perm(but2, but2, perm3);   \
+      res = vec_mladd(but2, vprod3, op3);                               \
     }
     ONEITERBUTTERFLY(0, temp0);
     ONEITERBUTTERFLY(1, temp1);
@@ -1382,7 +1382,7 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
     register vector signed short line5 = vec_sub(temp4, temp5);
     register vector signed short line6 = vec_add(temp6, temp7);
     register vector signed short line7 = vec_sub(temp6, temp7);
-    
+
     register vector signed short line0B = vec_add(line0, line2);
     register vector signed short line2B = vec_sub(line0, line2);
     register vector signed short line1B = vec_add(line1, line3);
@@ -1391,7 +1391,7 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
     register vector signed short line6B = vec_sub(line4, line6);
     register vector signed short line5B = vec_add(line5, line7);
     register vector signed short line7B = vec_sub(line5, line7);
-    
+
     register vector signed short line0C = vec_add(line0B, line4B);
     register vector signed short line4C = vec_sub(line0B, line4B);
     register vector signed short line1C = vec_add(line1B, line5B);
@@ -1400,7 +1400,7 @@ POWERPC_PERF_START_COUNT(altivec_hadamard8_diff8x8_num, 1);
     register vector signed short line6C = vec_sub(line2B, line6B);
     register vector signed short line3C = vec_add(line3B, line7B);
     register vector signed short line7C = vec_sub(line3B, line7B);
-    
+
     vsum = vec_sum4s(vec_abs(line0C), vec_splat_s32(0));
     vsum = vec_sum4s(vec_abs(line1C), vsum);
     vsum = vec_sum4s(vec_abs(line2C), vsum);
@@ -1421,7 +1421,7 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1);
   16x8 works with 16 elements ; it allows to avoid replicating
   loads, and give the compiler more rooms for scheduling.
   It's only used from inside hadamard8_diff16_altivec.
-  
+
   Unfortunately, it seems gcc-3.3 is a bit dumb, and
   the compiled code has a LOT of spill code, it seems
   gcc (unlike xlc) cannot keep everything in registers
@@ -1429,11 +1429,11 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff8x8_num, 1);
   registers allocation. It's not clean, but on
   a 7450 the resulting code is much faster (best case
   fall from 700+ cycles to 550).
-  
+
   xlc doesn't add spill code, but it doesn't know how to
   schedule for the 7450, and its code isn't much faster than
   gcc-3.3 on the 7450 (but uses 25% less instructions...)
-  
+
   On the 970, the hand-made RA is still a win (arount 690
   vs. around 780), but xlc goes to around 660 on the
   regular C code...
@@ -1480,26 +1480,26 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
        0x00, 0x01, 0x02, 0x03,
        0x04, 0x05, 0x06, 0x07);
 
-#define ONEITERBUTTERFLY(i, res1, res2)					\
-    {									\
+#define ONEITERBUTTERFLY(i, res1, res2)                                 \
+    {                                                                   \
       register vector unsigned char src1 asm ("v22"), src2 asm ("v23"); \
       register vector unsigned char dst1 asm ("v24"), dst2 asm ("v25"); \
-      src1 = vec_ld(stride * i, src);					\
-      src2 = vec_ld((stride * i) + 16, src);				\
+      src1 = vec_ld(stride * i, src);                                   \
+      src2 = vec_ld((stride * i) + 16, src);                            \
       register vector unsigned char srcO asm ("v22") = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
-      dst1 = vec_ld(stride * i, dst);					\
-      dst2 = vec_ld((stride * i) + 16, dst);				\
+      dst1 = vec_ld(stride * i, dst);                                   \
+      dst2 = vec_ld((stride * i) + 16, dst);                            \
       register vector unsigned char dstO asm ("v23") = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
-      /* promote the unsigned chars to signed shorts */			\
+      /* promote the unsigned chars to signed shorts */                 \
       register vector signed short srcV asm ("v24") =                   \
-	(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
+        (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)srcO); \
       register vector signed short dstV asm ("v25") =                   \
-	(vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
+        (vector signed short)vec_mergeh((vector signed char)vzero, (vector signed char)dstO); \
       register vector signed short srcW asm ("v26") =                   \
-	(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \
+        (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)srcO); \
       register vector signed short dstW asm ("v27") =                   \
-	(vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \
-      /* substractions inside the first butterfly */			\
+        (vector signed short)vec_mergel((vector signed char)vzero, (vector signed char)dstO); \
+      /* substractions inside the first butterfly */                    \
       register vector signed short but0 asm ("v28") = vec_sub(srcV, dstV); \
       register vector signed short but0S asm ("v29") = vec_sub(srcW, dstW); \
       register vector signed short op1 asm ("v30") = vec_perm(but0, but0, perm1); \
@@ -1511,9 +1511,9 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
       register vector signed short op2S asm ("v27") = vec_perm(but1S, but1S, perm2); \
       register vector signed short but2S asm ("v28") = vec_mladd(but1S, vprod2, op2S); \
       register vector signed short op3 asm ("v29") = vec_perm(but2, but2, perm3); \
-      res1 = vec_mladd(but2, vprod3, op3);				\
+      res1 = vec_mladd(but2, vprod3, op3);                              \
       register vector signed short op3S asm ("v30") = vec_perm(but2S, but2S, perm3); \
-      res2 = vec_mladd(but2S, vprod3, op3S);				\
+      res2 = vec_mladd(but2S, vprod3, op3S);                            \
     }
     ONEITERBUTTERFLY(0, temp0, temp0S);
     ONEITERBUTTERFLY(1, temp1, temp1S);
@@ -1535,7 +1535,7 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
     register vector signed short line5 = vec_sub(temp4, temp5);
     register vector signed short line6 = vec_add(temp6, temp7);
     register vector signed short line7 = vec_sub(temp6, temp7);
-      
+
     register vector signed short line0B = vec_add(line0, line2);
     register vector signed short line2B = vec_sub(line0, line2);
     register vector signed short line1B = vec_add(line1, line3);
@@ -1544,7 +1544,7 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
     register vector signed short line6B = vec_sub(line4, line6);
     register vector signed short line5B = vec_add(line5, line7);
     register vector signed short line7B = vec_sub(line5, line7);
-      
+
     register vector signed short line0C = vec_add(line0B, line4B);
     register vector signed short line4C = vec_sub(line0B, line4B);
     register vector signed short line1C = vec_add(line1B, line5B);
@@ -1553,7 +1553,7 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
     register vector signed short line6C = vec_sub(line2B, line6B);
     register vector signed short line3C = vec_add(line3B, line7B);
     register vector signed short line7C = vec_sub(line3B, line7B);
-      
+
     vsum = vec_sum4s(vec_abs(line0C), vec_splat_s32(0));
     vsum = vec_sum4s(vec_abs(line1C), vsum);
     vsum = vec_sum4s(vec_abs(line2C), vsum);
@@ -1623,12 +1623,12 @@ POWERPC_PERF_STOP_COUNT(altivec_hadamard8_diff16_num, 1);
 int has_altivec(void)
 {
 #ifdef __AMIGAOS4__
-	ULONG result = 0;
-	extern struct ExecIFace *IExec;
+        ULONG result = 0;
+        extern struct ExecIFace *IExec;
 
-	IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
-	if (result == VECTORTYPE_ALTIVEC) return 1;
-	return 0;
+        IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
+        if (result == VECTORTYPE_ALTIVEC) return 1;
+        return 0;
 #else /* __AMIGAOS4__ */
 
 #ifdef CONFIG_DARWIN
@@ -1649,12 +1649,12 @@ int has_altivec(void)
         signal (SIGILL, SIG_DFL);
       } else {
         canjump = 1;
-        
+
         asm volatile ("mtspr 256, %0\n\t"
                       "vand %%v0, %%v0, %%v0"
                       :
                       : "r" (-1));
-        
+
         signal (SIGILL, SIG_DFL);
         return 1;
       }
@@ -1710,7 +1710,7 @@ POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
      pixelssum1, pixelssum2, temp3;
    register const_vector unsigned char vczero = (const_vector unsigned char)vec_splat_u8(0);
    register const_vector unsigned short vctwo = (const_vector unsigned short)vec_splat_u16(2);
-   
+
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
@@ -1727,8 +1727,8 @@ POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);
-   
-POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1); 
+
+POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
    for (i = 0; i < h ; i++) {
      int rightside = ((unsigned long)block & 0x0000000F);
      blockv = vec_ld(0, block);
@@ -1753,7 +1753,7 @@ POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
      temp3 = vec_sra(temp3, vctwo);
      pixelssum1 = vec_add(pixelssum2, vctwo);
      pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);
-     
+
      if (rightside)
      {
        blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
@@ -1762,14 +1762,14 @@ POWERPC_PERF_START_COUNT(altivec_avg_pixels8_xy2_num, 1);
      {
        blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
      }
-     
+
      blockv = vec_avg(blocktemp, blockv);
      vec_st(blockv, 0, block);
-     
+
      block += line_size;
      pixels += line_size;
    }
-   
+
 POWERPC_PERF_STOP_COUNT(altivec_avg_pixels8_xy2_num, 1);
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
 }
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h
index 88f06c372..ac54817d0 100644
--- a/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h
+++ b/src/libffmpeg/libavcodec/ppc/dsputil_altivec.h
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef _DSPUTIL_ALTIVEC_
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_h264_altivec.c b/src/libffmpeg/libavcodec/ppc/dsputil_h264_altivec.c
index 1891e194a..b9fef005e 100755
--- a/src/libffmpeg/libavcodec/ppc/dsputil_h264_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/dsputil_h264_altivec.c
@@ -13,9 +13,9 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 #include "../dsputil.h"
 
 #include "gcc_fixes.h"
@@ -71,7 +71,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uin
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){ \
-    uint64_t temp[SIZE*SIZE/8] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/8]);\
     uint8_t * const half= (uint8_t*)temp;\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
@@ -82,14 +82,14 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    uint64_t temp[SIZE*SIZE/8] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/8]);\
     uint8_t * const half= (uint8_t*)temp;\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    uint64_t temp[SIZE*SIZE/8] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/8]);\
     uint8_t * const half= (uint8_t*)temp;\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
@@ -100,14 +100,14 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    uint64_t temp[SIZE*SIZE/8] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/8]);\
     uint8_t * const half= (uint8_t*)temp;\
     put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
     OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    uint64_t temp[SIZE*SIZE/4] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/4]);\
     uint8_t * const halfH= (uint8_t*)temp;\
     uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
@@ -116,7 +116,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    uint64_t temp[SIZE*SIZE/4] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/4]);\
     uint8_t * const halfH= (uint8_t*)temp;\
     uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
@@ -125,7 +125,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    uint64_t temp[SIZE*SIZE/4] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/4]);\
     uint8_t * const halfH= (uint8_t*)temp;\
     uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
@@ -134,7 +134,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    uint64_t temp[SIZE*SIZE/4] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*SIZE/4]);\
     uint8_t * const halfH= (uint8_t*)temp;\
     uint8_t * const halfV= ((uint8_t*)temp) + SIZE*SIZE;\
     put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
@@ -143,13 +143,13 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    uint64_t temp[SIZE*(SIZE+8)/4] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4]);\
     int16_t * const tmp= (int16_t*)temp;\
     OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\
     uint8_t * const halfH= (uint8_t*)temp;\
     uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\
     int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\
@@ -159,7 +159,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\
     uint8_t * const halfH= (uint8_t*)temp;\
     uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\
     int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\
@@ -169,7 +169,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\
     uint8_t * const halfV= (uint8_t*)temp;\
     uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\
     int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\
@@ -179,7 +179,7 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint
 }\
 \
 static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride){\
-    uint64_t temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16;\
+    DECLARE_ALIGNED_16(uint64_t, temp[SIZE*(SIZE+8)/4 + SIZE*SIZE/4]);\
     uint8_t * const halfV= (uint8_t*)temp;\
     uint8_t * const halfHV= ((uint8_t*)temp) + SIZE*SIZE;\
     int16_t * const tmp= ((int16_t*)temp) + SIZE*SIZE;\
@@ -191,33 +191,33 @@ static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint
 
 /* from dsputil.c */
 static inline void put_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
-	int             i;
-	for (i = 0; i < h; i++) {
-		uint32_t        a, b;
-		a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
-		b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
-		*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b);
-		a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
-		b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
-		*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b);
-	}
+        int             i;
+        for (i = 0; i < h; i++) {
+                uint32_t        a, b;
+                a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
+                b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
+                *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b);
+                a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
+                b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
+                *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b);
+        }
 } static inline void avg_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
-	int             i;
-	for (i = 0; i < h; i++) {
-		uint32_t        a, b;
-		a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
-		b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
-		*((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b));
-		a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
-		b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
-		*((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b));
-	}
+        int             i;
+        for (i = 0; i < h; i++) {
+                uint32_t        a, b;
+                a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
+                b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
+                *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b));
+                a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
+                b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
+                *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b));
+        }
 } static inline void put_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
-	put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
-	put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
+        put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
+        put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
 } static inline void avg_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
-	avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
-	avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
+        avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
+        avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
 }
 
 /* UNIMPLEMENTED YET !! */
@@ -228,7 +228,7 @@ H264_MC(put_, 16, altivec)
      H264_MC(avg_, 16, altivec)
 
 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
-    
+
 #ifdef HAVE_ALTIVEC
   if (has_altivec()) {
     c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
@@ -251,16 +251,16 @@ void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
     c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
     c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
     c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
-    
+
     dspfunc(put_h264_qpel, 0, 16);
     dspfunc(avg_h264_qpel, 0, 16);
 #undef dspfunc
-    
+
   } else
 #endif /* HAVE_ALTIVEC */
   {
     // Non-AltiVec PPC optimisations
-    
+
     // ... pending ...
   }
 }
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_h264_template_altivec.c b/src/libffmpeg/libavcodec/ppc/dsputil_h264_template_altivec.c
index cb0fa954d..7f46ccf14 100755
--- a/src/libffmpeg/libavcodec/ppc/dsputil_h264_template_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/dsputil_h264_template_altivec.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /* this code assume that stride % 16 == 0 */
@@ -47,7 +47,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in
 
     register int loadSecond = (((unsigned long)src) % 16) <= 7 ? 0 : 1;
     register int reallyBadAlign = (((unsigned long)src) % 16) == 15 ? 1 : 0;
-    
+
     vector unsigned char vsrcAuc;
     vector unsigned char vsrcBuc;
     vector unsigned char vsrcperm0;
@@ -57,7 +57,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in
       vsrcBuc = vec_ld(16, src);
     vsrcperm0 = vec_lvsl(0, src);
     vsrcperm1 = vec_lvsl(1, src);
-    
+
     vector unsigned char vsrc0uc;
     vector unsigned char vsrc1uc;
     vsrc0uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm0);
@@ -65,7 +65,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in
       vsrc1uc = vsrcBuc;
     else
       vsrc1uc = vec_perm(vsrcAuc, vsrcBuc, vsrcperm1);
-    
+
     vector signed short vsrc0ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc0uc);
     vector signed short vsrc1ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc1uc);
 
@@ -73,37 +73,37 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in
       for (i = 0 ; i < h ; i++) {
         vector unsigned char vsrcCuc;
         vsrcCuc = vec_ld(stride + 0, src);
-        
+
         vector unsigned char vsrc2uc;
         vector unsigned char vsrc3uc;
         vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
         vsrc3uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm1);
-        
+
         vector signed short vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc2uc);
         vector signed short vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc3uc);
-        
+
         vector signed short psum;
-        
+
         psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
         psum = vec_mladd(vB, vsrc1ssH, psum);
         psum = vec_mladd(vC, vsrc2ssH, psum);
         psum = vec_mladd(vD, vsrc3ssH, psum);
         psum = vec_add(v32ss, psum);
         psum = vec_sra(psum, v6us);
-        
+
         vector unsigned char vdst = vec_ld(0, dst);
         vector unsigned char ppsum = (vector unsigned char)vec_packsu(psum, psum);
-        
+
         vector unsigned char vfdst = vec_perm(vdst, ppsum, fperm);
         vector unsigned char fsum;
-        
+
         OP_U8_ALTIVEC(fsum, vfdst, vdst);
 
         vec_st(fsum, 0, dst);
-        
+
         vsrc0ssH = vsrc2ssH;
         vsrc1ssH = vsrc3ssH;
-        
+
         dst += stride;
         src += stride;
       }
@@ -113,7 +113,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in
         vector unsigned char vsrcDuc;
         vsrcCuc = vec_ld(stride + 0, src);
         vsrcDuc = vec_ld(stride + 16, src);
-        
+
         vector unsigned char vsrc2uc;
         vector unsigned char vsrc3uc;
         vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
@@ -121,32 +121,32 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in
           vsrc3uc = vsrcDuc;
         else
           vsrc3uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm1);
-        
+
         vector signed short vsrc2ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc2uc);
         vector signed short vsrc3ssH = (vector signed short)vec_mergeh((vector unsigned char)vzero, (vector unsigned char)vsrc3uc);
-        
+
         vector signed short psum;
-      
+
         psum = vec_mladd(vA, vsrc0ssH, vec_splat_s16(0));
         psum = vec_mladd(vB, vsrc1ssH, psum);
         psum = vec_mladd(vC, vsrc2ssH, psum);
         psum = vec_mladd(vD, vsrc3ssH, psum);
         psum = vec_add(v32ss, psum);
         psum = vec_sr(psum, v6us);
-        
+
         vector unsigned char vdst = vec_ld(0, dst);
-        vector unsigned char ppsum = (vector unsigned char)vec_pack(psum, psum); 
-        
+        vector unsigned char ppsum = (vector unsigned char)vec_pack(psum, psum);
+
         vector unsigned char vfdst = vec_perm(vdst, ppsum, fperm);
         vector unsigned char fsum;
-        
+
         OP_U8_ALTIVEC(fsum, vfdst, vdst);
 
         vec_st(fsum, 0, dst);
-        
+
         vsrc0ssH = vsrc2ssH;
         vsrc1ssH = vsrc3ssH;
-        
+
         dst += stride;
         src += stride;
       }
@@ -159,7 +159,7 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
   POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_h_lowpass_num, 1);
   POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
   register int i;
-  
+
   const vector signed int vzero = vec_splat_s32(0);
   const vector unsigned char permM2 = vec_lvsl(-2, src);
   const vector unsigned char permM1 = vec_lvsl(-1, src);
@@ -258,13 +258,13 @@ static void PREFIX_h264_qpel16_h_lowpass_altivec(uint8_t * dst, uint8_t * src, i
     const vector signed short sum2B = vec_adds(srcM1B, srcP2B);
     const vector signed short sum3A = vec_adds(srcM2A, srcP3A);
     const vector signed short sum3B = vec_adds(srcM2B, srcP3B);
-    
+
     const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss);
     const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss);
 
     const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
     const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
-    
+
     const vector signed short pp3A = vec_add(sum3A, pp1A);
     const vector signed short pp3B = vec_add(sum3B, pp1B);
 
@@ -300,7 +300,7 @@ POWERPC_PERF_STOP_COUNT(PREFIX_h264_qpel16_h_lowpass_num, 1);
 static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, int dstStride, int srcStride) {
   POWERPC_PERF_DECLARE(PREFIX_h264_qpel16_v_lowpass_num, 1);
   POWERPC_PERF_START_COUNT(PREFIX_h264_qpel16_v_lowpass_num, 1);
-  
+
   register int i;
 
   const vector signed int vzero = vec_splat_s32(0);
@@ -312,7 +312,7 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i
   const vector unsigned char dstperm = vec_lvsr(0, dst);
   const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);
   const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);
-  
+
   uint8_t *srcbis = src - (srcStride * 2);
 
   const vector unsigned char srcM2a = vec_ld(0, srcbis);
@@ -372,13 +372,13 @@ static void PREFIX_h264_qpel16_v_lowpass_altivec(uint8_t * dst, uint8_t * src, i
     srcP1ssB = srcP2ssB;
     srcP2ssA = srcP3ssA;
     srcP2ssB = srcP3ssB;
-    
+
     const vector signed short pp1A = vec_mladd(sum1A, v20ss, v16ss);
     const vector signed short pp1B = vec_mladd(sum1B, v20ss, v16ss);
 
     const vector signed short pp2A = vec_mladd(sum2A, v5ss, (vector signed short)vzero);
     const vector signed short pp2B = vec_mladd(sum2B, v5ss, (vector signed short)vzero);
-    
+
     const vector signed short pp3A = vec_add(sum3A, pp1A);
     const vector signed short pp3B = vec_add(sum3B, pp1B);
 
@@ -513,7 +513,7 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
     const vector signed short sum2B = vec_adds(srcM1B, srcP2B);
     const vector signed short sum3A = vec_adds(srcM2A, srcP3A);
     const vector signed short sum3B = vec_adds(srcM2B, srcP3B);
-    
+
     const vector signed short pp1A = vec_mladd(sum1A, v20ss, sum3A);
     const vector signed short pp1B = vec_mladd(sum1B, v20ss, sum3B);
 
@@ -525,18 +525,18 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
 
     vec_st(psumA, 0, tmp);
     vec_st(psumB, 16, tmp);
-    
+
     src += srcStride;
     tmp += tmpStride; /* int16_t*, and stride is 16, so it's OK here */
   }
-  
+
   const vector unsigned char dstperm = vec_lvsr(0, dst);
   const vector unsigned char neg1 = (const vector unsigned char)vec_splat_s8(-1);
   const vector unsigned char dstmask = vec_perm((const vector unsigned char)vzero, neg1, dstperm);
   const vector unsigned char mperm = (const vector unsigned char)
     AVV(0x00, 0x08, 0x01, 0x09, 0x02, 0x0A, 0x03, 0x0B,
         0x04, 0x0C, 0x05, 0x0D, 0x06, 0x0E, 0x07, 0x0F);
-  
+
   int16_t *tmpbis = tmp - (tmpStride * 21);
 
   vector signed short tmpM2ssA = vec_ld(0, tmpbis);
@@ -607,7 +607,7 @@ static void PREFIX_h264_qpel16_hv_lowpass_altivec(uint8_t * dst, int16_t * tmp,
     const vector signed int sumAo = vec_add(pp1cAo, pp32Ao);
     const vector signed int sumBe = vec_add(pp1cBe, pp32Be);
     const vector signed int sumBo = vec_add(pp1cBo, pp32Bo);
-    
+
     const vector signed int ssumAe = vec_sra(sumAe, v10ui);
     const vector signed int ssumAo = vec_sra(sumAo, v10ui);
     const vector signed int ssumBe = vec_sra(sumBe, v10ui);
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c
index 776f4235c..d5f55b80f 100644
--- a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c
+++ b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "../dsputil.h"
@@ -87,16 +87,16 @@ void powerpc_display_perf_report(void)
   {
     for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
       {
-	if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
-	  av_log(NULL, AV_LOG_INFO,
-		  " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
-		  perfname[i],
-		  j+1,
-		  perfdata[j][i][powerpc_data_min],
-		  perfdata[j][i][powerpc_data_max],
-		  (double)perfdata[j][i][powerpc_data_sum] /
-		  (double)perfdata[j][i][powerpc_data_num],
-		  perfdata[j][i][powerpc_data_num]);
+        if (perfdata[j][i][powerpc_data_num] != (unsigned long long)0)
+          av_log(NULL, AV_LOG_INFO,
+                  " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
+                  perfname[i],
+                  j+1,
+                  perfdata[j][i][powerpc_data_min],
+                  perfdata[j][i][powerpc_data_max],
+                  (double)perfdata[j][i][powerpc_data_sum] /
+                  (double)perfdata[j][i][powerpc_data_num],
+                  perfdata[j][i][powerpc_data_num]);
       }
   }
 }
@@ -179,7 +179,7 @@ POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
     }
     else
       for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
-	asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
+        asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
       }
 #else
     memset(blocks, 0, sizeof(DCTELEM)*6*64);
@@ -227,7 +227,7 @@ long check_dcbzl_effect(void)
   }
 
   av_free(fakedata);
-  
+
   return count;
 }
 #else
@@ -257,10 +257,10 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
 
 #ifdef HAVE_ALTIVEC
   dsputil_h264_init_ppc(c, avctx);
-  
+
     if (has_altivec()) {
         mm_flags |= MM_ALTIVEC;
-        
+
         // Altivec specific optimisations
         c->pix_abs[0][1] = sad16_x2_altivec;
         c->pix_abs[0][2] = sad16_y2_altivec;
@@ -284,25 +284,25 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
         c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
         c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
         c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
-	c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
+        c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
         c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
         c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
         c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
         c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
-        
-	c->gmc1 = gmc1_altivec;
+
+        c->gmc1 = gmc1_altivec;
 
 #ifdef CONFIG_DARWIN // ATM gcc-3.3 and gcc-3.4 fail to compile these in linux...
-	c->hadamard8_diff[0] = hadamard8_diff16_altivec;
-	c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
+        c->hadamard8_diff[0] = hadamard8_diff16_altivec;
+        c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
 #endif
 
 #ifdef CONFIG_ENCODERS
-	if (avctx->dct_algo == FF_DCT_AUTO ||
-	    avctx->dct_algo == FF_DCT_ALTIVEC)
-	{
-	    c->fdct = fdct_altivec;
-	}
+        if (avctx->dct_algo == FF_DCT_AUTO ||
+            avctx->dct_algo == FF_DCT_ALTIVEC)
+        {
+            c->fdct = fdct_altivec;
+        }
 #endif //CONFIG_ENCODERS
 
       if (avctx->lowres==0)
@@ -319,20 +319,20 @@ void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
         }
       }
-        
+
 #ifdef POWERPC_PERFORMANCE_REPORT
         {
           int i, j;
           for (i = 0 ; i < powerpc_perf_total ; i++)
           {
-	    for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
-	      {
-		perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
-		perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
-		perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
-		perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
-	      }
-	  }
+            for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
+              {
+                perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
+                perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
+                perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
+                perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
+              }
+          }
         }
 #endif /* POWERPC_PERFORMANCE_REPORT */
     } else
diff --git a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.h b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.h
index 7e01677f1..966ffa71a 100644
--- a/src/libffmpeg/libavcodec/ppc/dsputil_ppc.h
+++ b/src/libffmpeg/libavcodec/ppc/dsputil_ppc.h
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef _DSPUTIL_PPC_
@@ -114,10 +114,10 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
 #define POWERPC_GET_PMC6(a) do {} while (0)
 #endif
 #endif /* POWERPC_MODE_64BITS */
-#define POWERPC_PERF_DECLARE(a, cond)				\
-  POWERP_PMC_DATATYPE						\
-    pmc_start[POWERPC_NUM_PMC_ENABLED],				\
-    pmc_stop[POWERPC_NUM_PMC_ENABLED],				\
+#define POWERPC_PERF_DECLARE(a, cond)   \
+  POWERP_PMC_DATATYPE                   \
+    pmc_start[POWERPC_NUM_PMC_ENABLED], \
+    pmc_stop[POWERPC_NUM_PMC_ENABLED],  \
     pmc_loop_index;
 #define POWERPC_PERF_START_COUNT(a, cond) do { \
   POWERPC_GET_PMC6(pmc_start[5]); \
@@ -141,8 +141,8 @@ extern unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][
         pmc_loop_index++)         \
     {                             \
       if (pmc_stop[pmc_loop_index] >= pmc_start[pmc_loop_index])  \
-	{							  \
-        POWERP_PMC_DATATYPE diff =				  \
+        {                                                         \
+        POWERP_PMC_DATATYPE diff =                                \
           pmc_stop[pmc_loop_index] - pmc_start[pmc_loop_index];   \
         if (diff < perfdata[pmc_loop_index][a][powerpc_data_min]) \
           perfdata[pmc_loop_index][a][powerpc_data_min] = diff;   \
diff --git a/src/libffmpeg/libavcodec/ppc/fdct_altivec.c b/src/libffmpeg/libavcodec/ppc/fdct_altivec.c
index b38b909c6..f5778c24e 100644
--- a/src/libffmpeg/libavcodec/ppc/fdct_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/fdct_altivec.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 
diff --git a/src/libffmpeg/libavcodec/ppc/fft_altivec.c b/src/libffmpeg/libavcodec/ppc/fft_altivec.c
index 29d85e87d..f4ea78359 100644
--- a/src/libffmpeg/libavcodec/ppc/fft_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/fft_altivec.c
@@ -16,7 +16,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "../dsputil.h"
 
@@ -65,15 +65,15 @@ void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z)
 POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
     int ln = s->nbits;
-    int	j, np, np2;
-    int	nblocks, nloops;
+    int j, np, np2;
+    int nblocks, nloops;
     register FFTComplex *p, *q;
     FFTComplex *exptab = s->exptab;
     int l;
     FFTSample tmp_re, tmp_im;
-    
+
 POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
- 
+
     np = 1 << ln;
 
     /* pass 0 */
@@ -81,29 +81,29 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
     p=&z[0];
     j=(np >> 1);
     do {
-        BF(p[0].re, p[0].im, p[1].re, p[1].im, 
+        BF(p[0].re, p[0].im, p[1].re, p[1].im,
            p[0].re, p[0].im, p[1].re, p[1].im);
         p+=2;
     } while (--j != 0);
 
     /* pass 1 */
 
-    
+
     p=&z[0];
     j=np >> 2;
     if (s->inverse) {
         do {
-            BF(p[0].re, p[0].im, p[2].re, p[2].im, 
+            BF(p[0].re, p[0].im, p[2].re, p[2].im,
                p[0].re, p[0].im, p[2].re, p[2].im);
-            BF(p[1].re, p[1].im, p[3].re, p[3].im, 
+            BF(p[1].re, p[1].im, p[3].re, p[3].im,
                p[1].re, p[1].im, -p[3].im, p[3].re);
             p+=4;
         } while (--j != 0);
     } else {
         do {
-            BF(p[0].re, p[0].im, p[2].re, p[2].im, 
+            BF(p[0].re, p[0].im, p[2].re, p[2].im,
                p[0].re, p[0].im, p[2].re, p[2].im);
-            BF(p[1].re, p[1].im, p[3].re, p[3].im, 
+            BF(p[1].re, p[1].im, p[3].re, p[3].im,
                p[1].re, p[1].im, p[3].im, -p[3].re);
             p+=4;
         } while (--j != 0);
@@ -119,7 +119,7 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
         for (j = 0; j < nblocks; ++j) {
             BF(p->re, p->im, q->re, q->im,
                p->re, p->im, q->re, q->im);
-            
+
             p++;
             q++;
             for(l = nblocks; l < np2; l += nblocks) {
@@ -145,10 +145,10 @@ POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
 #else
     register const vector float vczero = (const vector float){0.,0.,0.,0.};
 #endif
-    
+
     int ln = s->nbits;
-    int	j, np, np2;
-    int	nblocks, nloops;
+    int j, np, np2;
+    int nblocks, nloops;
     register FFTComplex *p, *q;
     FFTComplex *cptr, *cptr1;
     int k;
@@ -163,7 +163,7 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
         r = (vector float *)&z[0];
 
         c1 = vcii(p,p,n,n);
-        
+
         if (s->inverse)
             {
                 c2 = vcii(p,p,n,p);
@@ -172,27 +172,27 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
             {
                 c2 = vcii(p,p,p,n);
             }
-        
+
         j = (np >> 2);
         do {
             a = vec_ld(0, r);
             a1 = vec_ld(sizeof(vector float), r);
-            
+
             b = vec_perm(a,a,vcprmle(1,0,3,2));
             a = vec_madd(a,c1,b);
             /* do the pass 0 butterfly */
-            
+
             b = vec_perm(a1,a1,vcprmle(1,0,3,2));
             b = vec_madd(a1,c1,b);
             /* do the pass 0 butterfly */
-            
+
             /* multiply third by -i */
             b = vec_perm(b,b,vcprmle(2,3,1,0));
-            
+
             /* do the pass 1 butterfly */
             vec_st(vec_madd(b,c2,a), 0, r);
             vec_st(vec_nmsub(b,c2,a), sizeof(vector float), r);
-            
+
             r += 2;
         } while (--j != 0);
     }
@@ -215,7 +215,7 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
 
                 a = vec_ld(0, (float*)p);
                 b = vec_ld(0, (float*)q);
-                
+
                 /* complex mul */
                 c = vec_ld(0, (float*)cptr);
                 /*  cre*re cim*re */
@@ -223,16 +223,16 @@ POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
                 c = vec_ld(sizeof(vector float), (float*)cptr);
                 /*  -cim*im cre*im */
                 b = vec_madd(c, vec_perm(b,b,vcprmle(3,3,1,1)),t1);
-                
+
                 /* butterfly */
                 vec_st(vec_add(a,b), 0, (float*)p);
                 vec_st(vec_sub(a,b), 0, (float*)q);
-                
+
                 p += 2;
                 q += 2;
                 cptr += 4;
             } while (--k);
-            
+
             p += nloops;
             q += nloops;
         } while (--j);
diff --git a/src/libffmpeg/libavcodec/ppc/gcc_fixes.h b/src/libffmpeg/libavcodec/ppc/gcc_fixes.h
index 13d4ff12e..288fdf834 100644
--- a/src/libffmpeg/libavcodec/ppc/gcc_fixes.h
+++ b/src/libffmpeg/libavcodec/ppc/gcc_fixes.h
@@ -1,6 +1,6 @@
 /*
  * gcc fixes for altivec.
- * Used to workaround broken gcc (FSF gcc-3 pre gcc-3.3) 
+ * Used to workaround broken gcc (FSF gcc-3 pre gcc-3.3)
  * and to stay somewhat compatible with Darwin.
  */
 
@@ -19,7 +19,7 @@
 # endif
 #else
 #define AVV(x...) {x}
-#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)  
+#if (__GNUC__ * 100 + __GNUC_MINOR__ < 303)
 
 /* This code was provided to me by Bartosch Pixa
  * as a separate header file (broken_mergel.h).
@@ -30,37 +30,37 @@
  */
 
 static inline vector signed char ff_vmrglb (vector signed char const A,
-					  vector signed char const B)
+                                          vector signed char const B)
 {
     static const vector unsigned char lowbyte = {
-	0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b,  0x1b,
-	0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f
+        0x08, 0x18, 0x09, 0x19, 0x0a, 0x1a, 0x0b,  0x1b,
+        0x0c, 0x1c, 0x0d, 0x1d, 0x0e, 0x1e, 0x0f, 0x1f
     };
     return vec_perm (A, B, lowbyte);
 }
 
 static inline vector signed short ff_vmrglh (vector signed short const A,
-					  vector signed short const B)
+                                          vector signed short const B)
 {
     static const vector unsigned char lowhalf = {
-    	0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
-	0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
+        0x08, 0x09, 0x18, 0x19, 0x0a, 0x0b, 0x1a, 0x1b,
+        0x0c, 0x0d, 0x1c, 0x1d, 0x0e, 0x0f, 0x1e, 0x1f
     };
     return vec_perm (A, B, lowhalf);
 }
 
 static inline vector signed int ff_vmrglw (vector signed int const A,
-					  vector signed int const B)
+                                          vector signed int const B)
 {
     static const vector unsigned char lowword = {
-    	0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b,
-	0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f
+        0x08, 0x09, 0x0a, 0x0b, 0x18, 0x19, 0x1a, 0x1b,
+        0x0c, 0x0d, 0x0e, 0x0f, 0x1c, 0x1d, 0x1e, 0x1f
     };
     return vec_perm (A, B, lowword);
 }
-/*#define ff_vmrglb ff_vmrglb 
-#define ff_vmrglh ff_vmrglh 
-#define ff_vmrglw ff_vmrglw 
+/*#define ff_vmrglb ff_vmrglb
+#define ff_vmrglh ff_vmrglh
+#define ff_vmrglw ff_vmrglw
 */
 #undef vec_mergel
 
diff --git a/src/libffmpeg/libavcodec/ppc/gmc_altivec.c b/src/libffmpeg/libavcodec/ppc/gmc_altivec.c
index 344821685..04978d825 100644
--- a/src/libffmpeg/libavcodec/ppc/gmc_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/gmc_altivec.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "../dsputil.h"
@@ -40,7 +40,7 @@ POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
     int i;
 
 POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
-    
+
     for(i=0; i<h; i++)
     {
         dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
@@ -87,7 +87,7 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
     Dv = vec_splat(tempA, 3);
 
     rounderV = vec_ld(0, (unsigned short*)rounder_a);
-    
+
     // we'll be able to pick-up our 9 char elements
     // at src from those 32 bytes
     // we load the first batch here, as inside the loop
@@ -96,7 +96,7 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
     src_0 = vec_ld(0, src);
     src_1 = vec_ld(16, src);
     srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));
-    
+
     if (src_really_odd != 0x0000000F)
     { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
       srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
@@ -107,14 +107,14 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
     }
     srcvA = vec_mergeh(vczero, srcvA);
     srcvB = vec_mergeh(vczero, srcvB);
-    
+
     for(i=0; i<h; i++)
     {
       dst_odd = (unsigned long)dst & 0x0000000F;
       src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;
-      
+
       dstv = vec_ld(0, dst);
-      
+
       // we we'll be able to pick-up our 9 char elements
       // at src + stride from those 32 bytes
       // then reuse the resulting 2 vectors srvcC and srcvD
@@ -122,7 +122,7 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
       src_0 = vec_ld(stride + 0, src);
       src_1 = vec_ld(stride + 16, src);
       srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
-      
+
       if (src_really_odd != 0x0000000F)
       { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
         srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
@@ -131,10 +131,10 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
       {
         srcvD = src_1;
       }
-      
+
       srcvC = vec_mergeh(vczero, srcvC);
       srcvD = vec_mergeh(vczero, srcvD);
-      
+
 
       // OK, now we (finally) do the math :-)
       // those four instructions replaces 32 int muls & 32 int adds.
@@ -143,14 +143,14 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
       tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
       tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
       tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);
-      
+
       srcvA = srcvC;
       srcvB = srcvD;
-      
+
       tempD = vec_sr(tempD, vcsr8);
-      
+
       dstv2 = vec_pack(tempD, (vector unsigned short)vczero);
-      
+
       if (dst_odd)
       {
         dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
@@ -159,9 +159,9 @@ POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
       {
         dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
       }
-      
+
       vec_st(dstv2, 0, dst);
-      
+
       dst += stride;
       src += stride;
     }
diff --git a/src/libffmpeg/libavcodec/ppc/idct_altivec.c b/src/libffmpeg/libavcodec/ppc/idct_altivec.c
index 3445adadd..93d63cfd3 100644
--- a/src/libffmpeg/libavcodec/ppc/idct_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/idct_altivec.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -51,108 +51,108 @@
 #define vector_s32_t vector signed int
 #define vector_u32_t vector unsigned int
 
-#define IDCT_HALF					\
-    /* 1st stage */					\
-    t1 = vec_mradds (a1, vx7, vx1 );			\
-    t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7));	\
-    t7 = vec_mradds (a2, vx5, vx3);			\
-    t3 = vec_mradds (ma2, vx3, vx5);			\
-							\
-    /* 2nd stage */					\
-    t5 = vec_adds (vx0, vx4);				\
-    t0 = vec_subs (vx0, vx4);				\
-    t2 = vec_mradds (a0, vx6, vx2);			\
-    t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6));	\
-    t6 = vec_adds (t8, t3);				\
-    t3 = vec_subs (t8, t3);				\
-    t8 = vec_subs (t1, t7);				\
-    t1 = vec_adds (t1, t7);				\
-							\
-    /* 3rd stage */					\
-    t7 = vec_adds (t5, t2);				\
-    t2 = vec_subs (t5, t2);				\
-    t5 = vec_adds (t0, t4);				\
-    t0 = vec_subs (t0, t4);				\
-    t4 = vec_subs (t8, t3);				\
-    t3 = vec_adds (t8, t3);				\
-							\
-    /* 4th stage */					\
-    vy0 = vec_adds (t7, t1);				\
-    vy7 = vec_subs (t7, t1);				\
-    vy1 = vec_mradds (c4, t3, t5);			\
-    vy6 = vec_mradds (mc4, t3, t5);			\
-    vy2 = vec_mradds (c4, t4, t0);			\
-    vy5 = vec_mradds (mc4, t4, t0);			\
-    vy3 = vec_adds (t2, t6);				\
+#define IDCT_HALF                                       \
+    /* 1st stage */                                     \
+    t1 = vec_mradds (a1, vx7, vx1 );                    \
+    t8 = vec_mradds (a1, vx1, vec_subs (zero, vx7));    \
+    t7 = vec_mradds (a2, vx5, vx3);                     \
+    t3 = vec_mradds (ma2, vx3, vx5);                    \
+                                                        \
+    /* 2nd stage */                                     \
+    t5 = vec_adds (vx0, vx4);                           \
+    t0 = vec_subs (vx0, vx4);                           \
+    t2 = vec_mradds (a0, vx6, vx2);                     \
+    t4 = vec_mradds (a0, vx2, vec_subs (zero, vx6));    \
+    t6 = vec_adds (t8, t3);                             \
+    t3 = vec_subs (t8, t3);                             \
+    t8 = vec_subs (t1, t7);                             \
+    t1 = vec_adds (t1, t7);                             \
+                                                        \
+    /* 3rd stage */                                     \
+    t7 = vec_adds (t5, t2);                             \
+    t2 = vec_subs (t5, t2);                             \
+    t5 = vec_adds (t0, t4);                             \
+    t0 = vec_subs (t0, t4);                             \
+    t4 = vec_subs (t8, t3);                             \
+    t3 = vec_adds (t8, t3);                             \
+                                                        \
+    /* 4th stage */                                     \
+    vy0 = vec_adds (t7, t1);                            \
+    vy7 = vec_subs (t7, t1);                            \
+    vy1 = vec_mradds (c4, t3, t5);                      \
+    vy6 = vec_mradds (mc4, t3, t5);                     \
+    vy2 = vec_mradds (c4, t4, t0);                      \
+    vy5 = vec_mradds (mc4, t4, t0);                     \
+    vy3 = vec_adds (t2, t6);                            \
     vy4 = vec_subs (t2, t6);
 
-	
-#define IDCT								\
-    vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;		\
-    vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;		\
-    vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias;			\
-    vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8;			\
-    vector_u16_t shift;							\
-									\
-    c4 = vec_splat (constants[0], 0);					\
-    a0 = vec_splat (constants[0], 1);					\
-    a1 = vec_splat (constants[0], 2);					\
-    a2 = vec_splat (constants[0], 3);					\
-    mc4 = vec_splat (constants[0], 4);					\
-    ma2 = vec_splat (constants[0], 5);					\
-    bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3);	\
-									\
-    zero = vec_splat_s16 (0);						\
-    shift = vec_splat_u16 (4);						\
-									\
-    vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero);	\
-    vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero);	\
-    vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero);	\
-    vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero);	\
-    vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero);	\
-    vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero);	\
-    vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero);	\
-    vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero);	\
-									\
-    IDCT_HALF								\
-									\
-    vx0 = vec_mergeh (vy0, vy4);					\
-    vx1 = vec_mergel (vy0, vy4);					\
-    vx2 = vec_mergeh (vy1, vy5);					\
-    vx3 = vec_mergel (vy1, vy5);					\
-    vx4 = vec_mergeh (vy2, vy6);					\
-    vx5 = vec_mergel (vy2, vy6);					\
-    vx6 = vec_mergeh (vy3, vy7);					\
-    vx7 = vec_mergel (vy3, vy7);					\
-									\
-    vy0 = vec_mergeh (vx0, vx4);					\
-    vy1 = vec_mergel (vx0, vx4);					\
-    vy2 = vec_mergeh (vx1, vx5);					\
-    vy3 = vec_mergel (vx1, vx5);					\
-    vy4 = vec_mergeh (vx2, vx6);					\
-    vy5 = vec_mergel (vx2, vx6);					\
-    vy6 = vec_mergeh (vx3, vx7);					\
-    vy7 = vec_mergel (vx3, vx7);					\
-									\
-    vx0 = vec_adds (vec_mergeh (vy0, vy4), bias);			\
-    vx1 = vec_mergel (vy0, vy4);					\
-    vx2 = vec_mergeh (vy1, vy5);					\
-    vx3 = vec_mergel (vy1, vy5);					\
-    vx4 = vec_mergeh (vy2, vy6);					\
-    vx5 = vec_mergel (vy2, vy6);					\
-    vx6 = vec_mergeh (vy3, vy7);					\
-    vx7 = vec_mergel (vy3, vy7);					\
-									\
-    IDCT_HALF								\
-									\
-    shift = vec_splat_u16 (6);						\
-    vx0 = vec_sra (vy0, shift);						\
-    vx1 = vec_sra (vy1, shift);						\
-    vx2 = vec_sra (vy2, shift);						\
-    vx3 = vec_sra (vy3, shift);						\
-    vx4 = vec_sra (vy4, shift);						\
-    vx5 = vec_sra (vy5, shift);						\
-    vx6 = vec_sra (vy6, shift);						\
+
+#define IDCT                                                            \
+    vector_s16_t vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7;                \
+    vector_s16_t vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7;                \
+    vector_s16_t a0, a1, a2, ma2, c4, mc4, zero, bias;                  \
+    vector_s16_t t0, t1, t2, t3, t4, t5, t6, t7, t8;                    \
+    vector_u16_t shift;                                                 \
+                                                                        \
+    c4 = vec_splat (constants[0], 0);                                   \
+    a0 = vec_splat (constants[0], 1);                                   \
+    a1 = vec_splat (constants[0], 2);                                   \
+    a2 = vec_splat (constants[0], 3);                                   \
+    mc4 = vec_splat (constants[0], 4);                                  \
+    ma2 = vec_splat (constants[0], 5);                                  \
+    bias = (vector_s16_t)vec_splat ((vector_s32_t)constants[0], 3);     \
+                                                                        \
+    zero = vec_splat_s16 (0);                                           \
+    shift = vec_splat_u16 (4);                                          \
+                                                                        \
+    vx0 = vec_mradds (vec_sl (block[0], shift), constants[1], zero);    \
+    vx1 = vec_mradds (vec_sl (block[1], shift), constants[2], zero);    \
+    vx2 = vec_mradds (vec_sl (block[2], shift), constants[3], zero);    \
+    vx3 = vec_mradds (vec_sl (block[3], shift), constants[4], zero);    \
+    vx4 = vec_mradds (vec_sl (block[4], shift), constants[1], zero);    \
+    vx5 = vec_mradds (vec_sl (block[5], shift), constants[4], zero);    \
+    vx6 = vec_mradds (vec_sl (block[6], shift), constants[3], zero);    \
+    vx7 = vec_mradds (vec_sl (block[7], shift), constants[2], zero);    \
+                                                                        \
+    IDCT_HALF                                                           \
+                                                                        \
+    vx0 = vec_mergeh (vy0, vy4);                                        \
+    vx1 = vec_mergel (vy0, vy4);                                        \
+    vx2 = vec_mergeh (vy1, vy5);                                        \
+    vx3 = vec_mergel (vy1, vy5);                                        \
+    vx4 = vec_mergeh (vy2, vy6);                                        \
+    vx5 = vec_mergel (vy2, vy6);                                        \
+    vx6 = vec_mergeh (vy3, vy7);                                        \
+    vx7 = vec_mergel (vy3, vy7);                                        \
+                                                                        \
+    vy0 = vec_mergeh (vx0, vx4);                                        \
+    vy1 = vec_mergel (vx0, vx4);                                        \
+    vy2 = vec_mergeh (vx1, vx5);                                        \
+    vy3 = vec_mergel (vx1, vx5);                                        \
+    vy4 = vec_mergeh (vx2, vx6);                                        \
+    vy5 = vec_mergel (vx2, vx6);                                        \
+    vy6 = vec_mergeh (vx3, vx7);                                        \
+    vy7 = vec_mergel (vx3, vx7);                                        \
+                                                                        \
+    vx0 = vec_adds (vec_mergeh (vy0, vy4), bias);                       \
+    vx1 = vec_mergel (vy0, vy4);                                        \
+    vx2 = vec_mergeh (vy1, vy5);                                        \
+    vx3 = vec_mergel (vy1, vy5);                                        \
+    vx4 = vec_mergeh (vy2, vy6);                                        \
+    vx5 = vec_mergel (vy2, vy6);                                        \
+    vx6 = vec_mergeh (vy3, vy7);                                        \
+    vx7 = vec_mergel (vy3, vy7);                                        \
+                                                                        \
+    IDCT_HALF                                                           \
+                                                                        \
+    shift = vec_splat_u16 (6);                                          \
+    vx0 = vec_sra (vy0, shift);                                         \
+    vx1 = vec_sra (vy1, shift);                                         \
+    vx2 = vec_sra (vy2, shift);                                         \
+    vx3 = vec_sra (vy3, shift);                                         \
+    vx4 = vec_sra (vy4, shift);                                         \
+    vx5 = vec_sra (vy5, shift);                                         \
+    vx6 = vec_sra (vy6, shift);                                         \
     vx7 = vec_sra (vy7, shift);
 
 
@@ -180,18 +180,18 @@ POWERPC_PERF_START_COUNT(altivec_idct_put_num, 1);
 #endif
     IDCT
 
-#define COPY(dest,src)						\
-    tmp = vec_packsu (src, src);				\
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);	\
+#define COPY(dest,src)                                          \
+    tmp = vec_packsu (src, src);                                \
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);       \
     vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
 
-    COPY (dest, vx0)	dest += stride;
-    COPY (dest, vx1)	dest += stride;
-    COPY (dest, vx2)	dest += stride;
-    COPY (dest, vx3)	dest += stride;
-    COPY (dest, vx4)	dest += stride;
-    COPY (dest, vx5)	dest += stride;
-    COPY (dest, vx6)	dest += stride;
+    COPY (dest, vx0)    dest += stride;
+    COPY (dest, vx1)    dest += stride;
+    COPY (dest, vx2)    dest += stride;
+    COPY (dest, vx3)    dest += stride;
+    COPY (dest, vx4)    dest += stride;
+    COPY (dest, vx5)    dest += stride;
+    COPY (dest, vx6)    dest += stride;
     COPY (dest, vx7)
 
 POWERPC_PERF_STOP_COUNT(altivec_idct_put_num, 1);
@@ -225,22 +225,22 @@ POWERPC_PERF_START_COUNT(altivec_idct_add_num, 1);
     perm0 = vec_mergeh (p, p0);
     perm1 = vec_mergeh (p, p1);
 
-#define ADD(dest,src,perm)						\
-    /* *(uint64_t *)&tmp = *(uint64_t *)dest; */			\
-    tmp = vec_ld (0, dest);						\
-    tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm);	\
-    tmp3 = vec_adds (tmp2, src);					\
-    tmp = vec_packsu (tmp3, tmp3);					\
-    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);		\
+#define ADD(dest,src,perm)                                              \
+    /* *(uint64_t *)&tmp = *(uint64_t *)dest; */                        \
+    tmp = vec_ld (0, dest);                                             \
+    tmp2 = (vector_s16_t)vec_perm (tmp, (vector_u8_t)zero, perm);       \
+    tmp3 = vec_adds (tmp2, src);                                        \
+    tmp = vec_packsu (tmp3, tmp3);                                      \
+    vec_ste ((vector_u32_t)tmp, 0, (unsigned int *)dest);               \
     vec_ste ((vector_u32_t)tmp, 4, (unsigned int *)dest);
 
-    ADD (dest, vx0, perm0)	dest += stride;
-    ADD (dest, vx1, perm1)	dest += stride;
-    ADD (dest, vx2, perm0)	dest += stride;
-    ADD (dest, vx3, perm1)	dest += stride;
-    ADD (dest, vx4, perm0)	dest += stride;
-    ADD (dest, vx5, perm1)	dest += stride;
-    ADD (dest, vx6, perm0)	dest += stride;
+    ADD (dest, vx0, perm0)      dest += stride;
+    ADD (dest, vx1, perm1)      dest += stride;
+    ADD (dest, vx2, perm0)      dest += stride;
+    ADD (dest, vx3, perm1)      dest += stride;
+    ADD (dest, vx4, perm0)      dest += stride;
+    ADD (dest, vx5, perm1)      dest += stride;
+    ADD (dest, vx6, perm0)      dest += stride;
     ADD (dest, vx7, perm1)
 
 POWERPC_PERF_STOP_COUNT(altivec_idct_add_num, 1);
diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
index 91e744af9..7a771a8ec 100644
--- a/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
+++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_altivec.c
@@ -16,7 +16,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include <stdlib.h>
@@ -25,7 +25,7 @@
 #include "../mpegvideo.h"
 
 #include "gcc_fixes.h"
- 
+
 #include "dsputil_altivec.h"
 
 // Swaps two variables (used for altivec registers)
@@ -103,7 +103,7 @@ do { \
 // slower, for dumb non-apple GCC
 #define FOUROF(a) {a,a,a,a}
 #endif
-int dct_quantize_altivec(MpegEncContext* s, 
+int dct_quantize_altivec(MpegEncContext* s,
                         DCTELEM* data, int n,
                         int qscale, int* overflow)
 {
@@ -152,9 +152,9 @@ int dct_quantize_altivec(MpegEncContext* s,
     }
 
     // The following block could exist as a separate an altivec dct
-		// function.  However, if we put it inline, the DCT data can remain
-		// in the vector local variables, as floats, which we'll use during the
-		// quantize step...
+                // function.  However, if we put it inline, the DCT data can remain
+                // in the vector local variables, as floats, which we'll use during the
+                // quantize step...
     {
         const vector float vec_0_298631336 = (vector float)FOUROF(0.298631336f);
         const vector float vec_0_390180644 = (vector float)FOUROF(-0.390180644f);
@@ -206,11 +206,11 @@ int dct_quantize_altivec(MpegEncContext* s,
                 z1 = vec_madd(vec_add(tmp12, tmp13), vec_0_541196100, (vector float)zero);
 
                 // dataptr[2] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
-                //		   CONST_BITS-PASS1_BITS);
+                //                                CONST_BITS-PASS1_BITS);
                 row2 = vec_madd(tmp13, vec_0_765366865, z1);
 
                 // dataptr[6] = (DCTELEM) DESCALE(z1 + MULTIPLY(tmp12, - FIX_1_847759065),
-                //		   CONST_BITS-PASS1_BITS);
+                //                                CONST_BITS-PASS1_BITS);
                 row6 = vec_madd(tmp12, vec_1_847759065, z1);
 
                 z1 = vec_add(tmp4, tmp7); // z1 = tmp4 + tmp7;
@@ -273,7 +273,7 @@ int dct_quantize_altivec(MpegEncContext* s,
             if (whichPass == 1)
             {
                 // transpose the data for the second pass
-                 
+
                 // First, block transpose the upper right with lower left.
                 SWAP(row4, alt0);
                 SWAP(row5, alt1);
@@ -315,7 +315,7 @@ int dct_quantize_altivec(MpegEncContext* s,
         }
 
         // Load the bias vector (We add 0.5 to the bias so that we're
-				// rounding when we convert to int, instead of flooring.)
+                                // rounding when we convert to int, instead of flooring.)
         {
             vector signed int biasInt;
             const vector float negOneFloat = (vector float)FOUROF(-1.0f);
@@ -380,7 +380,7 @@ int dct_quantize_altivec(MpegEncContext* s,
                     vec_cmpgt(alt7, zero));
         }
 
- 
+
     }
 
     // Store the data back into the original block
@@ -469,7 +469,7 @@ int dct_quantize_altivec(MpegEncContext* s,
         vec_ste(scanIndices_01, 0, &lastNonZeroChar);
 
         lastNonZero = lastNonZeroChar;
-        
+
         // While the data is still in vectors we check for the transpose IDCT permute
         // and handle it using the vector unit if we can.  This is the permute used
         // by the altivec idct, so it is common when using the altivec dct.
@@ -523,30 +523,30 @@ int dct_quantize_altivec(MpegEncContext* s,
   AltiVec version of dct_unquantize_h263
   this code assumes `block' is 16 bytes-aligned
 */
-void dct_unquantize_h263_altivec(MpegEncContext *s, 
+void dct_unquantize_h263_altivec(MpegEncContext *s,
                                  DCTELEM *block, int n, int qscale)
 {
 POWERPC_PERF_DECLARE(altivec_dct_unquantize_h263_num, 1);
     int i, level, qmul, qadd;
     int nCoeffs;
-    
+
     assert(s->block_last_index[n]>=0);
 
 POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
-    
+
     qadd = (qscale - 1) | 1;
     qmul = qscale << 1;
-    
+
     if (s->mb_intra) {
         if (!s->h263_aic) {
-            if (n < 4) 
+            if (n < 4)
                 block[0] = block[0] * s->y_dc_scale;
             else
                 block[0] = block[0] * s->c_dc_scale;
         }else
             qadd = 0;
         i = 1;
-        nCoeffs= 63; //does not allways use zigzag table 
+        nCoeffs= 63; //does not allways use zigzag table
     } else {
         i = 0;
         nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
@@ -586,7 +586,7 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
       register vector bool short blockv_null, blockv_neg;
       register short backup_0 = block[0];
       register int j = 0;
-      
+
       qmulv = vec_ld(0, qmul8);
       qaddv = vec_ld(0, qadd8);
       nqaddv = vec_ld(0, nqadd8);
@@ -605,7 +605,7 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
         }
       }
 #endif
-      
+
       // vectorize all the 16 bytes-aligned blocks
       // of 8 elements
       for(; (j + 7) <= nCoeffs ; j+=8)
@@ -637,7 +637,7 @@ POWERPC_PERF_START_COUNT(altivec_dct_unquantize_h263_num, 1);
             block[j] = level;
         }
       }
-      
+
       if (i == 1)
       { // cheat. this avoid special-casing the first iteration
         block[0] = backup_0;
diff --git a/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c b/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c
index 832baced0..b391b4294 100644
--- a/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c
+++ b/src/libffmpeg/libavcodec/ppc/mpegvideo_ppc.c
@@ -13,9 +13,9 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 #include "../dsputil.h"
 #include "../mpegvideo.h"
 #include <time.h>
@@ -24,7 +24,7 @@
 #include "dsputil_altivec.h"
 #endif
 
-extern int dct_quantize_altivec(MpegEncContext *s,  
+extern int dct_quantize_altivec(MpegEncContext *s,
         DCTELEM *block, int n,
         int qscale, int *overflow);
 extern void dct_unquantize_h263_altivec(MpegEncContext *s,
diff --git a/src/libffmpeg/libavcodec/qdm2.c b/src/libffmpeg/libavcodec/qdm2.c
index 211859c46..98bec5cca 100644
--- a/src/libffmpeg/libavcodec/qdm2.c
+++ b/src/libffmpeg/libavcodec/qdm2.c
@@ -17,7 +17,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -25,8 +25,8 @@
  * @file qdm2.c
  * QDM2 decoder
  * @author Ewald Snel, Benjamin Larsson, Alex Beregszaszi, Roberto Togni
- * The decoder is not perfect yet, there are still some distorions expecially
- * on files encoded with 16 or 8 subbands
+ * The decoder is not perfect yet, there are still some distortions
+ * especially on files encoded with 16 or 8 subbands.
  */
 
 #include <math.h>
@@ -94,7 +94,7 @@ typedef struct {
 } QDM2SubPacket;
 
 /**
- * A node in subpacket list
+ * A node in the subpacket list
  */
 typedef struct _QDM2SubPNode {
     QDM2SubPacket *packet;      ///< packet
@@ -196,12 +196,12 @@ typedef struct {
     int8_t tone_level_idx_temp[MPA_MAX_CHANNELS][30][64];
 
     // Flags
-    int has_errors;         ///< packet have errors
+    int has_errors;         ///< packet has errors
     int superblocktype_2_3; ///< select fft tables and some algorithm based on superblock type
     int do_synth_filter;    ///< used to perform or skip synthesis filter
 
     int sub_packet;
-    int noise_idx; ///< Index for dithering noise table
+    int noise_idx; ///< index for dithering noise table
 } QDM2Context;
 
 
@@ -230,7 +230,7 @@ static float noise_samples[128];
 static MPA_INT mpa_window[512] __attribute__((aligned(16)));
 
 
-static void softclip_table_init() {
+static void softclip_table_init(void) {
     int i;
     double dfl = SOFTCLIP_THRESHOLD - 32767;
     float delta = 1.0 / -dfl;
@@ -240,7 +240,7 @@ static void softclip_table_init() {
 
 
 // random generated table
-static void rnd_table_init() {
+static void rnd_table_init(void) {
     int i,j;
     uint32_t ldw,hdw;
     uint64_t tmp64_1;
@@ -276,7 +276,7 @@ static void rnd_table_init() {
 }
 
 
-static void init_noise_samples() {
+static void init_noise_samples(void) {
     int i;
     int random_seed = 0;
     float delta = 1.0 / 16384.0;
@@ -287,7 +287,7 @@ static void init_noise_samples() {
 }
 
 
-static void qdm2_init_vlc()
+static void qdm2_init_vlc(void)
 {
     init_vlc (&vlc_tab_level, 8, 24,
         vlc_tab_level_huffbits, 1, 1,
@@ -401,7 +401,7 @@ static int qdm2_get_se_vlc (VLC *vlc, GetBitContext *gb, int depth)
  * @param length    data length
  * @param value     checksum value
  *
- * @return          0 if checksum is ok
+ * @return          0 if checksum is OK
  */
 static uint16_t qdm2_packet_checksum (uint8_t *data, int length, int value) {
     int i;
@@ -414,7 +414,7 @@ static uint16_t qdm2_packet_checksum (uint8_t *data, int length, int value) {
 
 
 /**
- * Fills a QDM2SubPacket structure with packet type, size, and data pointer
+ * Fills a QDM2SubPacket structure with packet type, size, and data pointer.
  *
  * @param gb            bitreader context
  * @param sub_packet    packet under analysis
@@ -441,15 +441,15 @@ static void qdm2_decode_sub_packet_header (GetBitContext *gb, QDM2SubPacket *sub
       sub_packet->data = &gb->buffer[get_bits_count(gb) / 8]; // FIXME: this depends on bitreader internal data
     }
 
-    av_log(NULL,AV_LOG_DEBUG,"Sub packet: type=%d size=%d start_offs=%x\n",
+    av_log(NULL,AV_LOG_DEBUG,"Subpacket: type=%d size=%d start_offs=%x\n",
         sub_packet->type, sub_packet->size, get_bits_count(gb) / 8);
 }
 
 
 /**
- * Return node pointer to first packet of requested type in list
+ * Return node pointer to first packet of requested type in list.
  *
- * @param list    list of subpacket to be scanned
+ * @param list    list of subpackets to be scanned
  * @param type    type of searched subpacket
  * @return        node pointer for subpacket if found, else NULL
  */
@@ -465,8 +465,8 @@ static QDM2SubPNode* qdm2_search_subpacket_type_in_list (QDM2SubPNode *list, int
 
 
 /**
- * Replaces 8 elements with their average value
- * Called by qdm2_decode_superblock before starting subblocks decoding
+ * Replaces 8 elements with their average value.
+ * Called by qdm2_decode_superblock before starting subblock decoding.
  *
  * @param q       context
  */
@@ -494,8 +494,8 @@ static void average_quantized_coeffs (QDM2Context *q)
 
 
 /**
- * Build subband samples with noise weighted by q->tone_level
- * Called by synthfilt_build_sb_samples
+ * Build subband samples with noise weighted by q->tone_level.
+ * Called by synthfilt_build_sb_samples.
  *
  * @param q     context
  * @param sb    subband index
@@ -518,14 +518,14 @@ static void build_sb_samples_from_noise (QDM2Context *q, int sb)
 
 
 /**
- * Called while processing data from subpackets 11 and 12
- * Used after making changes to coding_method array
+ * Called while processing data from subpackets 11 and 12.
+ * Used after making changes to coding_method array.
  *
  * @param sb               subband index
  * @param channels         number of channels
  * @param coding_method    q->coding_method[0][0][0]
  */
- void fix_coding_method_array (int sb, int channels, sb_int8_array coding_method)
+static void fix_coding_method_array (int sb, int channels, sb_int8_array coding_method)
 {
     int j,k;
     int ch;
@@ -657,7 +657,7 @@ static void fill_tone_level_array (QDM2Context *q, int flag)
  * c is built with data from subpacket 11
  * Most of this function is used only if superblock_type_2_3 == 0, never seen it in samples
  *
- * @param tone_level_idx           
+ * @param tone_level_idx
  * @param tone_level_idx_temp
  * @param coding_method        q->coding_method[0][0][0]
  * @param nb_channels          number of channels
@@ -790,7 +790,7 @@ static void fill_coding_method_array (sb_int8_array tone_level_idx, sb_int8_arra
  *
  * @param q         context
  * @param gb        bitreader context
- * @param length    packet length in bit
+ * @param length    packet length in bits
  * @param sb_min    lower subband processed (sb_min included)
  * @param sb_max    higher subband processed (sb_max excluded)
  */
@@ -916,7 +916,7 @@ static void synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int l
                             samples[0] = type30_dequant[qdm2_get_vlc(gb, &vlc_tab_type30, 0, 1)];
                         else
                             samples[0] = SB_DITHERING_NOISE(sb,q->noise_idx);
-                        
+
                         run = 1;
                         break;
 
@@ -968,14 +968,14 @@ static void synthfilt_build_sb_samples (QDM2Context *q, GetBitContext *gb, int l
 
 
 /**
- * Init the first element of a channel in quantized_coeffs with data from packet 10 (quantized_coeffs[ch][0])
+ * Init the first element of a channel in quantized_coeffs with data from packet 10 (quantized_coeffs[ch][0]).
  * This is similar to process_subpacket_9, but for a single channel and for element [0]
- * same VLC tables as process_subpacket_9 are used
+ * same VLC tables as process_subpacket_9 are used.
  *
  * @param q         context
  * @param quantized_coeffs    pointer to quantized_coeffs[ch][0]
  * @param gb        bitreader context
- * @param length    packet length in bit
+ * @param length    packet length in bits
  */
 static void init_quantized_coeffs_elem0 (int8_t *quantized_coeffs, GetBitContext *gb, int length)
 {
@@ -995,10 +995,10 @@ static void init_quantized_coeffs_elem0 (int8_t *quantized_coeffs, GetBitContext
         if (BITS_LEFT(length,gb) < 16)
             break;
         diff = qdm2_get_se_vlc(&vlc_tab_diff, gb, 2);
-    
+
         for (k = 1; k <= run; k++)
             quantized_coeffs[i + k] = (level + ((k * diff) / run));
-    
+
         level += diff;
         i += run;
     }
@@ -1012,7 +1012,7 @@ static void init_quantized_coeffs_elem0 (int8_t *quantized_coeffs, GetBitContext
  *
  * @param q         context
  * @param gb        bitreader context
- * @param length    packet length in bit
+ * @param length    packet length in bits
  */
 static void init_tone_level_dequantization (QDM2Context *q, GetBitContext *gb, int length)
 {
@@ -1114,7 +1114,7 @@ static void process_subpacket_9 (QDM2Context *q, QDM2SubPNode *node)
  *
  * @param q         context
  * @param node      pointer to node with packet
- * @param length    packet length in bit
+ * @param length    packet length in bits
  */
 static void process_subpacket_10 (QDM2Context *q, QDM2SubPNode *node, int length)
 {
@@ -1160,7 +1160,7 @@ static void process_subpacket_11 (QDM2Context *q, QDM2SubPNode *node, int length
  *
  * @param q         context
  * @param node      pointer to node with packet
- * @param length    packet length in bit
+ * @param length    packet length in bits
  */
 static void process_subpacket_12 (QDM2Context *q, QDM2SubPNode *node, int length)
 {
@@ -1205,7 +1205,7 @@ static void process_synthesis_subpackets (QDM2Context *q, QDM2SubPNode *list)
 
 
 /*
- * Decode superblock, fill packet lists
+ * Decode superblock, fill packet lists.
  *
  * @param q    context
  */
@@ -1274,7 +1274,7 @@ static void qdm2_decode_super_block (QDM2Context *q)
                 break;
         }
 
-        /* decode sub packet */
+        /* decode subpacket */
         packet = &q->sub_packets[i];
         qdm2_decode_sub_packet_header(&gb, packet);
         next_index = packet->size + get_bits_count(&gb) / 8;
@@ -1291,10 +1291,10 @@ static void qdm2_decode_super_block (QDM2Context *q)
 
         packet_bytes -= sub_packet_size;
 
-        /* add sub packet to 'all sub packets' list */
+        /* add subpacket to 'all subpackets' list */
         q->sub_packet_list_A[i].packet = packet;
 
-        /* add sub packet to related list */
+        /* add subpacket to related list */
         if (packet->type == 8) {
             SAMPLES_NEEDED_2("packet type 8");
             return;
@@ -1435,11 +1435,11 @@ static void qdm2_decode_fft_packets (QDM2Context *q)
     for (i=0; i < 5; i++)
         q->fft_coefs_min_index[i] = -1;
 
-    /* process sub packets ordered by type, largest type first */
+    /* process subpackets ordered by type, largest type first */
     for (i = 0, max = 256; i < q->sub_packets_B; i++) {
         QDM2SubPacket *packet;
 
-        /* find sub packet with largest type less than max */
+        /* find subpacket with largest type less than max */
         for (j = 0, min = 0, packet = NULL; j < q->sub_packets_B; j++) {
             value = q->sub_packet_list_B[j].packet->type;
             if (value > min && value < max) {
@@ -1619,7 +1619,7 @@ static void qdm2_calculate_fft (QDM2Context *q, int channel, int sub_packet)
     float c, s, f0, f1, f2, f3;
     int i, j;
 
-    /* pre rotation (or something like that) */
+    /* prerotation (or something like that) */
     for (i=1; i < n2; i++) {
         j  = (n - i);
         c = q->exptab[i].re;
@@ -1690,7 +1690,7 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index)
  *
  * @param q    context
  */
-void qdm2_init(QDM2Context *q) {
+static void qdm2_init(QDM2Context *q) {
     static int inited = 0;
 
     if (inited != 0)
@@ -1737,7 +1737,7 @@ static void dump_context(QDM2Context *q)
     for (i = q->fft_tone_start; i < q->fft_tone_end; i++)
     {
     FFTTone *t = &q->fft_tones[i];
-    
+
     av_log(NULL,AV_LOG_DEBUG,"Tone (%d) dump:\n", i);
     av_log(NULL,AV_LOG_DEBUG,"  level = %f\n", t->level);
 //  PRINT(" level", t->level);
@@ -1764,20 +1764,20 @@ static int qdm2_decode_init(AVCodecContext *avctx)
     int tmp_val, tmp, size;
     int i;
     float alpha;
-    
+
     /* extradata parsing
-    
+
     Structure:
     wave {
         frma (QDM2)
         QDCA
         QDCP
     }
-    
+
     32  size (including this field)
     32  tag (=frma)
     32  type (=QDM2 or QDMC)
-    
+
     32  size (including this field, in bytes)
     32  tag (=QDCA) // maybe mandatory parameters
     32  unknown (=1)
@@ -1787,7 +1787,7 @@ static int qdm2_decode_init(AVCodecContext *avctx)
     32  block size (=4096)
     32  frame size (=256) (for one channel)
     32  packet size (=1300)
-    
+
     32  size (including this field, in bytes)
     32  tag (=QDCP) // maybe some tuneable parameters
     32  float1 (=1.0)
@@ -1876,12 +1876,9 @@ static int qdm2_decode_init(AVCodecContext *avctx)
     s->group_order = av_log2(s->group_size) + 1;
     s->frame_size = s->group_size / 16; // 16 iterations per super block
 
-    if (s->fft_order == 8)
-        s->sub_sampling = 1;
-    else
-        s->sub_sampling = 2;
+    s->sub_sampling = s->fft_order - 7;
     s->frequency_range = 255 / (1 << (2 - s->sub_sampling));
-    
+
     switch ((s->sub_sampling * 2 + s->channels - 1)) {
         case 0: tmp = 40; break;
         case 1: tmp = 48; break;
@@ -1899,11 +1896,11 @@ static int qdm2_decode_init(AVCodecContext *avctx)
     s->cm_table_select = tmp_val;
 
     if (s->sub_sampling == 0)
-        tmp = 16000;
+        tmp = 7999;
     else
         tmp = ((-(s->sub_sampling -1)) & 8000) + 20000;
     /*
-    0: 16000 -> 1
+    0: 7999 -> 0
     1: 20000 -> 2
     2: 28000 -> 2
     */
@@ -1914,8 +1911,11 @@ static int qdm2_decode_init(AVCodecContext *avctx)
     else
         s->coeff_per_sb_select = 2;
 
-    if (s->fft_order != 8 && s->fft_order != 9)
+    // Fail on unknown fft order, if it's > 9 it can overflow s->exptab[]
+    if ((s->fft_order < 7) || (s->fft_order > 9)) {
         av_log(avctx, AV_LOG_ERROR, "Unknown FFT order (%d), contact the developers!\n", s->fft_order);
+        return -1;
+    }
 
     ff_fft_init(&s->fft_ctx, s->fft_order - 1, 1);
 
@@ -1925,9 +1925,8 @@ static int qdm2_decode_init(AVCodecContext *avctx)
         s->exptab[i].im = sin(alpha);
     }
 
-    ff_fft_init(&s->fft_ctx, s->fft_order - 1, 1);
     qdm2_init(s);
-    
+
 //    dump_context(s);
     return 0;
 }
@@ -1938,16 +1937,16 @@ static int qdm2_decode_close(AVCodecContext *avctx)
     QDM2Context *s = avctx->priv_data;
 
     ff_fft_end(&s->fft_ctx);
-    
+
     return 0;
 }
 
 
-void qdm2_decode (QDM2Context *q, uint8_t *in, int16_t *out)
+static void qdm2_decode (QDM2Context *q, uint8_t *in, int16_t *out)
 {
     int ch, i;
     const int frame_size = (q->frame_size * q->channels);
-  
+
     /* select input buffer */
     q->compressed_data = in;
     q->compressed_size = q->checksum_size;
@@ -1961,11 +1960,11 @@ void qdm2_decode (QDM2Context *q, uint8_t *in, int16_t *out)
     /* decode block of QDM2 compressed data */
     if (q->sub_packet == 0) {
         q->has_errors = 0; // zero it for a new super block
-        av_log(NULL,AV_LOG_DEBUG,"Super block follows\n");
+        av_log(NULL,AV_LOG_DEBUG,"Superblock follows\n");
         qdm2_decode_super_block(q);
     }
 
-    /* parse sub packets */
+    /* parse subpackets */
     if (!q->has_errors) {
         if (q->sub_packet == 2)
             qdm2_decode_fft_packets(q);
diff --git a/src/libffmpeg/libavcodec/qdm2data.h b/src/libffmpeg/libavcodec/qdm2data.h
index 9cc944bdf..f41a2078b 100644
--- a/src/libffmpeg/libavcodec/qdm2data.h
+++ b/src/libffmpeg/libavcodec/qdm2data.h
@@ -17,10 +17,10 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
  /**
  * @file qdm2data.h
  * Various QDM2 tables.
@@ -379,7 +379,7 @@ static const float fft_tone_sample_table[4][16][5] = {
       { .1250000000f,-.0164473690f,-.0097465888f, .0558035709f, .0330687836f },
       { .0416666667f,-.0208333333f,-.0123456791f, .0000000000f, .0000000000f },
       { .0100000000f,-.0069444444f,-.0018416207f,-.0037037037f,-.0020000000f } },
-  
+
     { { .0050000000f,-.0200000000f, .0125000000f,-.3030303030f, .0020000000f },
       { .1041666642f, .0400000000f,-.0250000000f, .0333333333f,-.0200000000f },
       { .1250000000f, .0100000000f, .0142857144f,-.0500000007f,-.0200000000f },
@@ -396,7 +396,7 @@ static const float fft_tone_sample_table[4][16][5] = {
       { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
       { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
       { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f } },
-  
+
     { { .1428571492f, .1250000000f,-.0285714287f,-.0357142873f, .0208333333f },
       { .1818181818f, .0588235296f, .0333333333f, .0212765951f, .0100000000f },
       { .1818181818f, .0212765951f, .0100000000f, .0588235296f, .0333333333f },
@@ -413,7 +413,7 @@ static const float fft_tone_sample_table[4][16][5] = {
       { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
       { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
       { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f } },
-  
+
     { { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
       { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
       { .0000000000f, .0000000000f, .0000000000f, .0000000000f, .0000000000f },
diff --git a/src/libffmpeg/libavcodec/qdrw.c b/src/libffmpeg/libavcodec/qdrw.c
index a12d45067..846365917 100644
--- a/src/libffmpeg/libavcodec/qdrw.c
+++ b/src/libffmpeg/libavcodec/qdrw.c
@@ -14,15 +14,15 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file qdrw.c
  * Apple QuickDraw codec.
  */
- 
+
 #include "avcodec.h"
 #include "mpegvideo.h"
 
@@ -32,7 +32,7 @@ typedef struct QdrawContext{
     uint8_t palette[256*3];
 } QdrawContext;
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -41,7 +41,7 @@ static int decode_frame(AVCodecContext *avctx,
     uint8_t* outdata;
     int colors;
     int i;
-    
+
     if(p->data[0])
         avctx->release_buffer(avctx, p);
 
@@ -54,21 +54,21 @@ static int decode_frame(AVCodecContext *avctx,
     p->key_frame= 1;
 
     outdata = a->pic.data[0];
-    
+
     buf += 0x68; /* jump to palette */
     colors = BE_32(buf);
     buf += 4;
-    
+
     if(colors < 0 || colors > 256) {
         av_log(avctx, AV_LOG_ERROR, "Error color count - %i(0x%X)\n", colors, colors);
         return -1;
     }
-    
+
     for (i = 0; i <= colors; i++) {
         unsigned int idx;
         idx = BE_16(buf); /* color index */
         buf += 2;
-        
+
         if (idx > 255) {
             av_log(avctx, AV_LOG_ERROR, "Palette index out of range: %u\n", idx);
             buf += 6;
@@ -88,7 +88,7 @@ static int decode_frame(AVCodecContext *avctx,
         uint8_t *next;
         uint8_t *out;
         int tsize = 0;
-        
+
         /* decode line */
         out = outdata;
         size = BE_16(buf); /* size of packed line */
@@ -129,7 +129,7 @@ static int decode_frame(AVCodecContext *avctx,
 
     *data_size = sizeof(AVFrame);
     *(AVFrame*)data = a->pic;
-    
+
     return buf_size;
 }
 
diff --git a/src/libffmpeg/libavcodec/qpeg.c b/src/libffmpeg/libavcodec/qpeg.c
index e4a78bcb3..f7323a871 100644
--- a/src/libffmpeg/libavcodec/qpeg.c
+++ b/src/libffmpeg/libavcodec/qpeg.c
@@ -14,15 +14,15 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file qpeg.c
  * QPEG codec.
  */
- 
+
 #include "avcodec.h"
 #include "mpegvideo.h"
 
@@ -33,7 +33,7 @@ typedef struct QpegContext{
 } QpegContext;
 
 static void qpeg_decode_intra(uint8_t *src, uint8_t *dst, int size,
-			    int stride, int width, int height)
+                            int stride, int width, int height)
 {
     int i;
     int code;
@@ -41,130 +41,130 @@ static void qpeg_decode_intra(uint8_t *src, uint8_t *dst, int size,
     int run, copy;
     int filled = 0;
     int rows_to_go;
-    
+
     rows_to_go = height;
     height--;
     dst = dst + height * stride;
-    
+
     while((size > 0) && (rows_to_go > 0)) {
-	code = *src++;
-	size--;
-	run = copy = 0;
-	if(code == 0xFC) /* end-of-picture code */
-	    break;
-	if(code >= 0xF8) { /* very long run */
-	    c0 = *src++;
-	    c1 = *src++;
-	    size -= 2;
-	    run = ((code & 0x7) << 16) + (c0 << 8) + c1 + 2;
-	} else if (code >= 0xF0) { /* long run */
-	    c0 = *src++;
-	    size--;
-	    run = ((code & 0xF) << 8) + c0 + 2;
-	} else if (code >= 0xE0) { /* short run */
-	    run = (code & 0x1F) + 2;
-	} else if (code >= 0xC0) { /* very long copy */
-	    c0 = *src++;
-	    c1 = *src++;
-	    size -= 2;
-	    copy = ((code & 0x3F) << 16) + (c0 << 8) + c1 + 1;
-	} else if (code >= 0x80) { /* long copy */
-	    c0 = *src++;
-	    size--;
-	    copy = ((code & 0x7F) << 8) + c0 + 1;
-	} else { /* short copy */
-	    copy = code + 1;
-	}
-	
-	/* perform actual run or copy */
-	if(run) {
-	    int p;
-	    
-	    p = *src++;
-	    size--;
-	    for(i = 0; i < run; i++) {
-		dst[filled++] = p;
-		if (filled >= width) {
-		    filled = 0;
-		    dst -= stride;
+        code = *src++;
+        size--;
+        run = copy = 0;
+        if(code == 0xFC) /* end-of-picture code */
+            break;
+        if(code >= 0xF8) { /* very long run */
+            c0 = *src++;
+            c1 = *src++;
+            size -= 2;
+            run = ((code & 0x7) << 16) + (c0 << 8) + c1 + 2;
+        } else if (code >= 0xF0) { /* long run */
+            c0 = *src++;
+            size--;
+            run = ((code & 0xF) << 8) + c0 + 2;
+        } else if (code >= 0xE0) { /* short run */
+            run = (code & 0x1F) + 2;
+        } else if (code >= 0xC0) { /* very long copy */
+            c0 = *src++;
+            c1 = *src++;
+            size -= 2;
+            copy = ((code & 0x3F) << 16) + (c0 << 8) + c1 + 1;
+        } else if (code >= 0x80) { /* long copy */
+            c0 = *src++;
+            size--;
+            copy = ((code & 0x7F) << 8) + c0 + 1;
+        } else { /* short copy */
+            copy = code + 1;
+        }
+
+        /* perform actual run or copy */
+        if(run) {
+            int p;
+
+            p = *src++;
+            size--;
+            for(i = 0; i < run; i++) {
+                dst[filled++] = p;
+                if (filled >= width) {
+                    filled = 0;
+                    dst -= stride;
                     rows_to_go--;
                     if(rows_to_go <= 0)
                         break;
-		}
-	    }
-	} else {
+                }
+            }
+        } else {
             size -= copy;
-	    for(i = 0; i < copy; i++) {
-		dst[filled++] = *src++;
-		if (filled >= width) {
-		    filled = 0;
-		    dst -= stride;
+            for(i = 0; i < copy; i++) {
+                dst[filled++] = *src++;
+                if (filled >= width) {
+                    filled = 0;
+                    dst -= stride;
                     rows_to_go--;
                     if(rows_to_go <= 0)
                         break;
-		}
-	    }
-	}
+                }
+            }
+        }
     }
 }
 
-static int qpeg_table_h[16] = 
+static int qpeg_table_h[16] =
  { 0x00, 0x20, 0x20, 0x20, 0x18, 0x10, 0x10, 0x20, 0x10, 0x08, 0x18, 0x08, 0x08, 0x18, 0x10, 0x04};
 static int qpeg_table_w[16] =
  { 0x00, 0x20, 0x18, 0x08, 0x18, 0x10, 0x20, 0x10, 0x08, 0x10, 0x20, 0x20, 0x08, 0x10, 0x18, 0x04};
- 
+
 /* Decodes delta frames */
 static void qpeg_decode_inter(uint8_t *src, uint8_t *dst, int size,
-			    int stride, int width, int height,
-			    int delta, uint8_t *ctable, uint8_t *refdata)
+                            int stride, int width, int height,
+                            int delta, uint8_t *ctable, uint8_t *refdata)
 {
     int i, j;
     int code;
     int filled = 0;
     int orig_height;
     uint8_t *blkdata;
-    
+
     /* copy prev frame */
     for(i = 0; i < height; i++)
-	memcpy(refdata + (i * width), dst + (i * stride), width);
-    
+        memcpy(refdata + (i * width), dst + (i * stride), width);
+
     orig_height = height;
     blkdata = src - 0x86;
     height--;
     dst = dst + height * stride;
 
     while((size > 0) && (height >= 0)) {
-	code = *src++;
-	size--;
-	
-	if(delta) {
-	    /* motion compensation */
-	    while((code & 0xF0) == 0xF0) {
-		if(delta == 1) {
-		    int me_idx;
-		    int me_w, me_h, me_x, me_y;
-		    uint8_t *me_plane;
-		    int corr, val;
-		    
-		    /* get block size by index */
-		    me_idx = code & 0xF;
-		    me_w = qpeg_table_w[me_idx];
-		    me_h = qpeg_table_h[me_idx];
-		    
-		    /* extract motion vector */
-		    corr = *src++;
-		    size--;
-
-		    val = corr >> 4;
-		    if(val > 7)
-			val -= 16;
-		    me_x = val;
-		    
-		    val = corr & 0xF;
-		    if(val > 7)
-			val -= 16;
-		    me_y = val;
-		    
+        code = *src++;
+        size--;
+
+        if(delta) {
+            /* motion compensation */
+            while((code & 0xF0) == 0xF0) {
+                if(delta == 1) {
+                    int me_idx;
+                    int me_w, me_h, me_x, me_y;
+                    uint8_t *me_plane;
+                    int corr, val;
+
+                    /* get block size by index */
+                    me_idx = code & 0xF;
+                    me_w = qpeg_table_w[me_idx];
+                    me_h = qpeg_table_h[me_idx];
+
+                    /* extract motion vector */
+                    corr = *src++;
+                    size--;
+
+                    val = corr >> 4;
+                    if(val > 7)
+                        val -= 16;
+                    me_x = val;
+
+                    val = corr & 0xF;
+                    if(val > 7)
+                        val -= 16;
+                    me_y = val;
+
                     /* check motion vector */
                     if ((me_x + filled < 0) || (me_x + me_w + filled > width) ||
                        (height - me_y - me_h < 0) || (height - me_y > orig_height) ||
@@ -178,77 +178,77 @@ static void qpeg_decode_inter(uint8_t *src, uint8_t *dst, int size,
                             for(i = 0; i < me_w; i++)
                                 dst[filled + i - (j * stride)] = me_plane[i - (j * width)];
                         }
-		    }
-		}
-		code = *src++;
-		size--;
-	    }
-	}
-	
-	if(code == 0xE0) /* end-of-picture code */
-	    break;
-	if(code > 0xE0) { /* run code: 0xE1..0xFF */
-	    int p;
-
-	    code &= 0x1F;
-	    p = *src++;
-	    size--;
-	    for(i = 0; i <= code; i++) {
-		dst[filled++] = p;
-		if(filled >= width) {
-		    filled = 0;
-		    dst -= stride;
-		    height--;
-		}
-	    }
-	} else if(code >= 0xC0) { /* copy code: 0xC0..0xDF */
-	    code &= 0x1F;
-	    
-	    for(i = 0; i <= code; i++) {
-		dst[filled++] = *src++;
-		if(filled >= width) {
-		    filled = 0;
-		    dst -= stride;
-		    height--;
-		}
-	    }
-	    size -= code + 1;
-	} else if(code >= 0x80) { /* skip code: 0x80..0xBF */
-	    int skip;
-	    
-	    code &= 0x3F;
-	    /* codes 0x80 and 0x81 are actually escape codes,
-	       skip value minus constant is in the next byte */
-	    if(!code)
-		skip = (*src++) + 64;
-	    else if(code == 1)
-		skip = (*src++) + 320;
-	    else
-		skip = code;
-	    filled += skip;
-	    while( filled >= width) {
-		filled -= width;
-		dst -= stride;
-		height--;
+                    }
+                }
+                code = *src++;
+                size--;
+            }
+        }
+
+        if(code == 0xE0) /* end-of-picture code */
+            break;
+        if(code > 0xE0) { /* run code: 0xE1..0xFF */
+            int p;
+
+            code &= 0x1F;
+            p = *src++;
+            size--;
+            for(i = 0; i <= code; i++) {
+                dst[filled++] = p;
+                if(filled >= width) {
+                    filled = 0;
+                    dst -= stride;
+                    height--;
+                }
+            }
+        } else if(code >= 0xC0) { /* copy code: 0xC0..0xDF */
+            code &= 0x1F;
+
+            for(i = 0; i <= code; i++) {
+                dst[filled++] = *src++;
+                if(filled >= width) {
+                    filled = 0;
+                    dst -= stride;
+                    height--;
+                }
+            }
+            size -= code + 1;
+        } else if(code >= 0x80) { /* skip code: 0x80..0xBF */
+            int skip;
+
+            code &= 0x3F;
+            /* codes 0x80 and 0x81 are actually escape codes,
+               skip value minus constant is in the next byte */
+            if(!code)
+                skip = (*src++) + 64;
+            else if(code == 1)
+                skip = (*src++) + 320;
+            else
+                skip = code;
+            filled += skip;
+            while( filled >= width) {
+                filled -= width;
+                dst -= stride;
+                height--;
                 if(height < 0)
                     break;
-	    }
-	} else {
-	    /* zero code treated as one-pixel skip */
-	    if(code)
-		dst[filled++] = ctable[code & 0x7F];
-	    else
-		filled++;
-	    if(filled >= width) {
-		filled = 0;
-		dst -= stride;
-		height--;
-	    }
-	}
+            }
+        } else {
+            /* zero code treated as one-pixel skip */
+            if(code)
+                dst[filled++] = ctable[code & 0x7F];
+            else
+                filled++;
+            if(filled >= width) {
+                filled = 0;
+                dst -= stride;
+                height--;
+            }
+        }
     }
 }
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -256,7 +256,7 @@ static int decode_frame(AVCodecContext *avctx,
     AVFrame * const p= (AVFrame*)&a->pic;
     uint8_t* outdata;
     int delta;
-    
+
     if(p->data[0])
         avctx->release_buffer(avctx, p);
 
@@ -267,10 +267,10 @@ static int decode_frame(AVCodecContext *avctx,
     }
     outdata = a->pic.data[0];
     if(buf[0x85] == 0x10) {
-	qpeg_decode_intra(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height);
+        qpeg_decode_intra(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height);
     } else {
-	delta = buf[0x85];
-	qpeg_decode_inter(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height, delta, buf + 4, a->refdata);
+        delta = buf[0x85];
+        qpeg_decode_inter(buf+0x86, outdata, buf_size - 0x86, a->pic.linesize[0], avctx->width, avctx->height, delta, buf + 4, a->refdata);
     }
 
     /* make the palette available on the way out */
@@ -282,13 +282,13 @@ static int decode_frame(AVCodecContext *avctx,
 
     *data_size = sizeof(AVFrame);
     *(AVFrame*)data = a->pic;
-    
+
     return buf_size;
 }
 
 static int decode_init(AVCodecContext *avctx){
     QpegContext * const a = avctx->priv_data;
-    
+
     a->avctx = avctx;
     avctx->pix_fmt= PIX_FMT_PAL8;
     avctx->has_b_frames = 0;
@@ -301,7 +301,7 @@ static int decode_init(AVCodecContext *avctx){
 static int decode_end(AVCodecContext *avctx){
     QpegContext * const a = avctx->priv_data;
     AVFrame * const p= (AVFrame*)&a->pic;
-    
+
     if(p->data[0])
         avctx->release_buffer(avctx, p);
 
diff --git a/src/libffmpeg/libavcodec/qtrle.c b/src/libffmpeg/libavcodec/qtrle.c
index 0d79c5c9e..0db003146 100644
--- a/src/libffmpeg/libavcodec/qtrle.c
+++ b/src/libffmpeg/libavcodec/qtrle.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -251,7 +251,7 @@ static void qtrle_decode_16bpp(QtrleContext *s)
     int header;
     int start_line;
     int lines_to_change;
-    signed char rle_code;
+    int rle_code;
     int row_ptr, pixel_ptr;
     int row_inc = s->frame.linesize[0];
     unsigned short rgb16;
@@ -329,7 +329,7 @@ static void qtrle_decode_24bpp(QtrleContext *s)
     int header;
     int start_line;
     int lines_to_change;
-    signed char rle_code;
+    int rle_code;
     int row_ptr, pixel_ptr;
     int row_inc = s->frame.linesize[0];
     unsigned char r, g, b;
@@ -408,10 +408,10 @@ static void qtrle_decode_32bpp(QtrleContext *s)
     int header;
     int start_line;
     int lines_to_change;
-    signed char rle_code;
+    int rle_code;
     int row_ptr, pixel_ptr;
     int row_inc = s->frame.linesize[0];
-    unsigned char r, g, b;
+    unsigned char a, r, g, b;
     unsigned int argb;
     unsigned char *rgb = s->frame.data[0];
     int pixel_limit = s->frame.linesize[0] * s->avctx->height;
@@ -455,11 +455,11 @@ static void qtrle_decode_32bpp(QtrleContext *s)
                 /* decode the run length code */
                 rle_code = -rle_code;
                 CHECK_STREAM_PTR(4);
-                stream_ptr++;  /* skip the alpha (?) byte */
+                a = s->buf[stream_ptr++];
                 r = s->buf[stream_ptr++];
                 g = s->buf[stream_ptr++];
                 b = s->buf[stream_ptr++];
-                argb = (r << 16) | (g << 8) | (b << 0);
+                argb = (a << 24) | (r << 16) | (g << 8) | (b << 0);
 
                 CHECK_PIXEL_PTR(rle_code * 4);
 
@@ -473,11 +473,11 @@ static void qtrle_decode_32bpp(QtrleContext *s)
 
                 /* copy pixels directly to output */
                 while (rle_code--) {
-                    stream_ptr++;  /* skip the alpha (?) byte */
+                    a = s->buf[stream_ptr++];
                     r = s->buf[stream_ptr++];
                     g = s->buf[stream_ptr++];
                     b = s->buf[stream_ptr++];
-                    argb = (r << 16) | (g << 8) | (b << 0);
+                    argb = (a << 24) | (r << 16) | (g << 8) | (b << 0);
                     *(unsigned int *)(&rgb[pixel_ptr]) = argb;
                     pixel_ptr += 4;
                 }
diff --git a/src/libffmpeg/libavcodec/ra144.c b/src/libffmpeg/libavcodec/ra144.c
index 79cce2cef..059236dfe 100644
--- a/src/libffmpeg/libavcodec/ra144.c
+++ b/src/libffmpeg/libavcodec/ra144.c
@@ -14,78 +14,78 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "avcodec.h"
 #include "ra144.h"
 
-#define DATABLOCK1	20			/* size of 14.4 input block in bytes */
-#define DATACHUNK1	1440			/* size of 14.4 input chunk in bytes */
-#define AUDIOBLOCK	160			/* size of output block in 16-bit words (320 bytes) */
-#define AUDIOBUFFER	12288			/* size of output buffer in 16-bit words (24576 bytes) */
+#define DATABLOCK1      20      /* size of 14.4 input block in bytes */
+#define DATACHUNK1      1440    /* size of 14.4 input chunk in bytes */
+#define AUDIOBLOCK      160     /* size of output block in 16-bit words (320 bytes) */
+#define AUDIOBUFFER     12288   /* size of output buffer in 16-bit words (24576 bytes) */
 /* consts */
-#define NBLOCKS		4				/* number of segments within a block */
-#define BLOCKSIZE	40				/* (quarter) block size in 16-bit words (80 bytes) */
-#define HALFBLOCK	20				/* BLOCKSIZE/2 */
-#define BUFFERSIZE	146				/* for do_output */
+#define NBLOCKS         4       /* number of segments within a block */
+#define BLOCKSIZE       40      /* (quarter) block size in 16-bit words (80 bytes) */
+#define HALFBLOCK       20      /* BLOCKSIZE/2 */
+#define BUFFERSIZE      146     /* for do_output */
 
 
 /* internal globals */
 typedef struct {
-	unsigned int	 resetflag, val, oldval;
-	unsigned int	 unpacked[28];		/* buffer for unpacked input */
-	unsigned int	*iptr;				/* pointer to current input (from unpacked) */
-	unsigned int	 gval;
-	unsigned short	*gsp;
-	unsigned int	 gbuf1[8];
-	unsigned short	 gbuf2[120];
-	signed   short	 output_buffer[40];
-	unsigned int	*decptr;			/* decoder ptr */
-	signed   short	*decsp;
-
-	/* the swapped buffers */
-	unsigned int	 swapb1a[10];
-	unsigned int	 swapb2a[10];
-	unsigned int	 swapb1b[10];
-	unsigned int	 swapb2b[10];
-	unsigned int	*swapbuf1;
-	unsigned int	*swapbuf2;
-	unsigned int	*swapbuf1alt;
-	unsigned int	*swapbuf2alt;
-
-	unsigned int buffer[5];
-	unsigned short int buffer_2[148];
-	unsigned short int buffer_a[40];
-	unsigned short int buffer_b[40];
-	unsigned short int buffer_c[40];
-	unsigned short int buffer_d[40];
-
-	unsigned short int work[50];
-	unsigned short *sptr;
-
-	int buffer1[10];
-	int buffer2[10];
-
-	signed short wavtable1[2304];
-	unsigned short wavtable2[2304];
+        unsigned int     resetflag, val, oldval;
+        unsigned int     unpacked[28];          /* buffer for unpacked input */
+        unsigned int    *iptr;                  /* pointer to current input (from unpacked) */
+        unsigned int     gval;
+        unsigned short  *gsp;
+        unsigned int     gbuf1[8];
+        unsigned short   gbuf2[120];
+        signed   short   output_buffer[40];
+        unsigned int    *decptr;                /* decoder ptr */
+        signed   short  *decsp;
+
+        /* the swapped buffers */
+        unsigned int     swapb1a[10];
+        unsigned int     swapb2a[10];
+        unsigned int     swapb1b[10];
+        unsigned int     swapb2b[10];
+        unsigned int    *swapbuf1;
+        unsigned int    *swapbuf2;
+        unsigned int    *swapbuf1alt;
+        unsigned int    *swapbuf2alt;
+
+        unsigned int buffer[5];
+        unsigned short int buffer_2[148];
+        unsigned short int buffer_a[40];
+        unsigned short int buffer_b[40];
+        unsigned short int buffer_c[40];
+        unsigned short int buffer_d[40];
+
+        unsigned short int work[50];
+        unsigned short *sptr;
+
+        int buffer1[10];
+        int buffer2[10];
+
+        signed short wavtable1[2304];
+        unsigned short wavtable2[2304];
 } Real144_internal;
 
 static int ra144_decode_init(AVCodecContext * avctx)
 {
-	Real144_internal *glob=avctx->priv_data;
+        Real144_internal *glob=avctx->priv_data;
 
-	memset(glob,0,sizeof(Real144_internal));
-	glob->resetflag=1;
-	glob->swapbuf1=glob->swapb1a;
-	glob->swapbuf2=glob->swapb2a;
-	glob->swapbuf1alt=glob->swapb1b;
-	glob->swapbuf2alt=glob->swapb2b;
+        memset(glob,0,sizeof(Real144_internal));
+        glob->resetflag=1;
+        glob->swapbuf1=glob->swapb1a;
+        glob->swapbuf2=glob->swapb2a;
+        glob->swapbuf1alt=glob->swapb1b;
+        glob->swapbuf2alt=glob->swapb2b;
 
-	memcpy(glob->wavtable1,wavtable1,sizeof(wavtable1));
-	memcpy(glob->wavtable2,wavtable2,sizeof(wavtable2));
+        memcpy(glob->wavtable1,wavtable1,sizeof(wavtable1));
+        memcpy(glob->wavtable2,wavtable2,sizeof(wavtable2));
 
-	return 0;
+        return 0;
 }
 
 static void final(Real144_internal *glob, short *i1, short *i2, void *out, int *statbuf, int len);
@@ -107,10 +107,10 @@ static void do_voice(int *a1, int *a2)
   int *b1,*b2;
   int x,y;
   int *ptr,*tmp;
-  
+
   b1=buffer;
   b2=a2;
-  
+
   for (x=0;x<10;x++) {
     b1[x]=(*a1)<<4;
 
@@ -123,7 +123,7 @@ static void do_voice(int *a1, int *a2)
     b1=b2;
     b2=tmp;
     a1++;
-  }  
+  }
   ptr=a2+10;
   while (ptr>a2) (*a2++)>>=4;
 }
@@ -190,7 +190,7 @@ static void add_wav(Real144_internal *glob, int n, int f, int m1, int m2, int m3
   ptr=glob->wavtable1+n*9;
   ptr2=glob->wavtable2+n*9;
   if (f!=0) {
-    a=((*ptr)*m1)>>((*ptr2)+1); 
+    a=((*ptr)*m1)>>((*ptr2)+1);
   } else {
     a=0;
   }
@@ -299,7 +299,7 @@ static void unpack_input(unsigned char *input, unsigned int *output)
     *(output++)=ptr[x];
     *(output++)=ptr[x+2];
     *(output++)=ptr[x+3];
-    *(output++)=ptr[x+1];    
+    *(output++)=ptr[x+1];
   }
 }
 
@@ -438,10 +438,10 @@ static int ra144_decode_frame(AVCodecContext * avctx,
 
   if(buf_size==0)
       return 0;
-  
+
   datao = data;
   unpack_input(buf,glob->unpacked);
-  
+
   glob->iptr=glob->unpacked;
   glob->val=decodetable[0][(*(glob->iptr++))<<1];
 
diff --git a/src/libffmpeg/libavcodec/ra144.h b/src/libffmpeg/libavcodec/ra144.h
index f95a8bab3..4ce2df867 100644
--- a/src/libffmpeg/libavcodec/ra144.h
+++ b/src/libffmpeg/libavcodec/ra144.h
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef RA144TABLES_H
@@ -2302,7 +2302,7 @@ static const signed short etable2[5120]={
 0x0004,0x0016,0xfff6,0xfff0,0x0024,0x0005,0x000b,0xffc8,
 0xffdb,0xfffa,0x000a,0x0005,0x000d,0x0003,0xfffa,0x0005,
 0xfff5,0xfffc,0x0013,0x0005,0x0010,0xffd7,0xffe8,0xfff3};
- 
+
 static const unsigned long ftable1[128]={
 0x4cc9f,0x482af,0x47bd8,0x44700,0x5238b,0x47377,0x4898a,0x411fd,
 0x4f8b7,0x4a7d5,0x4bd1b,0x47feb,0x5554b,0x49414,0x4c9d4,0x4403c,
diff --git a/src/libffmpeg/libavcodec/ra288.c b/src/libffmpeg/libavcodec/ra288.c
index 4cff3106e..e2425974e 100644
--- a/src/libffmpeg/libavcodec/ra288.c
+++ b/src/libffmpeg/libavcodec/ra288.c
@@ -14,30 +14,30 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "avcodec.h"
 #include "ra288.h"
- 
+
 typedef struct {
-	float	history[8];
-	float	output[40];
-	float	pr1[36];
-	float	pr2[10];
-	int	phase, phasep;
-
-	float st1a[111],st1b[37],st1[37];
-	float st2a[38],st2b[11],st2[11];
-	float sb[41];
-	float lhist[10];
+        float   history[8];
+        float   output[40];
+        float   pr1[36];
+        float   pr2[10];
+        int     phase, phasep;
+
+        float   st1a[111],st1b[37],st1[37];
+        float   st2a[38],st2b[11],st2[11];
+        float   sb[41];
+        float   lhist[10];
 } Real288_internal;
 
 static int ra288_decode_init(AVCodecContext * avctx)
 {
-	Real288_internal *glob=avctx->priv_data;
-	memset(glob,0,sizeof(Real288_internal));
-	return 0;
+        Real288_internal *glob=avctx->priv_data;
+        memset(glob,0,sizeof(Real288_internal));
+        return 0;
 }
 
 static void prodsum(float *tgt, float *src, int len, int n);
@@ -109,7 +109,7 @@ static void decode(Real288_internal *glob, unsigned int input)
   for (sum=32,x=10;x--;sum-=glob->pr2[x]*glob->lhist[x]);
   if (sum<0) sum=0; else if (sum>60) sum=60;
 
-  sumsum=exp(sum*0.1151292546497)*f;	/* pow(10.0,sum/20)*f */
+  sumsum=exp(sum*0.1151292546497)*f;    /* pow(10.0,sum/20)*f */
   for (sum=0,x=5;x--;) { buffer[x]=table[x]*sumsum; sum+=buffer[x]*buffer[x]; }
   if ((sum/=5)<1) sum=1;
 
@@ -180,7 +180,7 @@ static void co(int n, int i, int j, float *in, float *out, float *st1, float *st
     if (x==c) fp=in;
     work[x]=*(table++)*(*(st1++)=*(fp++));
   }
-  
+
   prodsum(buffer1,work+n,i,n);
   prodsum(buffer2,work+a,j,n);
 
@@ -228,41 +228,19 @@ static int ra288_decode_frame(AVCodecContext * avctx,
             void *data, int *data_size,
             uint8_t * buf, int buf_size)
 {
-  if(avctx->extradata_size>=6)
-  {
-//((short*)(avctx->extradata))[0]; /* subpacket size */
-//((short*)(avctx->extradata))[1]; /* subpacket height */
-//((short*)(avctx->extradata))[2]; /* subpacket flavour */
-//((short*)(avctx->extradata))[3]; /* coded frame size */
-//((short*)(avctx->extradata))[4]; /* codec's data length  */
-//((short*)(avctx->extradata))[5...] /* codec's data */
-    int bret;
     void *datao;
-    int w=avctx->block_align; /* 228 */
-    int h=((short*)(avctx->extradata))[1]; /* 12 */
-    int cfs=((short*)(avctx->extradata))[3]; /* coded frame size 38 */
-    int i,j;
-    if(buf_size<w*h)
+
+    if (buf_size < avctx->block_align)
     {
-	av_log(avctx, AV_LOG_ERROR, "ffra288: Error! Input buffer is too small [%d<%d]\n",buf_size,w*h);
-	return 0;
+        av_log(avctx, AV_LOG_ERROR, "ffra288: Error! Input buffer is too small [%d<%d]\n",buf_size,avctx->block_align);
+        return 0;
     }
+
     datao = data;
-    bret = 0;
-    for (j = 0; j < h/2; j++)
-	for (i = 0; i < h; i++)
-    {
-	    data=decode_block(avctx,&buf[j*cfs+cfs*i*h/2],(signed short *)data,cfs);
-	    bret += cfs;
-    }
+    data = decode_block(avctx, buf, (signed short *)data, avctx->block_align);
+
     *data_size = (char *)data - (char *)datao;
-    return bret;
-  }
-  else
-  {
-    av_log(avctx, AV_LOG_ERROR, "ffra288: Error: need extra data!!!\n");
-    return 0;
-  }
+    return avctx->block_align;
 }
 
 AVCodec ra_288_decoder =
diff --git a/src/libffmpeg/libavcodec/ra288.h b/src/libffmpeg/libavcodec/ra288.h
index 7cc0d8724..0d67d52bb 100644
--- a/src/libffmpeg/libavcodec/ra288.h
+++ b/src/libffmpeg/libavcodec/ra288.h
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef RA288TABLES_H
@@ -24,180 +24,180 @@ static const float amptable[8]={ 0.515625, 0.90234375, 1.57910156, 2.76342773,
                          -0.515625,-0.90234375,-1.57910156,-2.76342773 };
 
 static const float codetable[640]={
-	 0.326171875,	-1.4404296875,	-0.6123046875,	-0.8740234375,	-1.24658203125,
-	-2.45703125,	-2.23486328125,	-0.51025390625,	 1.419921875,	 1.6201171875,
-	-1.37646484375,	-1.30712890625,	-0.462890625,	-1.37939453125,	-2.1728515625,
-	-3.26123046875,	-0.166015625,	 0.7236328125,	-0.623046875,	 0.6162109375,
-	-0.2744140625,	-3.29931640625,	 0.62548828125,	 0.08740234375,	-0.6220703125,
-	-1.2265625,	-3.4814453125,	-2.40478515625,	 3.37548828125,	 1.17724609375,
-	-1.2099609375,	-0.076171875,	 2.28662109375,	-1.89111328125,	 0,
-	-4.0078125,	 1.044921875,	-0.2333984375,	-1.35986328125,	 0.26025390625,
-	 0.92236328125,	 1.34716796875,	 0.67431640625,	-3.39599609375,	-2.88720703125,
-	 2.4814453125,	-1.201171875,	-2.8212890625,	 0.87744140625,	 0.27734375,
-	-1.078125,	-1.61572265625,	-2.20849609375,	-3.044921875,	-3.66455078125,
-	-1.32763671875,	 2.1279296875,	-1.458984375,	-0.56103515625,	 1.30078125,
-	 0.61474609375,	 0.48583984375,	 1.32373046875,	-1.203125,	-5.0732421875,
-	 0.8408203125,	-3.69580078125,	-1.3388671875,	 1.06005859375,	-1.13720703125,
-	 0.50390625,	 0.36474609375,	-0.4189453125,	-3.8798828125,	-6.27099609375,
-	 1.5166015625,	 2.37109375,	-2.04736328125,	-1.24072265625,	 0.50537109375,
-	 0.9091796875,	-0.46875,	-3.236328125,	 0.2001953125,	 2.8720703125,
-	-1.21728515625,	-1.283203125,	-1.953125,	-0.029296875,	 3.5166015625,
-	-1.3046875,	 0.7060546875,	 0.75,		-1.87060546875,	 0.60205078125,
-	-2.5888671875,	 3.375,		 0.77587890625,	-2.04443359375,	 1.78955078125,
-	-1.6875,	-3.9892578125,	-3.76416015625,	 0.67578125,	 2.2939453125,
-	-2.29443359375,	-3.03173828125,	-5.45703125,	 3.95703125,	 8.2177734375,
-	 0.4541015625,	 3.419921875,	 0.61962890625,	-4.38330078125,	 1.25341796875,
-	 2.27001953125,	 5.763671875,	 1.68017578125,	-2.76220703125,	 0.58544921875,
-	 1.2412109375,	-0.08935546875,	-4.32568359375,	-3.89453125,	 1.5771484375,
-	-1.40234375,	-0.98193359375,	-4.74267578125,	-4.09423828125,	 6.33935546875,
-	 1.5068359375,	 1.044921875,	-1.796875,	-4.70849609375,	-1.4140625,
-	-3.71533203125,	 3.18115234375,	-1.11474609375,	-1.2314453125,	 3.091796875,
-	-1.62744140625,	-2.744140625,	-4.4580078125,	-5.43505859375,	 2.70654296875,
-	-0.19873046875,	-3.28173828125,	-8.5283203125,	-1.41064453125,	 5.6484375,
-	 1.802734375,	 3.318359375,	-0.1279296875,	-5.2958984375,	-0.90625,
-	 3.55224609375,	 6.544921875,	-1.45947265625,	-5.17333984375,	 2.41015625,
-	 0.119140625,	-1.08349609375,	 1.296875,	 1.84375,	-2.642578125,
-	-1.97412109375,	-2.8974609375,	 1.04052734375,	 0.42138671875,	-1.3994140625,
-	-1.6123046875,	 0.85107421875,	-0.9794921875,	-0.0625,	-1.001953125,
-	-3.10595703125,	 1.6318359375,	-0.77294921875,	-0.01025390625,	 0.5576171875,
-	-1.87353515625,	-0.89404296875,	 3.12353515625,	 1.24267578125,	-1.390625,
-	-4.556640625,	-3.1875,	 2.59228515625,	 0.9697265625,	-1.09619140625,
-	-2.1923828125,	 0.365234375,	 0.94482421875,	-1.47802734375,	-0.24072265625,
-	-4.51904296875,	 2.6201171875,	 1.55908203125,	-2.19384765625,	 0.87109375,
-	 2.3359375,	-0.1806640625,	 0.9111328125,	 0.51611328125,	-0.92236328125,
-	 3.5849609375,	-1.3134765625,	-1.25830078125,	 0.330078125,	-0.29833984375,
-	-0.2451171875,	 1.09130859375,	-0.9033203125,	-0.86767578125,	-1.00048828125,
-	 0.49365234375,	 1.89453125,	-1.20361328125,	 1.07861328125,	-0.07421875,
-	 1.265625,	 1.38134765625,	 2.728515625,	 1.38623046875,	-3.5673828125,
-	-1.48876953125,	-2.4013671875,	 2.90771484375,	 4.49267578125,	-2.17138671875,
-	 0.34033203125,	 1.908203125,	 2.8310546875,	-2.17333984375,	-2.267578125,
-	-1.03564453125,	 2.658203125,	-1.2548828125,	 0.15673828125,	-0.5869140625,
-	 1.3896484375,	-1.0185546875,	 1.724609375,	 0.2763671875,	-0.345703125,
-	-2.08935546875,	 0.4638671875,	 2.431640625,	 1.83056640625,	 0.220703125,
-	-1.212890625,	 1.7099609375,	 0.83935546875,	-0.0830078125,	 0.1162109375,
-	-1.67724609375,	 0.12841796875,	 1.0322265625,	-0.97900390625,	 1.15283203125,
-	-3.5830078125,	-0.58984375,	 4.56396484375,	-0.59375,	-1.95947265625,
-	-6.5908203125,	-0.21435546875,	 3.919921875,	-2.06640625,	 0.17626953125,
-	-1.82080078125,	 2.65283203125,	 0.978515625,	-2.30810546875,	-0.61474609375,
-	-1.9462890625,	 3.78076171875,	 4.11572265625,	-1.80224609375,	-0.48193359375,
-	 2.5380859375,	-0.20654296875,	 0.5615234375,	-0.62548828125,	 0.3984375,
-	 3.61767578125,	 2.00634765625,	-1.92822265625,	 1.3134765625,	 0.0146484384313,
-	 0.6083984375,	 1.49169921875,	-0.01708984375,	-0.6689453125,	-0.1201171875,
-	-0.72705078125,	 2.75146484375,	-0.3310546875,	-1.28271484375,	 1.5478515625,
-	 2.3583984375,	-2.23876953125,	 0.98046875,	-0.5185546875,	 0.39013671875,
-	-0.06298828125,	 0.35009765625,	 2.2431640625,	 7.29345703125,	 5.2275390625,
-	 0.20361328125,	 1.34716796875,	 0.9033203125,	-2.46923828125,	-0.56298828125,
-	-1.89794921875,	 3.59423828125,	-2.81640625,	 2.09228515625,	 0.3251953125,
-	 0.70458984375,	-0.4580078125,	 0.009765625,	-1.03466796875,	-0.82861328125,
-	-1.8125,	-1.6611328125,	-1.080078125,	 0.0537109375,	 1.04296875,
-	-1.44140625,	 0.005859375,	-0.765625,	-1.708984375,	-0.90576171875,
-	-0.64208984375,	-0.84521484375,	 0.56640625,	-0.2724609375,	 0.83447265625,
-	 0.04296875,	-2.23095703125,	 0.0947265625,	-0.2216796875,	-1.44384765625,
-	-1.38623046875,	-0.8134765625,	-0.13330078125,	 1.017578125,	-0.07568359375,
-	-0.09228515625,	-1.16015625,	 0.81201171875,	-0.5078125,	-1.19580078125,
-	-1.3876953125,	-0.66845703125,	 0.310546875,	-0.12109375,	-1.30712890625,
-	 0.74072265625,	 0.03857421875,	-1.47119140625,	-1.79150390625,	-0.47509765625,
-	 0.93408203125,	-1.21728515625,	-2.59375,	-0.36572265625,	 0.62060546875,
-	-1.41748046875,	-1.623046875,	-1.833984375,	-1.8017578125,	-0.89306640625,
-	-1.42236328125,	-0.75537109375,	-1.34765625,	-0.6865234375,	 0.548828125,
-	 0.900390625,	-0.8955078125,	 0.22265625,	 0.3447265625,	-2.0859375,
-	 0.22802734375,	-2.078125,	-0.93212890625,	 0.74267578125,	 0.5537109375,
-	-0.06201171875,	-0.4853515625,	-0.31103515625,	-0.72802734375,	-3.1708984375,
-	 0.42626953125,	-0.99853515625,	-1.869140625,	-1.36328125,	-0.2822265625,
-	 1.12841796875,	-0.88720703125,	 1.28515625,	-1.490234375,	 0.9609375,
-	 0.31298828125,	 0.5830078125,	 0.92431640625,	 2.00537109375,	 3.0966796875,
-	-0.02197265625,	 0.5849609375,	 1.0546875,	-0.70751953125,	 1.07568359375,
-	-0.978515625,	 0.83642578125,	 1.7177734375,	 1.294921875,	 2.07568359375,
-	 1.43359375,	-1.9375,	 0.625,		 0.06396484375,	-0.720703125,
-	 1.38037109375,	 0.00390625,	-0.94140625,	 1.2978515625,	 1.71533203125,
-	 1.56201171875,	-0.3984375,	 1.31201171875,	-0.85009765625,	-0.68701171875,
-	 1.439453125,	 1.96728515625,	 0.1923828125,	-0.12353515625,	 0.6337890625,
-	 2.0927734375,	 0.02490234375,	-2.20068359375,	-0.015625,	-0.32177734375,
-	 1.90576171875,	 2.7568359375,	-2.728515625,	-1.265625,	 2.78662109375,
-	-0.2958984375,	 0.6025390625,	-0.78466796875,	-2.53271484375,	 0.32421875,
-	-0.25634765625,	 1.767578125,	-1.0703125,	-1.23388671875,	 0.83349609375,
-	 2.09814453125,	-1.58740234375,	-1.11474609375,	 0.396484375,	-1.10546875,
-	 2.81494140625,	 0.2578125,	-1.60498046875,	 0.66015625,	 0.81640625,
-	 1.33544921875,	 0.60595703125,	-0.53857421875,	-1.59814453125,	-1.66357421875,
-	 1.96923828125,	 0.8046875,	-1.44775390625,	-0.5732421875,	 0.705078125,
-	 0.0361328125,	 0.4482421875,	 0.97607421875,	 0.44677734375,	-0.5009765625,
-	-1.21875,	-0.78369140625,	 0.9931640625,	 1.4404296875,	 0.11181640625,
-	-1.05859375,	 0.99462890625,	 0.00732421921566,-0.6171875,	-0.1015625,
-	-1.734375,	 0.7470703125,	 0.28369140625,	 0.72802734375,	 0.4697265625,
-	-1.27587890625,	-1.1416015625,	 1.76806640625,	-0.7265625,	-1.06689453125,
-	-0.85302734375,	 0.03955078125,	 2.7041015625,	 0.69921875,	-1.10205078125,
-	-0.49755859375,	 0.42333984375,	 0.1044921875,	-1.115234375,	-0.7373046875,
-	-0.822265625,	 1.375,		-0.11181640625,	 1.24560546875,	-0.67822265625,
-	 1.32177734375,	 0.24609375,	 0.23388671875,	 1.35888671875,	-0.49267578125,
-	 1.22900390625,	-0.72607421875,	-0.779296875,	 0.30322265625,	 0.94189453125,
-	-0.072265625,	 1.0771484375,	-2.09375,	 0.630859375,	-0.68408203125,
-	-0.25732421875,	 0.60693359375,	-1.33349609375,	 0.93212890625,	 0.625,
-	 1.04931640625,	-0.73291015625,	 1.80078125,	 0.2978515625,	-2.24169921875,
-	 1.6142578125,	-1.64501953125,	 0.91552734375,	 1.775390625,	-0.59423828125,
-	 1.2568359375,	 1.22705078125,	 0.70751953125,	-1.5009765625,	-2.43115234375,
-	 0.3974609375,	 0.8916015625,	-1.21923828125,	 2.0673828125,	-1.99072265625,
-	 0.8125,	-0.107421875,	 1.6689453125,	 0.4892578125,	 0.54443359375,
-	 0.38134765625,	 0.8095703125,	 1.91357421875,	 2.9931640625,	 1.533203125,
-	 0.560546875,	 1.98486328125,	 0.740234375,	 0.39794921875,	 0.09716796875,
-	 0.58154296875,	 1.21533203125,	 1.25048828125,	 1.18212890625,	 1.19287109375,
-	 0.3759765625,	-2.88818359375,	 2.69287109375,	-0.1796875,	-1.56201171875,
-	 0.5810546875,	 0.51123046875,	 1.8271484375,	 3.38232421875,	-1.02001953125,
-	 0.142578125,	 1.51318359375,	 2.103515625,	-0.3701171875,	-1.19873046875,
-	 0.25537109375,	 1.91455078125,	 1.974609375,	 0.6767578125,	 0.04150390625,
-	 2.13232421875,	 0.4912109375,	-0.611328125,	-0.7158203125,	-0.67529296875,
-	 1.880859375,	 0.77099609375,	-0.03759765625,	 1.0078125,	 0.423828125,
-	 2.49462890625,	 1.42529296875,	-0.0986328125,	 0.17529296875,	-0.24853515625,
-	 1.7822265625,	 1.5654296875,	 1.12451171875,	 0.82666015625,	 0.6328125,
-	 1.41845703125,	-1.90771484375,	 0.11181640625,	-0.583984375,	-1.138671875,
-	 2.91845703125,	-1.75048828125,	 0.39306640625,	 1.86767578125,	-1.5322265625,
-	 1.8291015625,	-0.2958984375,	 0.02587890625,	-0.13134765625,	-1.61181640625,
-	 0.2958984375,	 0.9853515625,	-0.642578125,	 1.984375,	 0.1943359375
+         0.326171875,        -1.4404296875,        -0.6123046875,        -0.8740234375,        -1.24658203125,
+        -2.45703125,        -2.23486328125,        -0.51025390625,         1.419921875,         1.6201171875,
+        -1.37646484375,        -1.30712890625,        -0.462890625,        -1.37939453125,        -2.1728515625,
+        -3.26123046875,        -0.166015625,         0.7236328125,        -0.623046875,         0.6162109375,
+        -0.2744140625,        -3.29931640625,         0.62548828125,         0.08740234375,        -0.6220703125,
+        -1.2265625,        -3.4814453125,        -2.40478515625,         3.37548828125,         1.17724609375,
+        -1.2099609375,        -0.076171875,         2.28662109375,        -1.89111328125,         0,
+        -4.0078125,         1.044921875,        -0.2333984375,        -1.35986328125,         0.26025390625,
+         0.92236328125,         1.34716796875,         0.67431640625,        -3.39599609375,        -2.88720703125,
+         2.4814453125,        -1.201171875,        -2.8212890625,         0.87744140625,         0.27734375,
+        -1.078125,        -1.61572265625,        -2.20849609375,        -3.044921875,        -3.66455078125,
+        -1.32763671875,         2.1279296875,        -1.458984375,        -0.56103515625,         1.30078125,
+         0.61474609375,         0.48583984375,         1.32373046875,        -1.203125,        -5.0732421875,
+         0.8408203125,        -3.69580078125,        -1.3388671875,         1.06005859375,        -1.13720703125,
+         0.50390625,         0.36474609375,        -0.4189453125,        -3.8798828125,        -6.27099609375,
+         1.5166015625,         2.37109375,        -2.04736328125,        -1.24072265625,         0.50537109375,
+         0.9091796875,        -0.46875,        -3.236328125,         0.2001953125,         2.8720703125,
+        -1.21728515625,        -1.283203125,        -1.953125,        -0.029296875,         3.5166015625,
+        -1.3046875,         0.7060546875,         0.75,                -1.87060546875,         0.60205078125,
+        -2.5888671875,         3.375,                 0.77587890625,        -2.04443359375,         1.78955078125,
+        -1.6875,        -3.9892578125,        -3.76416015625,         0.67578125,         2.2939453125,
+        -2.29443359375,        -3.03173828125,        -5.45703125,         3.95703125,         8.2177734375,
+         0.4541015625,         3.419921875,         0.61962890625,        -4.38330078125,         1.25341796875,
+         2.27001953125,         5.763671875,         1.68017578125,        -2.76220703125,         0.58544921875,
+         1.2412109375,        -0.08935546875,        -4.32568359375,        -3.89453125,         1.5771484375,
+        -1.40234375,        -0.98193359375,        -4.74267578125,        -4.09423828125,         6.33935546875,
+         1.5068359375,         1.044921875,        -1.796875,        -4.70849609375,        -1.4140625,
+        -3.71533203125,         3.18115234375,        -1.11474609375,        -1.2314453125,         3.091796875,
+        -1.62744140625,        -2.744140625,        -4.4580078125,        -5.43505859375,         2.70654296875,
+        -0.19873046875,        -3.28173828125,        -8.5283203125,        -1.41064453125,         5.6484375,
+         1.802734375,         3.318359375,        -0.1279296875,        -5.2958984375,        -0.90625,
+         3.55224609375,         6.544921875,        -1.45947265625,        -5.17333984375,         2.41015625,
+         0.119140625,        -1.08349609375,         1.296875,         1.84375,        -2.642578125,
+        -1.97412109375,        -2.8974609375,         1.04052734375,         0.42138671875,        -1.3994140625,
+        -1.6123046875,         0.85107421875,        -0.9794921875,        -0.0625,        -1.001953125,
+        -3.10595703125,         1.6318359375,        -0.77294921875,        -0.01025390625,         0.5576171875,
+        -1.87353515625,        -0.89404296875,         3.12353515625,         1.24267578125,        -1.390625,
+        -4.556640625,        -3.1875,         2.59228515625,         0.9697265625,        -1.09619140625,
+        -2.1923828125,         0.365234375,         0.94482421875,        -1.47802734375,        -0.24072265625,
+        -4.51904296875,         2.6201171875,         1.55908203125,        -2.19384765625,         0.87109375,
+         2.3359375,        -0.1806640625,         0.9111328125,         0.51611328125,        -0.92236328125,
+         3.5849609375,        -1.3134765625,        -1.25830078125,         0.330078125,        -0.29833984375,
+        -0.2451171875,         1.09130859375,        -0.9033203125,        -0.86767578125,        -1.00048828125,
+         0.49365234375,         1.89453125,        -1.20361328125,         1.07861328125,        -0.07421875,
+         1.265625,         1.38134765625,         2.728515625,         1.38623046875,        -3.5673828125,
+        -1.48876953125,        -2.4013671875,         2.90771484375,         4.49267578125,        -2.17138671875,
+         0.34033203125,         1.908203125,         2.8310546875,        -2.17333984375,        -2.267578125,
+        -1.03564453125,         2.658203125,        -1.2548828125,         0.15673828125,        -0.5869140625,
+         1.3896484375,        -1.0185546875,         1.724609375,         0.2763671875,        -0.345703125,
+        -2.08935546875,         0.4638671875,         2.431640625,         1.83056640625,         0.220703125,
+        -1.212890625,         1.7099609375,         0.83935546875,        -0.0830078125,         0.1162109375,
+        -1.67724609375,         0.12841796875,         1.0322265625,        -0.97900390625,         1.15283203125,
+        -3.5830078125,        -0.58984375,         4.56396484375,        -0.59375,        -1.95947265625,
+        -6.5908203125,        -0.21435546875,         3.919921875,        -2.06640625,         0.17626953125,
+        -1.82080078125,         2.65283203125,         0.978515625,        -2.30810546875,        -0.61474609375,
+        -1.9462890625,         3.78076171875,         4.11572265625,        -1.80224609375,        -0.48193359375,
+         2.5380859375,        -0.20654296875,         0.5615234375,        -0.62548828125,         0.3984375,
+         3.61767578125,         2.00634765625,        -1.92822265625,         1.3134765625,         0.0146484384313,
+         0.6083984375,         1.49169921875,        -0.01708984375,        -0.6689453125,        -0.1201171875,
+        -0.72705078125,         2.75146484375,        -0.3310546875,        -1.28271484375,         1.5478515625,
+         2.3583984375,        -2.23876953125,         0.98046875,        -0.5185546875,         0.39013671875,
+        -0.06298828125,         0.35009765625,         2.2431640625,         7.29345703125,         5.2275390625,
+         0.20361328125,         1.34716796875,         0.9033203125,        -2.46923828125,        -0.56298828125,
+        -1.89794921875,         3.59423828125,        -2.81640625,         2.09228515625,         0.3251953125,
+         0.70458984375,        -0.4580078125,         0.009765625,        -1.03466796875,        -0.82861328125,
+        -1.8125,        -1.6611328125,        -1.080078125,         0.0537109375,         1.04296875,
+        -1.44140625,         0.005859375,        -0.765625,        -1.708984375,        -0.90576171875,
+        -0.64208984375,        -0.84521484375,         0.56640625,        -0.2724609375,         0.83447265625,
+         0.04296875,        -2.23095703125,         0.0947265625,        -0.2216796875,        -1.44384765625,
+        -1.38623046875,        -0.8134765625,        -0.13330078125,         1.017578125,        -0.07568359375,
+        -0.09228515625,        -1.16015625,         0.81201171875,        -0.5078125,        -1.19580078125,
+        -1.3876953125,        -0.66845703125,         0.310546875,        -0.12109375,        -1.30712890625,
+         0.74072265625,         0.03857421875,        -1.47119140625,        -1.79150390625,        -0.47509765625,
+         0.93408203125,        -1.21728515625,        -2.59375,        -0.36572265625,         0.62060546875,
+        -1.41748046875,        -1.623046875,        -1.833984375,        -1.8017578125,        -0.89306640625,
+        -1.42236328125,        -0.75537109375,        -1.34765625,        -0.6865234375,         0.548828125,
+         0.900390625,        -0.8955078125,         0.22265625,         0.3447265625,        -2.0859375,
+         0.22802734375,        -2.078125,        -0.93212890625,         0.74267578125,         0.5537109375,
+        -0.06201171875,        -0.4853515625,        -0.31103515625,        -0.72802734375,        -3.1708984375,
+         0.42626953125,        -0.99853515625,        -1.869140625,        -1.36328125,        -0.2822265625,
+         1.12841796875,        -0.88720703125,         1.28515625,        -1.490234375,         0.9609375,
+         0.31298828125,         0.5830078125,         0.92431640625,         2.00537109375,         3.0966796875,
+        -0.02197265625,         0.5849609375,         1.0546875,        -0.70751953125,         1.07568359375,
+        -0.978515625,         0.83642578125,         1.7177734375,         1.294921875,         2.07568359375,
+         1.43359375,        -1.9375,         0.625,                 0.06396484375,        -0.720703125,
+         1.38037109375,         0.00390625,        -0.94140625,         1.2978515625,         1.71533203125,
+         1.56201171875,        -0.3984375,         1.31201171875,        -0.85009765625,        -0.68701171875,
+         1.439453125,         1.96728515625,         0.1923828125,        -0.12353515625,         0.6337890625,
+         2.0927734375,         0.02490234375,        -2.20068359375,        -0.015625,        -0.32177734375,
+         1.90576171875,         2.7568359375,        -2.728515625,        -1.265625,         2.78662109375,
+        -0.2958984375,         0.6025390625,        -0.78466796875,        -2.53271484375,         0.32421875,
+        -0.25634765625,         1.767578125,        -1.0703125,        -1.23388671875,         0.83349609375,
+         2.09814453125,        -1.58740234375,        -1.11474609375,         0.396484375,        -1.10546875,
+         2.81494140625,         0.2578125,        -1.60498046875,         0.66015625,         0.81640625,
+         1.33544921875,         0.60595703125,        -0.53857421875,        -1.59814453125,        -1.66357421875,
+         1.96923828125,         0.8046875,        -1.44775390625,        -0.5732421875,         0.705078125,
+         0.0361328125,         0.4482421875,         0.97607421875,         0.44677734375,        -0.5009765625,
+        -1.21875,        -0.78369140625,         0.9931640625,         1.4404296875,         0.11181640625,
+        -1.05859375,         0.99462890625,         0.00732421921566,-0.6171875,        -0.1015625,
+        -1.734375,         0.7470703125,         0.28369140625,         0.72802734375,         0.4697265625,
+        -1.27587890625,        -1.1416015625,         1.76806640625,        -0.7265625,        -1.06689453125,
+        -0.85302734375,         0.03955078125,         2.7041015625,         0.69921875,        -1.10205078125,
+        -0.49755859375,         0.42333984375,         0.1044921875,        -1.115234375,        -0.7373046875,
+        -0.822265625,         1.375,                -0.11181640625,         1.24560546875,        -0.67822265625,
+         1.32177734375,         0.24609375,         0.23388671875,         1.35888671875,        -0.49267578125,
+         1.22900390625,        -0.72607421875,        -0.779296875,         0.30322265625,         0.94189453125,
+        -0.072265625,         1.0771484375,        -2.09375,         0.630859375,        -0.68408203125,
+        -0.25732421875,         0.60693359375,        -1.33349609375,         0.93212890625,         0.625,
+         1.04931640625,        -0.73291015625,         1.80078125,         0.2978515625,        -2.24169921875,
+         1.6142578125,        -1.64501953125,         0.91552734375,         1.775390625,        -0.59423828125,
+         1.2568359375,         1.22705078125,         0.70751953125,        -1.5009765625,        -2.43115234375,
+         0.3974609375,         0.8916015625,        -1.21923828125,         2.0673828125,        -1.99072265625,
+         0.8125,        -0.107421875,         1.6689453125,         0.4892578125,         0.54443359375,
+         0.38134765625,         0.8095703125,         1.91357421875,         2.9931640625,         1.533203125,
+         0.560546875,         1.98486328125,         0.740234375,         0.39794921875,         0.09716796875,
+         0.58154296875,         1.21533203125,         1.25048828125,         1.18212890625,         1.19287109375,
+         0.3759765625,        -2.88818359375,         2.69287109375,        -0.1796875,        -1.56201171875,
+         0.5810546875,         0.51123046875,         1.8271484375,         3.38232421875,        -1.02001953125,
+         0.142578125,         1.51318359375,         2.103515625,        -0.3701171875,        -1.19873046875,
+         0.25537109375,         1.91455078125,         1.974609375,         0.6767578125,         0.04150390625,
+         2.13232421875,         0.4912109375,        -0.611328125,        -0.7158203125,        -0.67529296875,
+         1.880859375,         0.77099609375,        -0.03759765625,         1.0078125,         0.423828125,
+         2.49462890625,         1.42529296875,        -0.0986328125,         0.17529296875,        -0.24853515625,
+         1.7822265625,         1.5654296875,         1.12451171875,         0.82666015625,         0.6328125,
+         1.41845703125,        -1.90771484375,         0.11181640625,        -0.583984375,        -1.138671875,
+         2.91845703125,        -1.75048828125,         0.39306640625,         1.86767578125,        -1.5322265625,
+         1.8291015625,        -0.2958984375,         0.02587890625,        -0.13134765625,        -1.61181640625,
+         0.2958984375,         0.9853515625,        -0.642578125,         1.984375,         0.1943359375
 };
 
 static const float table1[111]={
-	0.576690972,	0.580838025,	0.585013986,	0.589219987,	0.59345597,	0.597723007,
-	0.602020264,	0.606384277,	0.610748291,	0.615142822,	0.619598389,	0.624084473,
-	0.628570557,	0.633117676,	0.637695313,	0.642272949,	0.646911621,	0.651580811,
-	0.656280518,	0.66104126,	0.665802002,	0.670593262,	0.675445557,	0.680328369,
-	0.685241699,	0.690185547,	0.695159912,	0.700164795,	0.705230713,	0.710327148,
-	0.715454102,	0.720611572,	0.725830078,	0.731048584,	0.736328125,	0.741638184,
-	0.747009277,	0.752380371,	0.7578125,	0.763305664,	0.768798828,	0.774353027,
-	0.779937744,	0.785583496,	0.791229248,	0.796936035,	0.802703857,	0.808502197,
-	0.814331055,	0.820220947,	0.826141357,	0.832092285,	0.838104248,	0.844146729,
-	0.850250244,	0.856384277,	0.862548828,	0.868774414,	0.875061035,	0.881378174,
-	0.88772583,	0.894134521,	0.900604248,	0.907104492,	0.913635254,	0.920227051,
-	0.926879883,	0.933563232,	0.940307617,	0.94708252,	0.953918457,	0.96081543,
-	0.96774292,	0.974731445,	0.981781006,	0.988861084,	0.994842529,	0.998565674,
-	0.999969482,	0.99911499,	0.996002197,	0.990600586,	0.982910156,	0.973022461,
-	0.960876465,	0.946533203,	0.930053711,	0.911437988,	0.89074707,	0.868041992,
-	0.843322754,	0.816680908,	0.788208008,	0.757904053,	0.725891113,	0.692199707,
-	0.656921387,	0.620178223,	0.582000732,	0.542480469,	0.501739502,	0.459838867,
-	0.416900635,	0.373016357,	0.328277588,	0.282775879,	0.236663818,	0.189971924,
-	0.142852783,	0.0954284668,	0.0477600098
+        0.576690972,        0.580838025,        0.585013986,        0.589219987,        0.59345597,        0.597723007,
+        0.602020264,        0.606384277,        0.610748291,        0.615142822,        0.619598389,        0.624084473,
+        0.628570557,        0.633117676,        0.637695313,        0.642272949,        0.646911621,        0.651580811,
+        0.656280518,        0.66104126,        0.665802002,        0.670593262,        0.675445557,        0.680328369,
+        0.685241699,        0.690185547,        0.695159912,        0.700164795,        0.705230713,        0.710327148,
+        0.715454102,        0.720611572,        0.725830078,        0.731048584,        0.736328125,        0.741638184,
+        0.747009277,        0.752380371,        0.7578125,        0.763305664,        0.768798828,        0.774353027,
+        0.779937744,        0.785583496,        0.791229248,        0.796936035,        0.802703857,        0.808502197,
+        0.814331055,        0.820220947,        0.826141357,        0.832092285,        0.838104248,        0.844146729,
+        0.850250244,        0.856384277,        0.862548828,        0.868774414,        0.875061035,        0.881378174,
+        0.88772583,        0.894134521,        0.900604248,        0.907104492,        0.913635254,        0.920227051,
+        0.926879883,        0.933563232,        0.940307617,        0.94708252,        0.953918457,        0.96081543,
+        0.96774292,        0.974731445,        0.981781006,        0.988861084,        0.994842529,        0.998565674,
+        0.999969482,        0.99911499,        0.996002197,        0.990600586,        0.982910156,        0.973022461,
+        0.960876465,        0.946533203,        0.930053711,        0.911437988,        0.89074707,        0.868041992,
+        0.843322754,        0.816680908,        0.788208008,        0.757904053,        0.725891113,        0.692199707,
+        0.656921387,        0.620178223,        0.582000732,        0.542480469,        0.501739502,        0.459838867,
+        0.416900635,        0.373016357,        0.328277588,        0.282775879,        0.236663818,        0.189971924,
+        0.142852783,        0.0954284668,        0.0477600098
 };
 
 static const float table2[38]={
-	0.505699992,	0.524200022,	0.54339999,	0.563300014,	0.583953857,	0.60534668,
-	0.627502441,	0.650482178,	0.674316406,	0.699005127,	0.724578857,	0.75112915,
-	0.778625488,	0.807128906,	0.836669922,	0.86730957,	0.899078369,	0.932006836,
-	0.961486816,	0.982757568,	0.995635986,	1,		0.995819092,	0.983154297,
-	0.96206665,	0.932769775,	0.895507813,	0.850585938,	0.798400879,	0.739379883,
-	0.674072266,	0.602996826,	0.526763916,	0.446014404,	0.361480713,	0.273834229,
-	0.183868408,	0.0923461914
+        0.505699992,        0.524200022,        0.54339999,        0.563300014,        0.583953857,        0.60534668,
+        0.627502441,        0.650482178,        0.674316406,        0.699005127,        0.724578857,        0.75112915,
+        0.778625488,        0.807128906,        0.836669922,        0.86730957,        0.899078369,        0.932006836,
+        0.961486816,        0.982757568,        0.995635986,        1,                0.995819092,        0.983154297,
+        0.96206665,        0.932769775,        0.895507813,        0.850585938,        0.798400879,        0.739379883,
+        0.674072266,        0.602996826,        0.526763916,        0.446014404,        0.361480713,        0.273834229,
+        0.183868408,        0.0923461914
 };
 
 static const float table1a[36]={
-	0.98828125,	0.976699829,	0.965254128,	0.953942537,	0.942763507,	0.931715488,
-	0.920796931,	0.910006344,	0.899342179,	0.888803005,	0.878387332,	0.868093729,
-	0.857920766,	0.847867012,	0.837931097,	0.828111589,	0.818407178,	0.808816493,
-	0.799338162,	0.789970934,	0.780713439,	0.771564424,	0.762522638,	0.753586829,
-	0.744755745,	0.736028135,	0.727402806,	0.718878567,	0.710454226,	0.702128589,
-	0.693900526,	0.685768902,	0.677732527,	0.669790328,	0.66194123,	0.654184103
+        0.98828125,        0.976699829,        0.965254128,        0.953942537,        0.942763507,        0.931715488,
+        0.920796931,        0.910006344,        0.899342179,        0.888803005,        0.878387332,        0.868093729,
+        0.857920766,        0.847867012,        0.837931097,        0.828111589,        0.818407178,        0.808816493,
+        0.799338162,        0.789970934,        0.780713439,        0.771564424,        0.762522638,        0.753586829,
+        0.744755745,        0.736028135,        0.727402806,        0.718878567,        0.710454226,        0.702128589,
+        0.693900526,        0.685768902,        0.677732527,        0.669790328,        0.66194123,        0.654184103
 };
 
 static const float table2a[10]={
-	0.90625,	0.821289063,	0.74432373,	0.674499512,	0.61126709,
-	0.553955078,	0.50201416,	0.454956055,	0.41229248,	0.373657227
+        0.90625,        0.821289063,        0.74432373,        0.674499512,        0.61126709,
+        0.553955078,        0.50201416,        0.454956055,        0.41229248,        0.373657227
 };
 
 #endif /* RA288TABLES_H */
diff --git a/src/libffmpeg/libavcodec/rangecoder.c b/src/libffmpeg/libavcodec/rangecoder.c
index 730d5a87c..8607b8f6d 100644
--- a/src/libffmpeg/libavcodec/rangecoder.c
+++ b/src/libffmpeg/libavcodec/rangecoder.c
@@ -14,10 +14,10 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file rangecoder.c
  * Range coder.
@@ -38,7 +38,7 @@
 
 
 void ff_init_range_encoder(RangeCoder *c, uint8_t *buf, int buf_size){
-    c->bytestream_start= 
+    c->bytestream_start=
     c->bytestream= buf;
     c->bytestream_end= buf + buf_size;
 
@@ -66,9 +66,9 @@ void ff_build_rac_states(RangeCoder *c, int factor, int max_p){
 
 #if 0
     for(i=1; i<256; i++){
-        if(c->one_state[i]) 
+        if(c->one_state[i])
             continue;
-        
+
         p= (i*one + 128) >> 8;
         last_p8= i;
         for(;;){
@@ -93,13 +93,13 @@ void ff_build_rac_states(RangeCoder *c, int factor, int max_p){
         if(p8 <= last_p8) p8= last_p8+1;
         if(last_p8 && last_p8<256 && p8<=max_p)
             c->one_state[last_p8]= p8;
-        
+
         p+= ((one-p)*factor + one/2) >> 32;
         last_p8= p8;
     }
 #endif
     for(i=256-max_p; i<=max_p; i++){
-        if(c->one_state[i]) 
+        if(c->one_state[i])
             continue;
 
         p= (i*one + 128) >> 8;
@@ -109,7 +109,7 @@ void ff_build_rac_states(RangeCoder *c, int factor, int max_p){
         if(p8 > max_p) p8= max_p;
         c->one_state[    i]=     p8;
     }
-    
+
     for(i=0; i<256; i++)
         c->zero_state[i]= 256-c->one_state[256-i];
 #if 0
@@ -143,17 +143,17 @@ int main(){
     uint8_t r[9*SIZE];
     int i;
     uint8_t state[10]= {0};
-    
+
     ff_init_range_encoder(&c, b, SIZE);
     ff_build_rac_states(&c, 0.05*(1LL<<32), 128+64+32+16);
-    
+
     memset(state, 128, sizeof(state));
 
     for(i=0; i<SIZE; i++){
         r[i]= random()%7;
     }
-    
-  
+
+
     for(i=0; i<SIZE; i++){
 START_TIMER
         put_rac(&c, state, r[i]&1);
@@ -161,18 +161,18 @@ STOP_TIMER("put_rac")
     }
 
     ff_put_rac_terminate(&c);
-    
+
     ff_init_range_decoder(&c, b, SIZE);
-    
+
     memset(state, 128, sizeof(state));
-    
+
     for(i=0; i<SIZE; i++){
 START_TIMER
         if( (r[i]&1) != get_rac(&c, state) )
             av_log(NULL, AV_LOG_DEBUG, "rac failure at %d\n", i);
 STOP_TIMER("get_rac")
     }
-    
+
     return 0;
 }
 
diff --git a/src/libffmpeg/libavcodec/rangecoder.h b/src/libffmpeg/libavcodec/rangecoder.h
index 6fd7b43bf..0f56fad59 100644
--- a/src/libffmpeg/libavcodec/rangecoder.h
+++ b/src/libffmpeg/libavcodec/rangecoder.h
@@ -14,10 +14,10 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file rangecoder.h
  * Range coder.
@@ -58,7 +58,7 @@ static inline void renorm_encoder(RangeCoder *c){
         }else{
             c->outstanding_count++;
         }
-        
+
         c->low = (c->low & 0xFF)<<8;
         c->range <<= 8;
     }
@@ -78,7 +78,7 @@ static inline void put_rac(RangeCoder *c, uint8_t * const state, int bit){
         c->range = range1;
         *state= c->one_state[*state];
     }
-    
+
     renorm_encoder(c);
 }
 
@@ -95,7 +95,7 @@ static inline void refill(RangeCoder *c){
 static inline int get_rac(RangeCoder *c, uint8_t * const state){
     int range1= (c->range * (*state)) >> 8;
     int attribute_unused one_mask;
-    
+
     c->range -= range1;
 #if 1
     if(c->low < c->range){
@@ -111,12 +111,12 @@ static inline int get_rac(RangeCoder *c, uint8_t * const state){
     }
 #else
     one_mask= (c->range - c->low-1)>>31;
-    
+
     c->low -= c->range & one_mask;
     c->range += (range1 - c->range) & one_mask;
-    
+
     *state= c->zero_state[(*state) + (256&one_mask)];
-    
+
     refill(c);
 
     return one_mask&1;
diff --git a/src/libffmpeg/libavcodec/ratecontrol.c b/src/libffmpeg/libavcodec/ratecontrol.c
index 0fc9caa31..29dc1f495 100644
--- a/src/libffmpeg/libavcodec/ratecontrol.c
+++ b/src/libffmpeg/libavcodec/ratecontrol.c
@@ -15,13 +15,13 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file ratecontrol.c
  * Rate control for video encoders.
- */ 
+ */
 
 #include "avcodec.h"
 #include "dsputil.h"
@@ -38,10 +38,10 @@ static int init_pass2(MpegEncContext *s);
 static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_factor, int frame_num);
 
 void ff_write_pass1_stats(MpegEncContext *s){
-    snprintf(s->avctx->stats_out, 256, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d;\n",
-            s->current_picture_ptr->display_picture_number, s->current_picture_ptr->coded_picture_number, s->pict_type, 
-            s->current_picture.quality, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits, 
-            s->f_code, s->b_code, s->current_picture.mc_mb_var_sum, s->current_picture.mb_var_sum, s->i_count);
+    snprintf(s->avctx->stats_out, 256, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d skipcount:%d hbits:%d;\n",
+            s->current_picture_ptr->display_picture_number, s->current_picture_ptr->coded_picture_number, s->pict_type,
+            s->current_picture.quality, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits,
+            s->f_code, s->b_code, s->current_picture.mc_mb_var_sum, s->current_picture.mb_var_sum, s->i_count, s->skip_count, s->header_bits);
 }
 
 int ff_rate_control_init(MpegEncContext *s)
@@ -53,7 +53,7 @@ int ff_rate_control_init(MpegEncContext *s)
     for(i=0; i<5; i++){
         rcc->pred[i].coeff= FF_QP2LAMBDA * 7.0;
         rcc->pred[i].count= 1.0;
-    
+
         rcc->pred[i].decay= 0.4;
         rcc->i_cplx_sum [i]=
         rcc->p_cplx_sum [i]=
@@ -78,7 +78,7 @@ int ff_rate_control_init(MpegEncContext *s)
             return -1;
         rcc->entry = (RateControlEntry*)av_mallocz(i*sizeof(RateControlEntry));
         rcc->num_entries= i;
-        
+
         /* init all to skipped p frames (with b frames we might have a not encoded frame at the end FIXME) */
         for(i=0; i<rcc->num_entries; i++){
             RateControlEntry *rce= &rcc->entry[i];
@@ -86,8 +86,8 @@ int ff_rate_control_init(MpegEncContext *s)
             rce->qscale= rce->new_qscale=FF_QP2LAMBDA * 2;
             rce->misc_bits= s->mb_num + 10;
             rce->mb_var_sum= s->mb_num*100;
-        }        
-        
+        }
+
         /* read stats */
         p= s->avctx->stats_in;
         for(i=0; i<rcc->num_entries - s->max_b_frames; i++){
@@ -107,34 +107,40 @@ int ff_rate_control_init(MpegEncContext *s)
             assert(picture_number < rcc->num_entries);
             rce= &rcc->entry[picture_number];
 
-            e+=sscanf(p, " in:%*d out:%*d type:%d q:%f itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d",
-                   &rce->pict_type, &rce->qscale, &rce->i_tex_bits, &rce->p_tex_bits, &rce->mv_bits, &rce->misc_bits, 
-                   &rce->f_code, &rce->b_code, &rce->mc_mb_var_sum, &rce->mb_var_sum, &rce->i_count);
-            if(e!=12){
+            e+=sscanf(p, " in:%*d out:%*d type:%d q:%f itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d skipcount:%d hbits:%d",
+                   &rce->pict_type, &rce->qscale, &rce->i_tex_bits, &rce->p_tex_bits, &rce->mv_bits, &rce->misc_bits,
+                   &rce->f_code, &rce->b_code, &rce->mc_mb_var_sum, &rce->mb_var_sum, &rce->i_count, &rce->skip_count, &rce->header_bits);
+            if(e!=14){
                 av_log(s->avctx, AV_LOG_ERROR, "statistics are damaged at line %d, parser out=%d\n", i, e);
                 return -1;
             }
+
             p= next;
         }
-        
+#ifdef CONFIG_XVID
+        //FIXME maybe move to end
+        if((s->flags&CODEC_FLAG_PASS2) && s->avctx->rc_strategy == FF_RC_STRATEGY_XVID)
+            return ff_xvid_rate_control_init(s);
+#endif
+
         if(init_pass2(s) < 0) return -1;
     }
-     
+
     if(!(s->flags&CODEC_FLAG_PASS2)){
 
         rcc->short_term_qsum=0.001;
         rcc->short_term_qcount=0.001;
-    
+
         rcc->pass1_rc_eq_output_sum= 0.001;
         rcc->pass1_wanted_bits=0.001;
-        
+
         /* init stuff with the user specified complexity */
         if(s->avctx->rc_initial_cplx){
             for(i=0; i<60*30; i++){
                 double bits= s->avctx->rc_initial_cplx * (i/10000.0 + 1.0)*s->mb_num;
                 RateControlEntry rce;
                 double q;
-                
+
                 if     (i%((s->gop_size+3)/4)==0) rce.pict_type= I_TYPE;
                 else if(i%(s->max_b_frames+1))    rce.pict_type= B_TYPE;
                 else                              rce.pict_type= P_TYPE;
@@ -171,7 +177,7 @@ int ff_rate_control_init(MpegEncContext *s)
         }
 
     }
-    
+
     return 0;
 }
 
@@ -181,6 +187,11 @@ void ff_rate_control_uninit(MpegEncContext *s)
     emms_c();
 
     av_freep(&rcc->entry);
+
+#ifdef CONFIG_XVID
+    if((s->flags&CODEC_FLAG_PASS2) && s->avctx->rc_strategy == FF_RC_STRATEGY_XVID)
+        ff_xvid_rate_control_uninit(s);
+#endif
 }
 
 static inline double qp2bits(RateControlEntry *rce, double qp){
@@ -196,14 +207,14 @@ static inline double bits2qp(RateControlEntry *rce, double bits){
     }
     return rce->qscale * (double)(rce->i_tex_bits + rce->p_tex_bits+1)/ bits;
 }
-    
+
 int ff_vbv_update(MpegEncContext *s, int frame_size){
     RateControlContext *rcc= &s->rc_context;
     const double fps= 1/av_q2d(s->avctx->time_base);
     const int buffer_size= s->avctx->rc_buffer_size;
     const double min_rate= s->avctx->rc_min_rate/fps;
     const double max_rate= s->avctx->rc_max_rate/fps;
-    
+
 //printf("%d %f %d %f %f\n", buffer_size, rcc->buffer_index, frame_size, min_rate, max_rate);
     if(buffer_size){
         int left;
@@ -219,11 +230,11 @@ int ff_vbv_update(MpegEncContext *s, int frame_size){
 
         if(rcc->buffer_index > buffer_size){
             int stuffing= ceil((rcc->buffer_index - buffer_size)/8);
-            
+
             if(stuffing < 4 && s->codec_id == CODEC_ID_MPEG4)
                 stuffing=4;
             rcc->buffer_index -= 8*stuffing;
-            
+
             if(s->avctx->debug & FF_DEBUG_RC)
                 av_log(s->avctx, AV_LOG_DEBUG, "stuffing %d bytes\n", stuffing);
 
@@ -241,7 +252,7 @@ static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_f
     AVCodecContext *a= s->avctx;
     double q, bits;
     const int pict_type= rce->new_pict_type;
-    const double mb_num= s->mb_num;  
+    const double mb_num= s->mb_num;
     int i;
 
     double const_values[]={
@@ -310,32 +321,32 @@ static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_f
     };
 
     bits= ff_eval(s->avctx->rc_eq, const_values, const_names, func1, func1_names, NULL, NULL, rce);
-    
+
     rcc->pass1_rc_eq_output_sum+= bits;
     bits*=rate_factor;
     if(bits<0.0) bits=0.0;
     bits+= 1.0; //avoid 1/0 issues
-    
+
     /* user override */
     for(i=0; i<s->avctx->rc_override_count; i++){
         RcOverride *rco= s->avctx->rc_override;
         if(rco[i].start_frame > frame_num) continue;
         if(rco[i].end_frame   < frame_num) continue;
-    
-        if(rco[i].qscale) 
+
+        if(rco[i].qscale)
             bits= qp2bits(rce, rco[i].qscale); //FIXME move at end to really force it?
         else
             bits*= rco[i].quality_factor;
     }
 
     q= bits2qp(rce, bits);
-    
+
     /* I/B difference */
     if     (pict_type==I_TYPE && s->avctx->i_quant_factor<0.0)
         q= -q*s->avctx->i_quant_factor + s->avctx->i_quant_offset;
     else if(pict_type==B_TYPE && s->avctx->b_quant_factor<0.0)
         q= -q*s->avctx->b_quant_factor + s->avctx->b_quant_offset;
-        
+
     return q;
 }
 
@@ -345,7 +356,7 @@ static double get_diff_limited_q(MpegEncContext *s, RateControlEntry *rce, doubl
     const int pict_type= rce->new_pict_type;
     const double last_p_q    = rcc->last_qscale_for[P_TYPE];
     const double last_non_b_q= rcc->last_qscale_for[rcc->last_non_b_pict_type];
-    
+
     if     (pict_type==I_TYPE && (a->i_quant_factor>0.0 || rcc->last_non_b_pict_type==P_TYPE))
         q= last_p_q    *ABS(a->i_quant_factor) + a->i_quant_offset;
     else if(pict_type==B_TYPE && a->b_quant_factor>0.0)
@@ -361,7 +372,7 @@ static double get_diff_limited_q(MpegEncContext *s, RateControlEntry *rce, doubl
     }
 
     rcc->last_qscale_for[pict_type]= q; //Note we cant do that after blurring
-    
+
     if(pict_type!=B_TYPE)
         rcc->last_non_b_pict_type= pict_type;
 
@@ -372,9 +383,9 @@ static double get_diff_limited_q(MpegEncContext *s, RateControlEntry *rce, doubl
  * gets the qmin & qmax for pict_type
  */
 static void get_qminmax(int *qmin_ret, int *qmax_ret, MpegEncContext *s, int pict_type){
-    int qmin= s->avctx->lmin;                                                       
+    int qmin= s->avctx->lmin;
     int qmax= s->avctx->lmax;
-    
+
     assert(qmin <= qmax);
 
     if(pict_type==B_TYPE){
@@ -389,7 +400,7 @@ static void get_qminmax(int *qmin_ret, int *qmax_ret, MpegEncContext *s, int pic
     qmax= clip(qmax, 1, FF_LAMBDA_MAX);
 
     if(qmax<qmin) qmax= qmin;
-    
+
     *qmin_ret= qmin;
     *qmax_ret= qmax;
 }
@@ -403,7 +414,7 @@ static double modify_qscale(MpegEncContext *s, RateControlEntry *rce, double q,
     const double fps= 1/av_q2d(s->avctx->time_base);
     const double min_rate= s->avctx->rc_min_rate / fps;
     const double max_rate= s->avctx->rc_max_rate / fps;
-    
+
     get_qminmax(&qmin, &qmax, s, pict_type);
 
     /* modulation */
@@ -454,16 +465,16 @@ static double modify_qscale(MpegEncContext *s, RateControlEntry *rce, double q,
     }else{
         double min2= log(qmin);
         double max2= log(qmax);
-        
+
         q= log(q);
         q= (q - min2)/(max2-min2) - 0.5;
         q*= -4.0;
         q= 1.0/(1.0 + exp(q));
         q= q*(max2-min2) + min2;
-        
+
         q= exp(q);
     }
-    
+
     return q;
 }
 
@@ -511,7 +522,7 @@ static void adaptive_quantization(MpegEncContext *s, double q){
     Picture * const pic= &s->current_picture;
     const int mb_width = s->mb_width;
     const int mb_height = s->mb_height;
-    
+
     for(i=0; i<s->mb_num; i++){
         const int mb_xy= s->mb_index2xy[i];
         float temp_cplx= sqrt(pic->mc_mb_var[mb_xy]); //FIXME merge in pow()
@@ -522,14 +533,14 @@ static void adaptive_quantization(MpegEncContext *s, double q){
         int mb_y = mb_xy / s->mb_stride;
         int mb_distance;
         float mb_factor = 0.0;
-#if 0        
+#if 0
         if(spat_cplx < q/3) spat_cplx= q/3; //FIXME finetune
         if(temp_cplx < q/3) temp_cplx= q/3; //FIXME finetune
-#endif   
+#endif
         if(spat_cplx < 4) spat_cplx= 4; //FIXME finetune
         if(temp_cplx < 4) temp_cplx= 4; //FIXME finetune
 
-        if((s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTRA)){//FIXME hq mode 
+        if((s->mb_type[mb_xy]&CANDIDATE_MB_TYPE_INTRA)){//FIXME hq mode
             cplx= spat_cplx;
             factor= 1.0 + p_masking;
         }else{
@@ -559,9 +570,9 @@ static void adaptive_quantization(MpegEncContext *s, double q){
         }
 
         factor*= 1.0 - border_masking*mb_factor;
-        
+
         if(factor<0.00001) factor= 0.00001;
-        
+
         bits= cplx*factor;
         cplx_sum+= cplx;
         bits_sum+= bits;
@@ -588,7 +599,7 @@ static void adaptive_quantization(MpegEncContext *s, double q){
         if(bits_sum < 0.001) bits_sum= 0.001;
         if(cplx_sum < 0.001) cplx_sum= 0.001;
     }
-   
+
     for(i=0; i<s->mb_num; i++){
         const int mb_xy= s->mb_index2xy[i];
         float newq= q*cplx_tab[i]/bits_tab[i];
@@ -607,9 +618,20 @@ static void adaptive_quantization(MpegEncContext *s, double q){
         s->lambda_table[mb_xy]= intq;
     }
 }
+
+/** Loads f_code and b_code from the rate control entry of s->picture_number into s. */
+void ff_get_2pass_fcode(MpegEncContext *s){
+    const RateControlContext *rcc= &s->rc_context;
+    const int picture_number= s->picture_number;
+
+    assert(picture_number >= 0 && picture_number < rcc->num_entries); /* entry[] holds num_entries items */
+    s->f_code= rcc->entry[picture_number].f_code;
+    s->b_code= rcc->entry[picture_number].b_code;
+}
+
 //FIXME rd or at least approx for dquant
 
-float ff_rate_estimate_qscale(MpegEncContext *s)
+float ff_rate_estimate_qscale(MpegEncContext *s, int dry_run)
 {
     float q;
     int qmin, qmax;
@@ -629,12 +651,17 @@ float ff_rate_estimate_qscale(MpegEncContext *s)
     Picture * const pic= &s->current_picture;
     emms_c();
 
+#ifdef CONFIG_XVID
+    if((s->flags&CODEC_FLAG_PASS2) && s->avctx->rc_strategy == FF_RC_STRATEGY_XVID)
+        return ff_xvid_rate_estimate_qscale(s, dry_run);
+#endif
+
     get_qminmax(&qmin, &qmax, s, pict_type);
 
     fps= 1/av_q2d(s->avctx->time_base);
 //printf("input_pic_num:%d pic_num:%d frame_rate:%d\n", s->input_picture_number, s->picture_number, s->frame_rate);
         /* update predictors */
-    if(picture_number>2){
+    if(picture_number>2 && !dry_run){
         const int last_var= s->last_pict_type == I_TYPE ? rcc->last_mb_var_sum : rcc->last_mc_mb_var_sum;
         update_predictor(&rcc->pred[s->last_pict_type], rcc->last_qscale, sqrt(last_var), s->frame_bits);
     }
@@ -654,7 +681,7 @@ float ff_rate_estimate_qscale(MpegEncContext *s)
     if(br_compensation<=0.0) br_compensation=0.001;
 
     var= pict_type == I_TYPE ? pic->mb_var_sum : pic->mc_mb_var_sum;
-    
+
     short_term_q = 0; /* avoid warning */
     if(s->flags&CODEC_FLAG_PASS2){
         if(pict_type!=I_TYPE)
@@ -663,7 +690,7 @@ float ff_rate_estimate_qscale(MpegEncContext *s)
         q= rce->new_qscale / br_compensation;
 //printf("%f %f %f last:%d var:%d type:%d//\n", q, rce->new_qscale, br_compensation, s->frame_bits, var, pict_type);
     }else{
-        rce->pict_type= 
+        rce->pict_type=
         rce->new_pict_type= pict_type;
         rce->mc_mb_var_sum= pic->mc_mb_var_sum;
         rce->mb_var_sum   = pic->   mb_var_sum;
@@ -682,7 +709,7 @@ float ff_rate_estimate_qscale(MpegEncContext *s)
             rce->i_count   = 0; //FIXME we do know this approx
             rce->i_tex_bits= 0;
             rce->p_tex_bits= bits*0.9;
-            
+
             rce->mv_bits= bits*0.1;
         }
         rcc->i_cplx_sum [pict_type] += rce->i_tex_bits*rce->qscale;
@@ -692,7 +719,7 @@ float ff_rate_estimate_qscale(MpegEncContext *s)
 
         bits= rce->i_tex_bits + rce->p_tex_bits;
         rate_factor= rcc->pass1_wanted_bits/rcc->pass1_rc_eq_output_sum * br_compensation;
-    
+
         q= get_qscale(s, rce, rate_factor, picture_number);
 
         assert(q>0.0);
@@ -712,7 +739,7 @@ float ff_rate_estimate_qscale(MpegEncContext *s)
 //printf("%f ", q);
         }
         assert(q>0.0);
-        
+
         q= modify_qscale(s, rce, q, picture_number);
 
         rcc->pass1_wanted_bits+= s->bit_rate/fps;
@@ -727,17 +754,19 @@ float ff_rate_estimate_qscale(MpegEncContext *s)
         );
     }
 
-    if     (q<qmin) q=qmin; 
+    if     (q<qmin) q=qmin;
     else if(q>qmax) q=qmax;
 
     if(s->adaptive_quant)
         adaptive_quantization(s, q);
     else
         q= (int)(q + 0.5);
-    
-    rcc->last_qscale= q;
-    rcc->last_mc_mb_var_sum= pic->mc_mb_var_sum;
-    rcc->last_mb_var_sum= pic->mb_var_sum;
+
+    if(!dry_run){
+        rcc->last_qscale= q;
+        rcc->last_mc_mb_var_sum= pic->mc_mb_var_sum;
+        rcc->last_mb_var_sum= pic->mb_var_sum;
+    }
 #if 0
 {
     static int mvsum=0, texsum=0;
@@ -767,14 +796,14 @@ static int init_pass2(MpegEncContext *s)
     double rate_factor=0;
     double step;
     //int last_i_frame=-10000000;
-    const int filter_size= (int)(a->qblur*4) | 1;  
+    const int filter_size= (int)(a->qblur*4) | 1;
     double expected_bits;
     double *qscale, *blured_qscale;
 
     /* find complexity & const_bits & decide the pict_types */
     for(i=0; i<rcc->num_entries; i++){
         RateControlEntry *rce= &rcc->entry[i];
-        
+
         rce->new_pict_type= rce->pict_type;
         rcc->i_cplx_sum [rce->pict_type] += rce->i_tex_bits*rce->qscale;
         rcc->p_cplx_sum [rce->pict_type] += rce->p_tex_bits*rce->qscale;
@@ -785,27 +814,27 @@ static int init_pass2(MpegEncContext *s)
         const_bits[rce->new_pict_type]+= rce->mv_bits + rce->misc_bits;
     }
     all_const_bits= const_bits[I_TYPE] + const_bits[P_TYPE] + const_bits[B_TYPE];
-    
+
     if(all_available_bits < all_const_bits){
         av_log(s->avctx, AV_LOG_ERROR, "requested bitrate is to low\n");
         return -1;
     }
-    
+
     /* find average quantizers */
     avg_quantizer[P_TYPE]=0;
     for(step=256*256; step>0.0000001; step*=0.5){
         double expected_bits=0;
         avg_quantizer[P_TYPE]+= step;
-        
+
         avg_quantizer[I_TYPE]= avg_quantizer[P_TYPE]*ABS(s->avctx->i_quant_factor) + s->avctx->i_quant_offset;
         avg_quantizer[B_TYPE]= avg_quantizer[P_TYPE]*ABS(s->avctx->b_quant_factor) + s->avctx->b_quant_offset;
-        
-        expected_bits= 
-            + all_const_bits 
+
+        expected_bits=
+            + all_const_bits
             + complexity[I_TYPE]/avg_quantizer[I_TYPE]
             + complexity[P_TYPE]/avg_quantizer[P_TYPE]
             + complexity[B_TYPE]/avg_quantizer[B_TYPE];
-            
+
         if(expected_bits < all_available_bits) avg_quantizer[P_TYPE]-= step;
 //printf("%f %lld %f\n", expected_bits, all_available_bits, avg_quantizer[P_TYPE]);
     }
@@ -815,14 +844,14 @@ static int init_pass2(MpegEncContext *s)
         available_bits[i]= const_bits[i] + complexity[i]/avg_quantizer[i];
     }
 //printf("%lld %lld %lld %lld\n", available_bits[I_TYPE], available_bits[P_TYPE], available_bits[B_TYPE], all_available_bits);
-        
+
     qscale= av_malloc(sizeof(double)*rcc->num_entries);
     blured_qscale= av_malloc(sizeof(double)*rcc->num_entries);
 
     for(step=256*256; step>0.0000001; step*=0.5){
         expected_bits=0;
         rate_factor+= step;
-        
+
         rcc->buffer_index= s->avctx->rc_buffer_size/2;
 
         /* find qscale */
@@ -834,7 +863,7 @@ static int init_pass2(MpegEncContext *s)
         /* fixed I/B QP relative to P mode */
         for(i=rcc->num_entries-1; i>=0; i--){
             RateControlEntry *rce= &rcc->entry[i];
-            
+
             qscale[i]= get_diff_limited_q(s, rce, qscale[i]);
         }
 
@@ -844,12 +873,12 @@ static int init_pass2(MpegEncContext *s)
             const int pict_type= rce->new_pict_type;
             int j;
             double q=0.0, sum=0.0;
-        
+
             for(j=0; j<filter_size; j++){
                 int index= i+j-filter_size/2;
                 double d= index-i;
                 double coeff= a->qblur==0 ? 1.0 : exp(-d*d/(a->qblur * a->qblur));
-            
+
                 if(index < 0 || index >= rcc->num_entries) continue;
                 if(pict_type != rcc->entry[index].new_pict_type) continue;
                 q+= qscale[index] * coeff;
@@ -857,7 +886,7 @@ static int init_pass2(MpegEncContext *s)
             }
             blured_qscale[i]= q/sum;
         }
-    
+
         /* find expected bits */
         for(i=0; i<rcc->num_entries; i++){
             RateControlEntry *rce= &rcc->entry[i];
diff --git a/src/libffmpeg/libavcodec/raw.c b/src/libffmpeg/libavcodec/raw.c
index 4f829fd02..28c3cad54 100644
--- a/src/libffmpeg/libavcodec/raw.c
+++ b/src/libffmpeg/libavcodec/raw.c
@@ -14,14 +14,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file raw.c
  * Raw Video Codec
  */
- 
+
 #include "avcodec.h"
 
 typedef struct RawVideoContext {
@@ -31,7 +31,7 @@ typedef struct RawVideoContext {
     AVFrame pic;             ///< AVCodecContext.coded_frame
 } RawVideoContext;
 
-typedef struct PixleFormatTag {
+typedef struct PixelFormatTag {
     int pix_fmt;
     unsigned int fourcc;
 } PixelFormatTag;
@@ -70,8 +70,8 @@ unsigned int avcodec_pix_fmt_to_codec_tag(enum PixelFormat fmt)
     const PixelFormatTag * tags = pixelFormatTags;
     while (tags->pix_fmt >= 0) {
         if (tags->pix_fmt == fmt)
-	    return tags->fourcc;
-	tags++; 
+            return tags->fourcc;
+        tags++;
     }
     return 0;
 }
@@ -92,18 +92,18 @@ static int raw_init_decoder(AVCodecContext *avctx)
         case 32: avctx->pix_fmt= PIX_FMT_RGBA32; break;
         }
     }
-    
+
     context->length = avpicture_get_size(avctx->pix_fmt, avctx->width, avctx->height);
     context->buffer = av_malloc(context->length);
     context->p      = context->buffer;
     context->pic.pict_type = FF_I_TYPE;
     context->pic.key_frame = 1;
-    
+
     avctx->coded_frame= &context->pic;
-    
+
     if (!context->buffer)
         return -1;
-   
+
     return 0;
 }
 
@@ -115,18 +115,22 @@ static void flip(AVCodecContext *avctx, AVPicture * picture){
 }
 
 static int raw_decode(AVCodecContext *avctx,
-			    void *data, int *data_size,
-			    uint8_t *buf, int buf_size)
+                            void *data, int *data_size,
+                            uint8_t *buf, int buf_size)
 {
     RawVideoContext *context = avctx->priv_data;
     int bytesNeeded;
 
+    AVFrame * frame = (AVFrame *) data;
     AVPicture * picture = (AVPicture *) data;
 
+    frame->interlaced_frame = avctx->coded_frame->interlaced_frame;
+    frame->top_field_first = avctx->coded_frame->top_field_first;
+
     /* Early out without copy if packet size == frame size */
     if (buf_size == context->length  &&  context->p == context->buffer) {
         avpicture_fill(picture, buf, avctx->pix_fmt, avctx->width, avctx->height);
-        flip(avctx, picture);        
+        flip(avctx, picture);
         *data_size = sizeof(AVPicture);
         return buf_size;
     }
@@ -141,7 +145,7 @@ static int raw_decode(AVCodecContext *avctx,
     memcpy(context->p, buf, bytesNeeded);
     context->p = context->buffer;
     avpicture_fill(picture, context->buffer, avctx->pix_fmt, avctx->width, avctx->height);
-    flip(avctx, picture);        
+    flip(avctx, picture);
     *data_size = sizeof(AVPicture);
     return bytesNeeded;
 }
@@ -149,7 +153,7 @@ static int raw_decode(AVCodecContext *avctx,
 static int raw_close_decoder(AVCodecContext *avctx)
 {
     RawVideoContext *context = avctx->priv_data;
-    
+
     av_freep(&context->buffer);
     return 0;
 }
@@ -167,7 +171,7 @@ static int raw_init_encoder(AVCodecContext *avctx)
 }
 
 static int raw_encode(AVCodecContext *avctx,
-			    unsigned char *frame, int buf_size, void *data)
+                            unsigned char *frame, int buf_size, void *data)
 {
     return avpicture_layout((AVPicture *)data, avctx->pix_fmt, avctx->width,
                                                avctx->height, frame, buf_size);
diff --git a/src/libffmpeg/libavcodec/resample2.c b/src/libffmpeg/libavcodec/resample2.c
new file mode 100644
index 000000000..735f612d1
--- /dev/null
+++ b/src/libffmpeg/libavcodec/resample2.c
@@ -0,0 +1,272 @@
+/*
+ * audio resampling
+ * Copyright (c) 2004 Michael Niedermayer <michaelni@gmx.at>
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ *
+ */
+
+/**
+ * @file resample2.c
+ * audio resampling
+ * @author Michael Niedermayer <michaelni@gmx.at>
+ */
+
+#include "avcodec.h"
+#include "common.h"
+#include "dsputil.h"
+
+#if 1
+#define FILTER_SHIFT 15
+
+#define FELEM int16_t
+#define FELEM2 int32_t
+#define FELEM_MAX INT16_MAX
+#define FELEM_MIN INT16_MIN
+#else
+#define FILTER_SHIFT 22
+
+#define FELEM int32_t
+#define FELEM2 int64_t
+#define FELEM_MAX INT32_MAX
+#define FELEM_MIN INT32_MIN
+#endif
+
+
+typedef struct AVResampleContext{
+    FELEM *filter_bank;
+    int filter_length;
+    int ideal_dst_incr;
+    int dst_incr;
+    int index;
+    int frac;
+    int src_incr;
+    int compensation_distance;
+    int phase_shift;
+    int phase_mask;
+    int linear;
+}AVResampleContext;
+
+/**
+ * 0th order modified bessel function of the first kind.
+ */
+double bessel(double x){
+    double v=1;
+    double t=1;
+    int i;
+
+    for(i=1; i<50; i++){
+        t *= i;
+        v += pow(x*x/4, i)/(t*t);
+    }
+    return v;
+}
+
+/**
+ * builds a polyphase filterbank.
+ * @param factor resampling factor
+ * @param scale wanted sum of coefficients for each filter
+ * @param type 0->cubic, 1->blackman nuttall windowed sinc, 2->kaiser windowed sinc beta=16
+ */
+void av_build_filter(FELEM *filter, double factor, int tap_count, int phase_count, int scale, int type){
+    int ph, i, v;
+    double x, y, w, tab[tap_count];
+    const int center= (tap_count-1)/2;
+
+    /* if upsampling, only need to interpolate, no filter */
+    if (factor > 1.0)
+        factor = 1.0;
+
+    for(ph=0;ph<phase_count;ph++) {
+        double norm = 0;
+        double e= 0;
+        for(i=0;i<tap_count;i++) {
+            x = M_PI * ((double)(i - center) - (double)ph / phase_count) * factor;
+            if (x == 0) y = 1.0;
+            else        y = sin(x) / x;
+            switch(type){
+            case 0:{
+                const float d= -0.5; //first order derivative = -0.5
+                x = fabs(((double)(i - center) - (double)ph / phase_count) * factor);
+                if(x<1.0) y= 1 - 3*x*x + 2*x*x*x + d*(            -x*x + x*x*x);
+                else      y=                       d*(-4 + 8*x - 5*x*x + x*x*x);
+                break;}
+            case 1:
+                w = 2.0*x / (factor*tap_count) + M_PI;
+                y *= 0.3635819 - 0.4891775 * cos(w) + 0.1365995 * cos(2*w) - 0.0106411 * cos(3*w);
+                break;
+            case 2:
+                w = 2.0*x / (factor*tap_count*M_PI);
+                y *= bessel(16*sqrt(FFMAX(1-w*w, 0)));
+                break;
+            }
+
+            tab[i] = y;
+            norm += y;
+        }
+
+        /* normalize so that the coefficients of each phase sum to (approximately) scale */
+        for(i=0;i<tap_count;i++) {
+            v = clip(lrintf(tab[i] * scale / norm + e), FELEM_MIN, FELEM_MAX);
+            filter[ph * tap_count + i] = v;
+            e += tab[i] * scale / norm - v;
+        }
+    }
+}
+
+/**
+ * Initializes an audio resampler.
+ * Note: if either rate is not an integer, simply scale both rates up so that they are.
+ */
+AVResampleContext *av_resample_init(int out_rate, int in_rate, int filter_size, int phase_shift, int linear, double cutoff){
+    AVResampleContext *c= av_mallocz(sizeof(AVResampleContext));
+    double factor= FFMIN(out_rate * cutoff / in_rate, 1.0);
+    int phase_count= 1<<phase_shift;
+
+    c->phase_shift= phase_shift;
+    c->phase_mask= phase_count-1;
+    c->linear= linear;
+
+    c->filter_length= FFMAX((int)ceil(filter_size/factor), 1);
+    c->filter_bank= av_mallocz(c->filter_length*(phase_count+1)*sizeof(FELEM));
+    av_build_filter(c->filter_bank, factor, c->filter_length, phase_count, 1<<FILTER_SHIFT, 1);
+    memcpy(&c->filter_bank[c->filter_length*phase_count+1], c->filter_bank, (c->filter_length-1)*sizeof(FELEM));
+    c->filter_bank[c->filter_length*phase_count]= c->filter_bank[c->filter_length - 1];
+
+    c->src_incr= out_rate;
+    c->ideal_dst_incr= c->dst_incr= in_rate * phase_count;
+    c->index= -phase_count*((c->filter_length-1)/2);
+
+    return c;
+}
+
+void av_resample_close(AVResampleContext *c){
+    av_freep(&c->filter_bank);
+    av_freep(&c);
+}
+
+/**
+ * Compensates samplerate/timestamp drift. The compensation is done by changing
+ * the resampler parameters, so no audible clicks or similar distortions occur.
+ * @param compensation_distance distance in output samples over which the compensation should be performed
+ * @param sample_delta number of output samples which should be output less
+ *
+ * example: av_resample_compensate(c, 10, 500)
+ * here instead of 510 samples only 500 samples would be output
+ *
+ * note, due to rounding the actual compensation might be slightly different,
+ * especially if the compensation_distance is large and the in_rate used during init is small
+ */
+void av_resample_compensate(AVResampleContext *c, int sample_delta, int compensation_distance){
+//    sample_delta += (c->ideal_dst_incr - c->dst_incr)*(int64_t)c->compensation_distance / c->ideal_dst_incr;
+    c->compensation_distance= compensation_distance;
+    c->dst_incr = c->ideal_dst_incr - c->ideal_dst_incr * (int64_t)sample_delta / compensation_distance;
+}
+
+/**
+ * resamples.
+ * @param src an array of unconsumed samples
+ * @param consumed the number of samples of src which have been consumed are returned here
+ * @param src_size the number of unconsumed samples available
+ * @param dst_size the amount of space in samples available in dst
+ * @param update_ctx if this is 0 then the context won't be modified, so that several channels can be resampled with the same context
+ * @return the number of samples written in dst or -1 if an error occurred
+ */
+int av_resample(AVResampleContext *c, short *dst, short *src, int *consumed, int src_size, int dst_size, int update_ctx){
+    int dst_index, i;
+    int index= c->index;
+    int frac= c->frac;
+    int dst_incr_frac= c->dst_incr % c->src_incr;
+    int dst_incr=      c->dst_incr / c->src_incr;
+    int compensation_distance= c->compensation_distance;
+
+  if(compensation_distance == 0 && c->filter_length == 1 && c->phase_shift==0){
+        int64_t index2= ((int64_t)index)<<32;
+        int64_t incr= (1LL<<32) * c->dst_incr / c->src_incr;
+        dst_size= FFMIN(dst_size, (src_size-1-index) * (int64_t)c->src_incr / c->dst_incr);
+
+        for(dst_index=0; dst_index < dst_size; dst_index++){
+            dst[dst_index] = src[index2>>32];
+            index2 += incr;
+        }
+        frac += dst_index * dst_incr_frac;
+        index += dst_index * dst_incr;
+        index += frac / c->src_incr;
+        frac %= c->src_incr;
+  }else{
+    for(dst_index=0; dst_index < dst_size; dst_index++){
+        FELEM *filter= c->filter_bank + c->filter_length*(index & c->phase_mask);
+        int sample_index= index >> c->phase_shift;
+        FELEM2 val=0;
+
+        if(sample_index < 0){
+            for(i=0; i<c->filter_length; i++)
+                val += src[ABS(sample_index + i) % src_size] * filter[i];
+        }else if(sample_index + c->filter_length > src_size){
+            break;
+        }else if(c->linear){
+            int64_t v=0;
+            int sub_phase= (frac<<8) / c->src_incr;
+            for(i=0; i<c->filter_length; i++){
+                int64_t coeff= filter[i]*(256 - sub_phase) + filter[i + c->filter_length]*sub_phase;
+                v += src[sample_index + i] * coeff;
+            }
+            val= v>>8;
+        }else{
+            for(i=0; i<c->filter_length; i++){
+                val += src[sample_index + i] * (FELEM2)filter[i];
+            }
+        }
+
+        val = (val + (1<<(FILTER_SHIFT-1)))>>FILTER_SHIFT;
+        dst[dst_index] = (unsigned)(val + 32768) > 65535 ? (val>>31) ^ 32767 : val;
+
+        frac += dst_incr_frac;
+        index += dst_incr;
+        if(frac >= c->src_incr){
+            frac -= c->src_incr;
+            index++;
+        }
+
+        if(dst_index + 1 == compensation_distance){
+            compensation_distance= 0;
+            dst_incr_frac= c->ideal_dst_incr % c->src_incr;
+            dst_incr=      c->ideal_dst_incr / c->src_incr;
+        }
+    }
+  }
+    *consumed= FFMAX(index, 0) >> c->phase_shift;
+    if(index>=0) index &= c->phase_mask;
+
+    if(compensation_distance){
+        compensation_distance -= dst_index;
+        assert(compensation_distance > 0);
+    }
+    if(update_ctx){
+        c->frac= frac;
+        c->index= index;
+        c->dst_incr= dst_incr_frac + c->src_incr*dst_incr;
+        c->compensation_distance= compensation_distance;
+    }
+#if 0
+    if(update_ctx && !c->compensation_distance){
+#undef rand
+        av_resample_compensate(c, rand() % (8000*2) - 8000, 8000*2);
+av_log(NULL, AV_LOG_DEBUG, "%d %d %d\n", c->dst_incr, c->ideal_dst_incr, c->compensation_distance);
+    }
+#endif
+
+    return dst_index;
+}
diff --git a/src/libffmpeg/libavcodec/roqvideo.c b/src/libffmpeg/libavcodec/roqvideo.c
index 598765583..462a4cf72 100644
--- a/src/libffmpeg/libavcodec/roqvideo.c
+++ b/src/libffmpeg/libavcodec/roqvideo.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -255,7 +255,7 @@ static void apply_motion_8x8(RoqContext *ri, int x, int y,
                 pa[2] = avg2(pb[2], pb[3]);
                 pa[3] = avg2(pb[3], pb[4]);
                 break;
- 
+
             case 2:
                 pa[0] = avg2(pb[0], pb[hw]);
                 pa[1] = avg2(pb[1], pb[hw+1]);
@@ -362,7 +362,7 @@ static void roqvideo_decode_frame(RoqContext *ri)
                             apply_motion_4x4(ri, x, y, 0, 8, 8);
                             break;
                         case RoQ_ID_FCC:
-                            apply_motion_4x4(ri, x, y, buf[bpos++], 
+                            apply_motion_4x4(ri, x, y, buf[bpos++],
                                 chunk_arg >> 8, chunk_arg & 0xff);
                             break;
                         case RoQ_ID_SLD:
diff --git a/src/libffmpeg/libavcodec/rpza.c b/src/libffmpeg/libavcodec/rpza.c
index 6b1510a4b..8c0766273 100644
--- a/src/libffmpeg/libavcodec/rpza.c
+++ b/src/libffmpeg/libavcodec/rpza.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -119,8 +119,8 @@ static void rpza_decode_stream(RpzaContext *s)
             colorA = (opcode << 8) | (s->buf[stream_ptr++]);
             opcode = 0;
             if ((s->buf[stream_ptr] & 0x80) != 0) {
-                /* Must behave as opcode 110xxxxx, using colorA computed 
-                 * above. Use fake opcode 0x20 to enter switch block at 
+                /* Must behave as opcode 110xxxxx, using colorA computed
+                 * above. Use fake opcode 0x20 to enter switch block at
                  * the right place */
                 opcode = 0x20;
                 n_blocks = 1;
diff --git a/src/libffmpeg/libavcodec/rv10.c b/src/libffmpeg/libavcodec/rv10.c
index 06fbde837..5dd942dc5 100644
--- a/src/libffmpeg/libavcodec/rv10.c
+++ b/src/libffmpeg/libavcodec/rv10.c
@@ -15,14 +15,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file rv10.c
  * RV10 codec.
  */
- 
+
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
@@ -67,7 +67,7 @@ static const uint16_t rv_lum_code[256] =
  0x0f78, 0x0f79, 0x0f7a, 0x0f7b, 0x0f7c, 0x0f7d, 0x0f7e, 0x0f7f,
 };
 
-static const uint8_t rv_lum_bits[256] = 
+static const uint8_t rv_lum_bits[256] =
 {
  14, 12, 12, 12, 12, 12, 12, 12,
  12, 12, 12, 12, 12, 12, 12, 12,
@@ -235,40 +235,40 @@ void rv10_encode_picture_header(MpegEncContext *s, int picture_number)
     int full_frame= 0;
 
     align_put_bits(&s->pb);
-    
-    put_bits(&s->pb, 1, 1);	/* marker */
+
+    put_bits(&s->pb, 1, 1);     /* marker */
 
     put_bits(&s->pb, 1, (s->pict_type == P_TYPE));
 
-    put_bits(&s->pb, 1, 0);	/* not PB frame */
+    put_bits(&s->pb, 1, 0);     /* not PB frame */
 
     put_bits(&s->pb, 5, s->qscale);
 
     if (s->pict_type == I_TYPE) {
-	/* specific MPEG like DC coding not used */
+        /* specific MPEG like DC coding not used */
     }
     /* if multiple packets per frame are sent, the position at which
        to display the macro blocks is coded here */
     if(!full_frame){
-        put_bits(&s->pb, 6, 0);	/* mb_x */
-        put_bits(&s->pb, 6, 0);	/* mb_y */
+        put_bits(&s->pb, 6, 0); /* mb_x */
+        put_bits(&s->pb, 6, 0); /* mb_y */
         put_bits(&s->pb, 12, s->mb_width * s->mb_height);
     }
 
-    put_bits(&s->pb, 3, 0);	/* ignored */
+    put_bits(&s->pb, 3, 0);     /* ignored */
 }
 
 void rv20_encode_picture_header(MpegEncContext *s, int picture_number){
     put_bits(&s->pb, 2, s->pict_type); //I 0 vs. 1 ?
-    put_bits(&s->pb, 1, 0);	/* unknown bit */
+    put_bits(&s->pb, 1, 0);     /* unknown bit */
     put_bits(&s->pb, 5, s->qscale);
-        
+
     put_bits(&s->pb, 8, picture_number&0xFF); //FIXME wrong, but correct is not known
     s->mb_x= s->mb_y= 0;
     ff_h263_encode_mba(s);
-    
+
     put_bits(&s->pb, 1, s->no_rounding);
-    
+
     assert(s->f_code == 1);
     assert(s->unrestricted_mv == 1);
 //    assert(s->h263_aic== (s->pict_type == I_TYPE));
@@ -279,7 +279,7 @@ void rv20_encode_picture_header(MpegEncContext *s, int picture_number){
 
     s->h263_aic= s->pict_type == I_TYPE;
     if(s->h263_aic){
-        s->y_dc_scale_table= 
+        s->y_dc_scale_table=
         s->c_dc_scale_table= ff_aic_dc_scale_table;
     }else{
         s->y_dc_scale_table=
@@ -308,7 +308,7 @@ static int get_num(GetBitContext *gb)
 static int rv10_decode_picture_header(MpegEncContext *s)
 {
     int mb_count, pb_frame, marker, unk, mb_xy;
-    
+
 //printf("ff:%d\n", full_frame);
     marker = get_bits(&s->gb, 1);
 
@@ -323,7 +323,7 @@ static int rv10_decode_picture_header(MpegEncContext *s)
 #ifdef DEBUG
     printf("pict_type=%d pb_frame=%d\n", s->pict_type, pb_frame);
 #endif
-    
+
     if (pb_frame){
         av_log(s->avctx, AV_LOG_ERROR, "pb frame not supported\n");
         return -1;
@@ -354,15 +354,15 @@ static int rv10_decode_picture_header(MpegEncContext *s)
 
     mb_xy= s->mb_x + s->mb_y*s->mb_width;
     if(show_bits(&s->gb, 12)==0 || (mb_xy && mb_xy < s->mb_num)){
-        s->mb_x = get_bits(&s->gb, 6);	/* mb_x */
-        s->mb_y = get_bits(&s->gb, 6);	/* mb_y */
+        s->mb_x = get_bits(&s->gb, 6); /* mb_x */
+        s->mb_y = get_bits(&s->gb, 6); /* mb_y */
         mb_count = get_bits(&s->gb, 12);
     } else {
         s->mb_x = 0;
         s->mb_y = 0;
         mb_count = s->mb_width * s->mb_height;
     }
-    unk= get_bits(&s->gb, 3);	/* ignored */
+    unk= get_bits(&s->gb, 3);   /* ignored */
 //printf("%d\n", unk);
     s->f_code = 1;
     s->unrestricted_mv = 1;
@@ -373,7 +373,7 @@ static int rv10_decode_picture_header(MpegEncContext *s)
 static int rv20_decode_picture_header(MpegEncContext *s)
 {
     int seq, mb_pos, i;
-    
+
 #if 0
     GetBitContext gb= s->gb;
     for(i=0; i<64; i++){
@@ -389,13 +389,13 @@ static int rv20_decode_picture_header(MpegEncContext *s)
     }
     av_log(s->avctx, AV_LOG_DEBUG, "\n");
 #endif
-    
+
     if(s->avctx->sub_id == 0x30202002 || s->avctx->sub_id == 0x30203002){
         if (get_bits(&s->gb, 3)){
             av_log(s->avctx, AV_LOG_ERROR, "unknown triplet set\n");
             return -1;
-        } 
-    }   
+        }
+    }
 
     i= get_bits(&s->gb, 2);
     switch(i){
@@ -403,16 +403,16 @@ static int rv20_decode_picture_header(MpegEncContext *s)
     case 1: s->pict_type= I_TYPE; break; //hmm ...
     case 2: s->pict_type= P_TYPE; break;
     case 3: s->pict_type= B_TYPE; break;
-    default: 
+    default:
         av_log(s->avctx, AV_LOG_ERROR, "unknown frame type\n");
         return -1;
     }
-    
+
     if(s->last_picture_ptr==NULL && s->pict_type==B_TYPE){
         av_log(s->avctx, AV_LOG_ERROR, "early B pix\n");
         return -1;
     }
-    
+
     if (get_bits(&s->gb, 1)){
         av_log(s->avctx, AV_LOG_ERROR, "unknown bit set\n");
         return -1;
@@ -429,7 +429,7 @@ static int rv20_decode_picture_header(MpegEncContext *s)
             return -1;
         }
     }
-        
+
     if(s->avctx->has_b_frames){
         int f=9;
         int v= s->avctx->extradata_size >= 4 ? ((uint8_t*)s->avctx->extradata)[1] : 0;
@@ -440,7 +440,7 @@ static int rv20_decode_picture_header(MpegEncContext *s)
         }
         seq= get_bits(&s->gb, 14)<<1;
 
-        if(v) 
+        if(v)
             f= get_bits(&s->gb, av_log2(v));
 
         if(s->avctx->debug & FF_DEBUG_PICT_INFO){
@@ -450,7 +450,7 @@ static int rv20_decode_picture_header(MpegEncContext *s)
         seq= get_bits(&s->gb, 8)*128;
     }
 
-//     if(s->avctx->sub_id <= 0x20201002){ //0x20201002 definitely needs this 
+//     if(s->avctx->sub_id <= 0x20201002){ //0x20201002 definitely needs this
     mb_pos= ff_h263_decode_mba(s);
 /*    }else{
         mb_pos= get_bits(&s->gb, av_log2(s->mb_num-1)+1);
@@ -461,7 +461,7 @@ static int rv20_decode_picture_header(MpegEncContext *s)
     seq |= s->time &~0x7FFF;
     if(seq - s->time >  0x4000) seq -= 0x8000;
     if(seq - s->time < -0x4000) seq += 0x8000;
-    if(seq != s->time){  
+    if(seq != s->time){
         if(s->pict_type!=B_TYPE){
             s->time= seq;
             s->pp_time= s->time - s->last_non_b_time;
@@ -481,7 +481,7 @@ static int rv20_decode_picture_header(MpegEncContext *s)
 }
 av_log(s->avctx, AV_LOG_DEBUG, "\n");*/
     s->no_rounding= get_bits1(&s->gb);
-    
+
     s->f_code = 1;
     s->unrestricted_mv = 1;
     s->h263_aic= s->pict_type == I_TYPE;
@@ -490,9 +490,9 @@ av_log(s->avctx, AV_LOG_DEBUG, "\n");*/
 //    s->umvplus=1;
     s->modified_quant=1;
     s->loop_filter=1;
-    
+
     if(s->avctx->debug & FF_DEBUG_PICT_INFO){
-            av_log(s->avctx, AV_LOG_INFO, "num:%5d x:%2d y:%2d type:%d qscale:%2d rnd:%d\n", 
+            av_log(s->avctx, AV_LOG_INFO, "num:%5d x:%2d y:%2d type:%d qscale:%2d rnd:%d\n",
                    seq, s->mb_x, s->mb_y, s->pict_type, s->qscale, s->no_rounding);
     }
 
@@ -507,7 +507,7 @@ static int rv10_decode_init(AVCodecContext *avctx)
     static int done=0;
 
     MPV_decode_defaults(s);
-    
+
     s->avctx= avctx;
     s->out_format = FMT_H263;
     s->codec_id= avctx->codec_id;
@@ -556,7 +556,7 @@ static int rv10_decode_init(AVCodecContext *avctx)
     default:
         av_log(s->avctx, AV_LOG_ERROR, "unknown header %X\n", avctx->sub_id);
     }
-    
+
     if(avctx->debug & FF_DEBUG_PICT_INFO){
         av_log(avctx, AV_LOG_DEBUG, "ver:%X ver0:%X\n", avctx->sub_id, avctx->extradata_size >= 4 ? ((uint32_t*)avctx->extradata)[0] : -1);
     }
@@ -570,10 +570,10 @@ static int rv10_decode_init(AVCodecContext *avctx)
 
     /* init rv vlc */
     if (!done) {
-        init_vlc(&rv_dc_lum, DC_VLC_BITS, 256, 
+        init_vlc(&rv_dc_lum, DC_VLC_BITS, 256,
                  rv_lum_bits, 1, 1,
                  rv_lum_code, 2, 2, 1);
-        init_vlc(&rv_dc_chrom, DC_VLC_BITS, 256, 
+        init_vlc(&rv_dc_chrom, DC_VLC_BITS, 256,
                  rv_chrom_bits, 1, 1,
                  rv_chrom_code, 2, 2, 1);
         done = 1;
@@ -590,7 +590,7 @@ static int rv10_decode_end(AVCodecContext *avctx)
     return 0;
 }
 
-static int rv10_decode_packet(AVCodecContext *avctx, 
+static int rv10_decode_packet(AVCodecContext *avctx,
                              uint8_t *buf, int buf_size)
 {
     MpegEncContext *s = avctx->priv_data;
@@ -605,7 +605,7 @@ static int rv10_decode_packet(AVCodecContext *avctx,
         av_log(s->avctx, AV_LOG_ERROR, "HEADER ERROR\n");
         return -1;
     }
-    
+
     if (s->mb_x >= s->mb_width ||
         s->mb_y >= s->mb_height) {
         av_log(s->avctx, AV_LOG_ERROR, "POS ERROR %d %d\n", s->mb_x, s->mb_y);
@@ -638,12 +638,12 @@ static int rv10_decode_packet(AVCodecContext *avctx,
     if(s->codec_id== CODEC_ID_RV10){
         if(s->mb_y==0) s->first_slice_line=1;
     }else{
-        s->first_slice_line=1;    
+        s->first_slice_line=1;
         s->resync_mb_x= s->mb_x;
         s->resync_mb_y= s->mb_y;
     }
     if(s->h263_aic){
-        s->y_dc_scale_table= 
+        s->y_dc_scale_table=
         s->c_dc_scale_table= ff_aic_dc_scale_table;
     }else{
         s->y_dc_scale_table=
@@ -652,7 +652,7 @@ static int rv10_decode_packet(AVCodecContext *avctx,
 
     if(s->modified_quant)
         s->chroma_qscale_table= ff_h263_chroma_qscale_table;
-        
+
     ff_set_qscale(s, s->qscale);
 
     s->rv10_first_dc_coded[0] = 0;
@@ -676,7 +676,7 @@ static int rv10_decode_packet(AVCodecContext *avctx,
 #endif
 
         s->mv_dir = MV_DIR_FORWARD;
-        s->mv_type = MV_TYPE_16X16; 
+        s->mv_type = MV_TYPE_16X16;
         ret=ff_h263_decode_mb(s, s->block);
 
         if (ret == SLICE_ERROR || s->gb.size_in_bits < get_bits_count(&s->gb)) {
@@ -704,13 +704,13 @@ static int rv10_decode_packet(AVCodecContext *avctx,
     return buf_size;
 }
 
-static int rv10_decode_frame(AVCodecContext *avctx, 
+static int rv10_decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
                              uint8_t *buf, int buf_size)
 {
     MpegEncContext *s = avctx->priv_data;
     int i;
-    AVFrame *pict = data; 
+    AVFrame *pict = data;
 
 #ifdef DEBUG
     printf("*****frame %d size=%d\n", avctx->frame_number, buf_size);
@@ -725,7 +725,7 @@ static int rv10_decode_frame(AVCodecContext *avctx,
         for(i=0; i<avctx->slice_count; i++){
             int offset= avctx->slice_offset[i];
             int size;
-            
+
             if(i+1 == avctx->slice_count)
                 size= buf_size - offset;
             else
@@ -736,11 +736,11 @@ static int rv10_decode_frame(AVCodecContext *avctx,
     }else{
         rv10_decode_packet(avctx, buf, buf_size);
     }
-    
+
     if(s->mb_y>=s->mb_height){
         ff_er_frame_end(s);
         MPV_frame_end(s);
-    
+
         if(s->pict_type==B_TYPE || s->low_delay){
             *pict= *(AVFrame*)&s->current_picture;
             ff_print_debug_info(s, pict);
diff --git a/src/libffmpeg/libavcodec/shorten.c b/src/libffmpeg/libavcodec/shorten.c
index b523a9250..4d80d40a5 100644
--- a/src/libffmpeg/libavcodec/shorten.c
+++ b/src/libffmpeg/libavcodec/shorten.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
@@ -84,7 +84,7 @@ typedef struct ShortenContext {
     uint8_t *bitstream;
     int bitstream_size;
     int bitstream_index;
-    int allocated_bitstream_size;
+    unsigned int allocated_bitstream_size;
     int header_size;
     uint8_t header[OUT_BUFFER_SIZE];
     int version;
diff --git a/src/libffmpeg/libavcodec/simple_idct.c b/src/libffmpeg/libavcodec/simple_idct.c
index 4b488197a..8fa83bec7 100644
--- a/src/libffmpeg/libavcodec/simple_idct.c
+++ b/src/libffmpeg/libavcodec/simple_idct.c
@@ -15,17 +15,17 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file simple_idct.c
  * simpleidct in C.
  */
- 
+
 /*
   based upon some outcommented c code from mpeg2dec (idct_mmx.c
-  written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>) 
+  written by Aaron Holtzman <aholtzma@ess.engr.uvic.ca>)
  */
 #include "avcodec.h"
 #include "dsputil.h"
@@ -75,7 +75,7 @@
 
 static inline void idctRowCondDC (DCTELEM * row)
 {
-	int a0, a1, a2, a3, b0, b1, b2, b3;
+        int a0, a1, a2, a3, b0, b1, b2, b3;
 #ifdef FAST_64BIT
         uint64_t temp;
 #else
@@ -89,7 +89,7 @@ static inline void idctRowCondDC (DCTELEM * row)
 #define ROW0_MASK 0xffffLL
 #endif
         if(sizeof(DCTELEM)==2){
-            if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) | 
+            if ( ((((uint64_t *)row)[0] & ~ROW0_MASK) |
                   ((uint64_t *)row)[1]) == 0) {
                 temp = (row[0] << 3) & 0xffff;
                 temp += temp << 16;
@@ -97,7 +97,7 @@ static inline void idctRowCondDC (DCTELEM * row)
                 ((uint64_t *)row)[0] = temp;
                 ((uint64_t *)row)[1] = temp;
                 return;
-	    }
+            }
         }else{
             if (!(row[1]|row[2]|row[3]|row[4]|row[5]|row[6]|row[7])) {
                 row[0]=row[1]=row[2]=row[3]=row[4]=row[5]=row[6]=row[7]= row[0] << 3;
@@ -108,7 +108,7 @@ static inline void idctRowCondDC (DCTELEM * row)
         if(sizeof(DCTELEM)==2){
             if (!(((uint32_t*)row)[1] |
                   ((uint32_t*)row)[2] |
-                  ((uint32_t*)row)[3] | 
+                  ((uint32_t*)row)[3] |
                   row[1])) {
                 temp = (row[0] << 3) & 0xffff;
                 temp += temp << 16;
@@ -125,9 +125,9 @@ static inline void idctRowCondDC (DCTELEM * row)
 #endif
 
         a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
-	a1 = a0;
-	a2 = a0;
-	a3 = a0;
+        a1 = a0;
+        a2 = a0;
+        a3 = a0;
 
         /* no need to optimize : gcc does it */
         a0 += W2 * row[2];
@@ -149,7 +149,7 @@ static inline void idctRowCondDC (DCTELEM * row)
 #else
         temp = ((uint32_t*)row)[2] | ((uint32_t*)row)[3];
 #endif
-	if (temp != 0) {
+        if (temp != 0) {
             a0 += W4*row[4] + W6*row[6];
             a1 += - W4*row[4] - W2*row[6];
             a2 += - W4*row[4] + W2*row[6];
@@ -157,38 +157,38 @@ static inline void idctRowCondDC (DCTELEM * row)
 
             MAC16(b0, W5, row[5]);
             MAC16(b0, W7, row[7]);
-            
+
             MAC16(b1, -W1, row[5]);
             MAC16(b1, -W5, row[7]);
-            
+
             MAC16(b2, W7, row[5]);
             MAC16(b2, W3, row[7]);
-            
+
             MAC16(b3, W3, row[5]);
             MAC16(b3, -W1, row[7]);
-	}
-
-	row[0] = (a0 + b0) >> ROW_SHIFT;
-	row[7] = (a0 - b0) >> ROW_SHIFT;
-	row[1] = (a1 + b1) >> ROW_SHIFT;
-	row[6] = (a1 - b1) >> ROW_SHIFT;
-	row[2] = (a2 + b2) >> ROW_SHIFT;
-	row[5] = (a2 - b2) >> ROW_SHIFT;
-	row[3] = (a3 + b3) >> ROW_SHIFT;
-	row[4] = (a3 - b3) >> ROW_SHIFT;
+        }
+
+        row[0] = (a0 + b0) >> ROW_SHIFT;
+        row[7] = (a0 - b0) >> ROW_SHIFT;
+        row[1] = (a1 + b1) >> ROW_SHIFT;
+        row[6] = (a1 - b1) >> ROW_SHIFT;
+        row[2] = (a2 + b2) >> ROW_SHIFT;
+        row[5] = (a2 - b2) >> ROW_SHIFT;
+        row[3] = (a3 + b3) >> ROW_SHIFT;
+        row[4] = (a3 - b3) >> ROW_SHIFT;
 }
 
-static inline void idctSparseColPut (uint8_t *dest, int line_size, 
+static inline void idctSparseColPut (uint8_t *dest, int line_size,
                                      DCTELEM * col)
 {
-	int a0, a1, a2, a3, b0, b1, b2, b3;
+        int a0, a1, a2, a3, b0, b1, b2, b3;
         uint8_t *cm = cropTbl + MAX_NEG_CROP;
 
         /* XXX: I did that only to give same values as previous code */
-	a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
-	a1 = a0;
-	a2 = a0;
-	a3 = a0;
+        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
+        a1 = a0;
+        a2 = a0;
+        a3 = a0;
 
         a0 +=  + W2*col[8*2];
         a1 +=  + W6*col[8*2];
@@ -205,33 +205,33 @@ static inline void idctSparseColPut (uint8_t *dest, int line_size,
         MAC16(b2, - W1, col[8*3]);
         MAC16(b3, - W5, col[8*3]);
 
-	if(col[8*4]){
+        if(col[8*4]){
             a0 += + W4*col[8*4];
             a1 += - W4*col[8*4];
             a2 += - W4*col[8*4];
             a3 += + W4*col[8*4];
-	}
+        }
 
-	if (col[8*5]) {
+        if (col[8*5]) {
             MAC16(b0, + W5, col[8*5]);
             MAC16(b1, - W1, col[8*5]);
             MAC16(b2, + W7, col[8*5]);
             MAC16(b3, + W3, col[8*5]);
-	}
+        }
 
-	if(col[8*6]){
+        if(col[8*6]){
             a0 += + W6*col[8*6];
             a1 += - W2*col[8*6];
             a2 += + W2*col[8*6];
             a3 += - W6*col[8*6];
-	}
+        }
 
-	if (col[8*7]) {
+        if (col[8*7]) {
             MAC16(b0, + W7, col[8*7]);
             MAC16(b1, - W5, col[8*7]);
             MAC16(b2, + W3, col[8*7]);
             MAC16(b3, - W1, col[8*7]);
-	}
+        }
 
         dest[0] = cm[(a0 + b0) >> COL_SHIFT];
         dest += line_size;
@@ -250,17 +250,17 @@ static inline void idctSparseColPut (uint8_t *dest, int line_size,
         dest[0] = cm[(a0 - b0) >> COL_SHIFT];
 }
 
-static inline void idctSparseColAdd (uint8_t *dest, int line_size, 
+static inline void idctSparseColAdd (uint8_t *dest, int line_size,
                                      DCTELEM * col)
 {
-	int a0, a1, a2, a3, b0, b1, b2, b3;
+        int a0, a1, a2, a3, b0, b1, b2, b3;
         uint8_t *cm = cropTbl + MAX_NEG_CROP;
 
         /* XXX: I did that only to give same values as previous code */
-	a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
-	a1 = a0;
-	a2 = a0;
-	a3 = a0;
+        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
+        a1 = a0;
+        a2 = a0;
+        a3 = a0;
 
         a0 +=  + W2*col[8*2];
         a1 +=  + W6*col[8*2];
@@ -277,33 +277,33 @@ static inline void idctSparseColAdd (uint8_t *dest, int line_size,
         MAC16(b2, - W1, col[8*3]);
         MAC16(b3, - W5, col[8*3]);
 
-	if(col[8*4]){
+        if(col[8*4]){
             a0 += + W4*col[8*4];
             a1 += - W4*col[8*4];
             a2 += - W4*col[8*4];
             a3 += + W4*col[8*4];
-	}
+        }
 
-	if (col[8*5]) {
+        if (col[8*5]) {
             MAC16(b0, + W5, col[8*5]);
             MAC16(b1, - W1, col[8*5]);
             MAC16(b2, + W7, col[8*5]);
             MAC16(b3, + W3, col[8*5]);
-	}
+        }
 
-	if(col[8*6]){
+        if(col[8*6]){
             a0 += + W6*col[8*6];
             a1 += - W2*col[8*6];
             a2 += + W2*col[8*6];
             a3 += - W6*col[8*6];
-	}
+        }
 
-	if (col[8*7]) {
+        if (col[8*7]) {
             MAC16(b0, + W7, col[8*7]);
             MAC16(b1, - W5, col[8*7]);
             MAC16(b2, + W3, col[8*7]);
             MAC16(b3, - W1, col[8*7]);
-	}
+        }
 
         dest[0] = cm[dest[0] + ((a0 + b0) >> COL_SHIFT)];
         dest += line_size;
@@ -324,13 +324,13 @@ static inline void idctSparseColAdd (uint8_t *dest, int line_size,
 
 static inline void idctSparseCol (DCTELEM * col)
 {
-	int a0, a1, a2, a3, b0, b1, b2, b3;
+        int a0, a1, a2, a3, b0, b1, b2, b3;
 
         /* XXX: I did that only to give same values as previous code */
-	a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
-	a1 = a0;
-	a2 = a0;
-	a3 = a0;
+        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));
+        a1 = a0;
+        a2 = a0;
+        a3 = a0;
 
         a0 +=  + W2*col[8*2];
         a1 +=  + W6*col[8*2];
@@ -347,33 +347,33 @@ static inline void idctSparseCol (DCTELEM * col)
         MAC16(b2, - W1, col[8*3]);
         MAC16(b3, - W5, col[8*3]);
 
-	if(col[8*4]){
+        if(col[8*4]){
             a0 += + W4*col[8*4];
             a1 += - W4*col[8*4];
             a2 += - W4*col[8*4];
             a3 += + W4*col[8*4];
-	}
+        }
 
-	if (col[8*5]) {
+        if (col[8*5]) {
             MAC16(b0, + W5, col[8*5]);
             MAC16(b1, - W1, col[8*5]);
             MAC16(b2, + W7, col[8*5]);
             MAC16(b3, + W3, col[8*5]);
-	}
+        }
 
-	if(col[8*6]){
+        if(col[8*6]){
             a0 += + W6*col[8*6];
             a1 += - W2*col[8*6];
             a2 += + W2*col[8*6];
             a3 += - W6*col[8*6];
-	}
+        }
 
-	if (col[8*7]) {
+        if (col[8*7]) {
             MAC16(b0, + W7, col[8*7]);
             MAC16(b1, - W5, col[8*7]);
             MAC16(b2, + W3, col[8*7]);
             MAC16(b3, - W1, col[8*7]);
-	}
+        }
 
         col[0 ] = ((a0 + b0) >> COL_SHIFT);
         col[8 ] = ((a1 + b1) >> COL_SHIFT);
@@ -390,7 +390,7 @@ void simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
     int i;
     for(i=0; i<8; i++)
         idctRowCondDC(block + i*8);
-    
+
     for(i=0; i<8; i++)
         idctSparseColPut(dest + i, line_size, block + i);
 }
@@ -400,7 +400,7 @@ void simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
     int i;
     for(i=0; i<8; i++)
         idctRowCondDC(block + i*8);
-    
+
     for(i=0; i<8; i++)
         idctSparseColAdd(dest + i, line_size, block + i);
 }
@@ -410,7 +410,7 @@ void simple_idct(DCTELEM *block)
     int i;
     for(i=0; i<8; i++)
         idctRowCondDC(block + i*8);
-    
+
     for(i=0; i<8; i++)
         idctSparseCol(block + i);
 }
@@ -467,7 +467,7 @@ void simple_idct248_put(uint8_t *dest, int line_size, DCTELEM *block)
 {
     int i;
     DCTELEM *ptr;
-    
+
     /* butterfly */
     ptr = block;
     for(i=0;i<4;i++) {
diff --git a/src/libffmpeg/libavcodec/simple_idct.h b/src/libffmpeg/libavcodec/simple_idct.h
index 2da822771..64f410f0d 100644
--- a/src/libffmpeg/libavcodec/simple_idct.h
+++ b/src/libffmpeg/libavcodec/simple_idct.h
@@ -15,14 +15,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file simple_idct.h
  * simple idct header.
  */
- 
+
 void simple_idct_put(uint8_t *dest, int line_size, DCTELEM *block);
 void simple_idct_add(uint8_t *dest, int line_size, DCTELEM *block);
 void ff_simple_idct_mmx(int16_t *block);
diff --git a/src/libffmpeg/libavcodec/smc.c b/src/libffmpeg/libavcodec/smc.c
index dbb5adef1..a08beeacd 100644
--- a/src/libffmpeg/libavcodec/smc.c
+++ b/src/libffmpeg/libavcodec/smc.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -168,7 +168,7 @@ static void smc_decode_stream(SmcContext *s)
 
             /* figure out where the previous block started */
             if (pixel_ptr == 0)
-                prev_block_ptr1 = 
+                prev_block_ptr1 =
                     (row_ptr - s->avctx->width * 4) + s->avctx->width - 4;
             else
                 prev_block_ptr1 = row_ptr + pixel_ptr - 4;
@@ -195,14 +195,14 @@ static void smc_decode_stream(SmcContext *s)
 
             /* sanity check */
             if ((row_ptr == 0) && (pixel_ptr < 2 * 4)) {
-        	av_log(s->avctx, AV_LOG_INFO, "encountered repeat block opcode (%02X) but not enough blocks rendered yet\n",
+                av_log(s->avctx, AV_LOG_INFO, "encountered repeat block opcode (%02X) but not enough blocks rendered yet\n",
                     opcode & 0xF0);
                 break;
             }
 
             /* figure out where the previous 2 blocks started */
             if (pixel_ptr == 0)
-                prev_block_ptr1 = (row_ptr - s->avctx->width * 4) + 
+                prev_block_ptr1 = (row_ptr - s->avctx->width * 4) +
                     s->avctx->width - 4 * 2;
             else if (pixel_ptr == 4)
                 prev_block_ptr1 = (row_ptr - s->avctx->width * 4) + row_inc;
@@ -326,7 +326,7 @@ static void smc_decode_stream(SmcContext *s)
                 block_ptr = row_ptr + pixel_ptr;
                 for (pixel_y = 0; pixel_y < 4; pixel_y++) {
                     for (pixel_x = 0; pixel_x < 4; pixel_x++) {
-                        pixel = color_table_index + 
+                        pixel = color_table_index +
                             ((color_flags >> flag_mask) & 0x03);
                         flag_mask -= 2;
                         pixels[block_ptr++] = s->color_quads[pixel];
@@ -394,7 +394,7 @@ static void smc_decode_stream(SmcContext *s)
                         flag_mask = 21;
                     }
                     for (pixel_x = 0; pixel_x < 4; pixel_x++) {
-                        pixel = color_table_index + 
+                        pixel = color_table_index +
                             ((color_flags >> flag_mask) & 0x07);
                         flag_mask -= 3;
                         pixels[block_ptr++] = s->color_octets[pixel];
@@ -452,7 +452,7 @@ static int smc_decode_frame(AVCodecContext *avctx,
     s->size = buf_size;
 
     s->frame.reference = 1;
-    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE | 
+    s->frame.buffer_hints = FF_BUFFER_HINTS_VALID | FF_BUFFER_HINTS_PRESERVE |
                             FF_BUFFER_HINTS_REUSABLE | FF_BUFFER_HINTS_READABLE;
     if (avctx->reget_buffer(avctx, &s->frame)) {
         av_log(s->avctx, AV_LOG_ERROR, "reget_buffer() failed\n");
diff --git a/src/libffmpeg/libavcodec/snow.c b/src/libffmpeg/libavcodec/snow.c
index 21a593f13..ad69c3241 100644
--- a/src/libffmpeg/libavcodec/snow.c
+++ b/src/libffmpeg/libavcodec/snow.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #include "avcodec.h"
@@ -383,10 +383,19 @@ typedef struct BlockNode{
     uint8_t type;
 //#define TYPE_SPLIT    1
 #define BLOCK_INTRA   1
+#define BLOCK_OPT     2
 //#define TYPE_NOCOLOR  4
     uint8_t level; //FIXME merge into type?
 }BlockNode;
 
+static const BlockNode null_block= { //FIXME add border maybe
+    .color= {128,128,128},
+    .mx= 0,
+    .my= 0,
+    .type= 0,
+    .level= 0,
+};
+
 #define LOG2_MB_SIZE 4
 #define MB_SIZE (1<<LOG2_MB_SIZE)
 
@@ -433,7 +442,8 @@ typedef struct SnowContext{
     AVCodecContext *avctx;
     RangeCoder c;
     DSPContext dsp;
-    AVFrame input_picture;
+    AVFrame new_picture;
+    AVFrame input_picture;              ///< new_picture with the internal linesizes
     AVFrame current_picture;
     AVFrame last_picture;
     AVFrame mconly_picture;
@@ -463,6 +473,9 @@ typedef struct SnowContext{
     int block_max_depth;
     Plane plane[MAX_PLANES];
     BlockNode *block;
+#define ME_CACHE_SIZE 1024
+    int me_cache[ME_CACHE_SIZE];
+    int me_cache_generation;
     slice_buffer sb;
 
     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
@@ -479,22 +492,24 @@ typedef struct {
 #define slice_buffer_get_line(slice_buf, line_num) ((slice_buf)->line[line_num] ? (slice_buf)->line[line_num] : slice_buffer_load_line((slice_buf), (line_num)))
 //#define slice_buffer_get_line(slice_buf, line_num) (slice_buffer_load_line((slice_buf), (line_num)))
 
+static void iterative_me(SnowContext *s);
+
 static void slice_buffer_init(slice_buffer * buf, int line_count, int max_allocated_lines, int line_width, DWTELEM * base_buffer)
 {
     int i;
-  
+
     buf->base_buffer = base_buffer;
     buf->line_count = line_count;
     buf->line_width = line_width;
     buf->data_count = max_allocated_lines;
     buf->line = (DWTELEM * *) av_mallocz (sizeof(DWTELEM *) * line_count);
     buf->data_stack = (DWTELEM * *) av_malloc (sizeof(DWTELEM *) * max_allocated_lines);
-  
+
     for (i = 0; i < max_allocated_lines; i++)
     {
       buf->data_stack[i] = (DWTELEM *) av_malloc (sizeof(DWTELEM) * line_width);
     }
-    
+
     buf->data_stack_top = max_allocated_lines - 1;
 }
 
@@ -502,21 +517,21 @@ static DWTELEM * slice_buffer_load_line(slice_buffer * buf, int line)
 {
     int offset;
     DWTELEM * buffer;
-  
-//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);  
-  
+
+//  av_log(NULL, AV_LOG_DEBUG, "Cache hit: %d\n", line);
+
     assert(buf->data_stack_top >= 0);
 //  assert(!buf->line[line]);
     if (buf->line[line])
         return buf->line[line];
-    
+
     offset = buf->line_width * line;
     buffer = buf->data_stack[buf->data_stack_top];
     buf->data_stack_top--;
     buf->line[line] = buffer;
-  
+
 //  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_load_line: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
-  
+
     return buffer;
 }
 
@@ -533,7 +548,7 @@ static void slice_buffer_release(slice_buffer * buf, int line)
     buf->data_stack_top++;
     buf->data_stack[buf->data_stack_top] = buffer;
     buf->line[line] = NULL;
-  
+
 //  av_log(NULL, AV_LOG_DEBUG, "slice_buffer_release: line: %d remaining: %d\n", line, buf->data_stack_top + 1);
 }
 
@@ -554,7 +569,7 @@ static void slice_buffer_destroy(slice_buffer * buf)
 {
     int i;
     slice_buffer_flush(buf);
-  
+
     for (i = buf->data_count - 1; i >= 0; i--)
     {
         assert(buf->data_stack[i]);
@@ -566,17 +581,19 @@ static void slice_buffer_destroy(slice_buffer * buf)
     av_free(buf->line);
 }
 
-#ifdef	__sgi
+#ifdef __sgi
 // Avoid a name clash on SGI IRIX
-#undef	qexp
+#undef qexp
 #endif
 #define QEXPSHIFT (7-FRAC_BITS+8) //FIXME try to change this to 0
 static uint8_t qexp[QROOT];
 
 static inline int mirror(int v, int m){
-    if     (v<0) return -v;
-    else if(v>m) return 2*m-v;
-    else         return v;
+    while((unsigned)v > (unsigned)m){
+        v=-v;
+        if(v<0) v+= 2*m;
+    }
+    return v;
 }
 
 static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signed){
@@ -586,7 +603,7 @@ static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signe
         const int a= ABS(v);
         const int e= av_log2(a);
 #if 1
-        const int el= FFMIN(e, 10);   
+        const int el= FFMIN(e, 10);
         put_rac(c, state+0, 0);
 
         for(i=0; i<el; i++){
@@ -607,7 +624,7 @@ static inline void put_symbol(RangeCoder *c, uint8_t *state, int v, int is_signe
         if(is_signed)
             put_rac(c, state+11 + el, v < 0); //11..21
 #else
-        
+
         put_rac(c, state+0, 0);
         if(e<=9){
             for(i=0; i<e; i++){
@@ -676,7 +693,7 @@ static inline void put_symbol2(RangeCoder *c, uint8_t *state, int v, int log2){
         if(log2>0) r+=r;
     }
     put_rac(c, state+4+log2, 0);
-    
+
     for(i=log2-1; i>=0; i--){
         put_rac(c, state+31-i, (v>>i)&1);
     }
@@ -694,7 +711,7 @@ static inline int get_symbol2(RangeCoder *c, uint8_t *state, int log2){
         log2++;
         if(log2>0) r+=r;
     }
-    
+
     for(i=log2-1; i>=0; i--){
         v+= get_rac(c, state+31-i)<<i;
     }
@@ -714,11 +731,11 @@ static always_inline void lift(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int dst
         dst += dst_step;
         src += src_step;
     }
-    
+
     for(i=0; i<w; i++){
         dst[i*dst_step] = LIFT(src[i*src_step], ((mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add)>>shift), inverse);
     }
-    
+
     if(mirror_right){
         dst[w*dst_step] = LIFT(src[w*src_step], ((mul*2*ref[w*ref_step]+add)>>shift), inverse);
     }
@@ -738,14 +755,14 @@ static always_inline void lift5(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds
         dst += dst_step;
         src += src_step;
     }
-    
+
     for(i=0; i<w; i++){
         int r= 3*(ref[i*ref_step] + ref[(i+1)*ref_step]);
         r += r>>4;
         r += r>>8;
         dst[i*dst_step] = LIFT(src[i*src_step], ((r+add)>>shift), inverse);
     }
-    
+
     if(mirror_right){
         int r= 3*2*ref[w*ref_step];
         r += r>>4;
@@ -767,11 +784,11 @@ static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds
         dst += dst_step;
         src += src_step;
     }
-    
+
     for(i=0; i<w; i++){
         dst[i*dst_step] = LIFTS(src[i*src_step], mul*(ref[i*ref_step] + ref[(i+1)*ref_step])+add, inverse);
     }
-    
+
     if(mirror_right){
         dst[w*dst_step] = LIFTS(src[w*src_step], mul*2*ref[w*ref_step]+add, inverse);
     }
@@ -780,7 +797,7 @@ static always_inline void liftS(DWTELEM *dst, DWTELEM *src, DWTELEM *ref, int ds
 
 static void inplace_lift(DWTELEM *dst, int width, int *coeffs, int n, int shift, int start, int inverse){
     int x, i;
-    
+
     for(x=start; x<width; x+=2){
         int64_t sum=0;
 
@@ -800,7 +817,7 @@ static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *
     for(y=start; y<height; y+=2){
         for(x=0; x<width; x++){
             int64_t sum=0;
-    
+
             for(i=0; i<n; i++){
                 int y2= y + 2*i - n + 1;
                 if     (y2<      0) y2= -y2;
@@ -859,7 +876,7 @@ static void inplace_liftV(DWTELEM *dst, int width, int height, int stride, int *
 #define N4 0
 #define SHIFT4 0
 #define COEFFS4 NULL
-#elif 1 // 11/5 
+#elif 1 // 11/5
 #define N1 0
 #define SHIFT1 1
 #define COEFFS1 NULL
@@ -937,7 +954,7 @@ static void horizontal_decomposeX(DWTELEM *b, int width){
     inplace_lift(b, width, COEFFS2, N2, SHIFT2, LX0, 0);
     inplace_lift(b, width, COEFFS3, N3, SHIFT3, LX1, 0);
     inplace_lift(b, width, COEFFS4, N4, SHIFT4, LX0, 0);
-    
+
     for(x=0; x<width2; x++){
         temp[x   ]= b[2*x    ];
         temp[x+w2]= b[2*x + 1];
@@ -969,7 +986,7 @@ static void horizontal_composeX(DWTELEM *b, int width){
 
 static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int stride){
     int x, y;
-  
+
     for(y=0; y<height; y++){
         for(x=0; x<width; x++){
             buffer[y*stride + x] *= SCALEX;
@@ -979,16 +996,16 @@ static void spatial_decomposeX(DWTELEM *buffer, int width, int height, int strid
     for(y=0; y<height; y++){
         horizontal_decomposeX(buffer + y*stride, width);
     }
-    
+
     inplace_liftV(buffer, width, height, stride, COEFFS1, N1, SHIFT1, LX1, 0);
     inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 0);
     inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 0);
-    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);    
+    inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 0);
 }
 
 static void spatial_composeX(DWTELEM *buffer, int width, int height, int stride){
     int x, y;
-  
+
     inplace_liftV(buffer, width, height, stride, COEFFS4, N4, SHIFT4, LX0, 1);
     inplace_liftV(buffer, width, height, stride, COEFFS3, N3, SHIFT3, LX1, 1);
     inplace_liftV(buffer, width, height, stride, COEFFS2, N2, SHIFT2, LX0, 1);
@@ -1048,7 +1065,7 @@ static void horizontal_decompose53i(DWTELEM *b, int width){
     b[width -1] = A3;
     b[width2-1] = A2;
     }
-#else        
+#else
     lift(b+w2, temp+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 0);
     lift(b   , temp   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 0);
 #endif
@@ -1056,7 +1073,7 @@ static void horizontal_decompose53i(DWTELEM *b, int width){
 
 static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
         b1[i] -= (b0[i] + b2[i])>>1;
     }
@@ -1064,7 +1081,7 @@ static void vertical_decompose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int w
 
 static void vertical_decompose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
         b1[i] += (b0[i] + b2[i] + 2)>>2;
     }
@@ -1074,21 +1091,21 @@ static void spatial_decompose53i(DWTELEM *buffer, int width, int height, int str
     int y;
     DWTELEM *b0= buffer + mirror(-2-1, height-1)*stride;
     DWTELEM *b1= buffer + mirror(-2  , height-1)*stride;
-  
+
     for(y=-2; y<height; y+=2){
         DWTELEM *b2= buffer + mirror(y+1, height-1)*stride;
         DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
 
 {START_TIMER
-        if(b1 <= b3)     horizontal_decompose53i(b2, width);
-        if(y+2 < height) horizontal_decompose53i(b3, width);
+        if(y+1<(unsigned)height) horizontal_decompose53i(b2, width);
+        if(y+2<(unsigned)height) horizontal_decompose53i(b3, width);
 STOP_TIMER("horizontal_decompose53i")}
-        
+
 {START_TIMER
-        if(b1 <= b3) vertical_decompose53iH0(b1, b2, b3, width);
-        if(b0 <= b2) vertical_decompose53iL0(b0, b1, b2, width);
+        if(y+1<(unsigned)height) vertical_decompose53iH0(b1, b2, b3, width);
+        if(y+0<(unsigned)height) vertical_decompose53iL0(b0, b1, b2, width);
 STOP_TIMER("vertical_decompose53i*")}
-        
+
         b0=b2;
         b1=b3;
     }
@@ -1177,7 +1194,7 @@ static void horizontal_decompose97i(DWTELEM *b, int width){
 
 static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
         b1[i] -= (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
     }
@@ -1185,7 +1202,7 @@ static void vertical_decompose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int w
 
 static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
 #ifdef lift5
         b1[i] += (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
@@ -1200,7 +1217,7 @@ static void vertical_decompose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int w
 
 static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
 #ifdef liftS
         b1[i] -= (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
@@ -1212,7 +1229,7 @@ static void vertical_decompose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int w
 
 static void vertical_decompose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
         b1[i] += (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
     }
@@ -1224,28 +1241,28 @@ static void spatial_decompose97i(DWTELEM *buffer, int width, int height, int str
     DWTELEM *b1= buffer + mirror(-4  , height-1)*stride;
     DWTELEM *b2= buffer + mirror(-4+1, height-1)*stride;
     DWTELEM *b3= buffer + mirror(-4+2, height-1)*stride;
-  
+
     for(y=-4; y<height; y+=2){
         DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
         DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
 
 {START_TIMER
-        if(b3 <= b5)     horizontal_decompose97i(b4, width);
-        if(y+4 < height) horizontal_decompose97i(b5, width);
+        if(y+3<(unsigned)height) horizontal_decompose97i(b4, width);
+        if(y+4<(unsigned)height) horizontal_decompose97i(b5, width);
 if(width>400){
 STOP_TIMER("horizontal_decompose97i")
 }}
-        
+
 {START_TIMER
-        if(b3 <= b5) vertical_decompose97iH0(b3, b4, b5, width);
-        if(b2 <= b4) vertical_decompose97iL0(b2, b3, b4, width);
-        if(b1 <= b3) vertical_decompose97iH1(b1, b2, b3, width);
-        if(b0 <= b2) vertical_decompose97iL1(b0, b1, b2, width);
+        if(y+3<(unsigned)height) vertical_decompose97iH0(b3, b4, b5, width);
+        if(y+2<(unsigned)height) vertical_decompose97iL0(b2, b3, b4, width);
+        if(y+1<(unsigned)height) vertical_decompose97iH1(b1, b2, b3, width);
+        if(y+0<(unsigned)height) vertical_decompose97iL1(b0, b1, b2, width);
 
 if(width>400){
 STOP_TIMER("vertical_decompose97i")
 }}
-        
+
         b0=b2;
         b1=b3;
         b2=b4;
@@ -1255,7 +1272,7 @@ STOP_TIMER("vertical_decompose97i")
 
 void ff_spatial_dwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
     int level;
-    
+
     for(level=0; level<decomposition_count; level++){
         switch(type){
         case 0: spatial_decompose97i(buffer, width>>level, height>>level, stride<<level); break;
@@ -1300,7 +1317,7 @@ static void horizontal_compose53i(DWTELEM *b, int width){
     A2 += (A1 + A3 + 2)>>2;
     b[width -1] = A3;
     b[width2-1] = A2;
-#else   
+#else
     lift(temp   , b   , b+w2, 1, 1, 1, width,  1, 2, 2, 0, 1);
     lift(temp+w2, b+w2, temp, 1, 1, 1, width, -1, 0, 1, 1, 1);
 #endif
@@ -1314,7 +1331,7 @@ static void horizontal_compose53i(DWTELEM *b, int width){
 
 static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
         b1[i] += (b0[i] + b2[i])>>1;
     }
@@ -1322,7 +1339,7 @@ static void vertical_compose53iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int wid
 
 static void vertical_compose53iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
         b1[i] -= (b0[i] + b2[i] + 2)>>2;
     }
@@ -1342,24 +1359,20 @@ static void spatial_compose53i_init(dwt_compose_t *cs, DWTELEM *buffer, int heig
 
 static void spatial_compose53i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
     int y= cs->y;
-    int mirror0 = mirror(y-1, height-1);
-    int mirror1 = mirror(y  , height-1);
-    int mirror2 = mirror(y+1, height-1);
-    int mirror3 = mirror(y+2, height-1);
-    
+
     DWTELEM *b0= cs->b0;
     DWTELEM *b1= cs->b1;
-    DWTELEM *b2= slice_buffer_get_line(sb, mirror2 * stride_line);
-    DWTELEM *b3= slice_buffer_get_line(sb, mirror3 * stride_line);
+    DWTELEM *b2= slice_buffer_get_line(sb, mirror(y+1, height-1) * stride_line);
+    DWTELEM *b3= slice_buffer_get_line(sb, mirror(y+2, height-1) * stride_line);
 
 {START_TIMER
-        if(mirror1 <= mirror3) vertical_compose53iL0(b1, b2, b3, width);
-        if(mirror0 <= mirror2) vertical_compose53iH0(b0, b1, b2, width);
+        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
+        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
 STOP_TIMER("vertical_compose53i*")}
 
 {START_TIMER
-        if(y-1 >= 0) horizontal_compose53i(b0, width);
-        if(mirror0 <= mirror2) horizontal_compose53i(b1, width);
+        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
+        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
 STOP_TIMER("horizontal_compose53i")}
 
     cs->b0 = b2;
@@ -1375,13 +1388,13 @@ static void spatial_compose53i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width,
     DWTELEM *b3= buffer + mirror(y+2, height-1)*stride;
 
 {START_TIMER
-        if(b1 <= b3) vertical_compose53iL0(b1, b2, b3, width);
-        if(b0 <= b2) vertical_compose53iH0(b0, b1, b2, width);
+        if(y+1<(unsigned)height) vertical_compose53iL0(b1, b2, b3, width);
+        if(y+0<(unsigned)height) vertical_compose53iH0(b0, b1, b2, width);
 STOP_TIMER("vertical_compose53i*")}
 
 {START_TIMER
-        if(y-1 >= 0) horizontal_compose53i(b0, width);
-        if(b0 <= b2) horizontal_compose53i(b1, width);
+        if(y-1<(unsigned)height) horizontal_compose53i(b0, width);
+        if(y+0<(unsigned)height) horizontal_compose53i(b1, width);
 STOP_TIMER("horizontal_compose53i")}
 
     cs->b0 = b2;
@@ -1394,9 +1407,9 @@ static void spatial_compose53i(DWTELEM *buffer, int width, int height, int strid
     spatial_compose53i_init(&cs, buffer, height, stride);
     while(cs.y <= height)
         spatial_compose53i_dy(&cs, buffer, width, height, stride);
-}   
+}
+
 
- 
 static void horizontal_compose97i(DWTELEM *b, int width){
     DWTELEM temp[width];
     const int w2= (width+1)>>1;
@@ -1409,7 +1422,7 @@ static void horizontal_compose97i(DWTELEM *b, int width){
 
 static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
         b1[i] += (W_AM*(b0[i] + b2[i])+W_AO)>>W_AS;
     }
@@ -1417,7 +1430,7 @@ static void vertical_compose97iH0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int wid
 
 static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
 #ifdef lift5
         b1[i] -= (W_CM*(b0[i] + b2[i])+W_CO)>>W_CS;
@@ -1432,7 +1445,7 @@ static void vertical_compose97iH1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int wid
 
 static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
 #ifdef liftS
         b1[i] += (W_BM*(b0[i] + b2[i])+W_BO)>>W_BS;
@@ -1444,7 +1457,7 @@ static void vertical_compose97iL0(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int wid
 
 static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
         b1[i] -= (W_DM*(b0[i] + b2[i])+W_DO)>>W_DS;
     }
@@ -1452,7 +1465,7 @@ static void vertical_compose97iL1(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, int wid
 
 static void vertical_compose97i(DWTELEM *b0, DWTELEM *b1, DWTELEM *b2, DWTELEM *b3, DWTELEM *b4, DWTELEM *b5, int width){
     int i;
-    
+
     for(i=0; i<width; i++){
 #ifndef lift5
         int r;
@@ -1493,36 +1506,30 @@ static void spatial_compose97i_init(dwt_compose_t *cs, DWTELEM *buffer, int heig
 
 static void spatial_compose97i_dy_buffered(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line){
     int y = cs->y;
-    
-    int mirror0 = mirror(y - 1, height - 1);
-    int mirror1 = mirror(y + 0, height - 1);
-    int mirror2 = mirror(y + 1, height - 1);
-    int mirror3 = mirror(y + 2, height - 1);
-    int mirror4 = mirror(y + 3, height - 1);
-    int mirror5 = mirror(y + 4, height - 1);
+
     DWTELEM *b0= cs->b0;
     DWTELEM *b1= cs->b1;
     DWTELEM *b2= cs->b2;
     DWTELEM *b3= cs->b3;
-    DWTELEM *b4= slice_buffer_get_line(sb, mirror4 * stride_line);
-    DWTELEM *b5= slice_buffer_get_line(sb, mirror5 * stride_line);
-        
+    DWTELEM *b4= slice_buffer_get_line(sb, mirror(y + 3, height - 1) * stride_line);
+    DWTELEM *b5= slice_buffer_get_line(sb, mirror(y + 4, height - 1) * stride_line);
+
 {START_TIMER
     if(y>0 && y+4<height){
         vertical_compose97i(b0, b1, b2, b3, b4, b5, width);
     }else{
-        if(mirror3 <= mirror5) vertical_compose97iL1(b3, b4, b5, width);
-        if(mirror2 <= mirror4) vertical_compose97iH1(b2, b3, b4, width);
-        if(mirror1 <= mirror3) vertical_compose97iL0(b1, b2, b3, width);
-        if(mirror0 <= mirror2) vertical_compose97iH0(b0, b1, b2, width);
+        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
+        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
+        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
+        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
     }
 if(width>400){
 STOP_TIMER("vertical_compose97i")}}
 
 {START_TIMER
-        if(y-1>=  0) horizontal_compose97i(b0, width);
-        if(mirror0 <= mirror2) horizontal_compose97i(b1, width);
-if(width>400 && mirror0 <= mirror2){
+        if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
+        if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
+if(width>400 && y+0<(unsigned)height){
 STOP_TIMER("horizontal_compose97i")}}
 
     cs->b0=b2;
@@ -1541,25 +1548,17 @@ static void spatial_compose97i_dy(dwt_compose_t *cs, DWTELEM *buffer, int width,
     DWTELEM *b4= buffer + mirror(y+3, height-1)*stride;
     DWTELEM *b5= buffer + mirror(y+4, height-1)*stride;
 
-        if(stride == width && y+4 < height && 0){ 
-            int x;
-            for(x=0; x<width/2; x++)
-                b5[x] += 64*2;
-            for(; x<width; x++)
-                b5[x] += 169*2;
-        }
-        
 {START_TIMER
-        if(b3 <= b5) vertical_compose97iL1(b3, b4, b5, width);
-        if(b2 <= b4) vertical_compose97iH1(b2, b3, b4, width);
-        if(b1 <= b3) vertical_compose97iL0(b1, b2, b3, width);
-        if(b0 <= b2) vertical_compose97iH0(b0, b1, b2, width);
+        if(y+3<(unsigned)height) vertical_compose97iL1(b3, b4, b5, width);
+        if(y+2<(unsigned)height) vertical_compose97iH1(b2, b3, b4, width);
+        if(y+1<(unsigned)height) vertical_compose97iL0(b1, b2, b3, width);
+        if(y+0<(unsigned)height) vertical_compose97iH0(b0, b1, b2, width);
 if(width>400){
 STOP_TIMER("vertical_compose97i")}}
 
 {START_TIMER
-        if(y-1>=  0) horizontal_compose97i(b0, width);
-        if(b0 <= b2) horizontal_compose97i(b1, width);
+        if(y-1<(unsigned)height) horizontal_compose97i(b0, width);
+        if(y+0<(unsigned)height) horizontal_compose97i(b1, width);
 if(width>400 && b0 <= b2){
 STOP_TIMER("horizontal_compose97i")}}
 
@@ -1577,7 +1576,7 @@ static void spatial_compose97i(DWTELEM *buffer, int width, int height, int strid
         spatial_compose97i_dy(&cs, buffer, width, height, stride);
 }
 
-void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
+static void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int width, int height, int stride_line, int type, int decomposition_count){
     int level;
     for(level=decomposition_count-1; level>=0; level--){
         switch(type){
@@ -1590,7 +1589,7 @@ void ff_spatial_idwt_buffered_init(dwt_compose_t *cs, slice_buffer * sb, int wid
     }
 }
 
-void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
+static void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
     int level;
     for(level=decomposition_count-1; level>=0; level--){
         switch(type){
@@ -1602,7 +1601,7 @@ void ff_spatial_idwt_init(dwt_compose_t *cs, DWTELEM *buffer, int width, int hei
     }
 }
 
-void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
+static void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count, int y){
     const int support = type==1 ? 3 : 5;
     int level;
     if(type==2) return;
@@ -1620,7 +1619,7 @@ void ff_spatial_idwt_slice(dwt_compose_t *cs, DWTELEM *buffer, int width, int he
     }
 }
 
-void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
+static void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf, int width, int height, int stride_line, int type, int decomposition_count, int y){
     const int support = type==1 ? 3 : 5;
     int level;
     if(type==2) return;
@@ -1638,7 +1637,7 @@ void ff_spatial_idwt_buffered_slice(dwt_compose_t *cs, slice_buffer * slice_buf,
     }
 }
 
-void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
+static void ff_spatial_idwt(DWTELEM *buffer, int width, int height, int stride, int type, int decomposition_count){
     if(type==2){
         int level;
         for(level=decomposition_count-1; level>=0; level--)
@@ -1662,7 +1661,7 @@ static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELE
         int runs[w*h];
         int run_index=0;
         int max_index;
-                
+
         for(y=0; y<h; y++){
             for(x=0; x<w; x++){
                 int v, p=0;
@@ -1688,7 +1687,7 @@ static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELE
                 if(parent){
                     int px= x>>1;
                     int py= y>>1;
-                    if(px<b->parent->width && py<b->parent->height) 
+                    if(px<b->parent->width && py<b->parent->height)
                         p= parent[px + py*2*stride];
                 }
                 if(!(/*ll|*/l|lt|t|rt|p)){
@@ -1709,7 +1708,7 @@ static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELE
         put_symbol2(&s->c, b->state[30], max_index, 0);
         if(run_index <= max_index)
             put_symbol2(&s->c, b->state[1], run, 3);
-        
+
         for(y=0; y<h; y++){
             if(s->c.bytestream_end - s->c.bytestream < w*40){
                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
@@ -1739,7 +1738,7 @@ static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELE
                 if(parent){
                     int px= x>>1;
                     int py= y>>1;
-                    if(px<b->parent->width && py<b->parent->height) 
+                    if(px<b->parent->width && py<b->parent->height)
                         p= parent[px + py*2*stride];
                 }
                 if(/*ll|*/l|lt|t|rt|p){
@@ -1772,7 +1771,7 @@ static int encode_subband_c0run(SnowContext *s, SubBand *b, DWTELEM *src, DWTELE
     return 0;
 }
 
-static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){    
+static int encode_subband(SnowContext *s, SubBand *b, DWTELEM *src, DWTELEM *parent, int stride, int orientation){
 //    encode_subband_qtree(s, b, src, parent, stride, orientation);
 //    encode_subband_z0run(s, b, src, parent, stride, orientation);
     return encode_subband_c0run(s, b, src, parent, stride, orientation);
@@ -1783,7 +1782,7 @@ static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, i
     const int w= b->width;
     const int h= b->height;
     int x,y;
-    
+
     if(1){
         int run, runs;
         x_and_coeff *xc= b->x_coeff;
@@ -1806,7 +1805,7 @@ static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, i
             for(x=0; x<w; x++){
                 int p=0;
                 const int l= v;
-                
+
                 lt= t; t= rt;
 
                 if(y){
@@ -1832,7 +1831,7 @@ static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, i
                     if(v){
                         v= 2*(get_symbol2(&s->c, b->state[context + 2], context-4) + 1);
                         v+=get_rac(&s->c, &b->state[0][16 + 1 + 3 + quant3bA[l&0xFF] + 3*quant3bA[t&0xFF]]);
-                        
+
                         xc->x=x;
                         (xc++)->coeff= v;
                     }
@@ -1842,7 +1841,7 @@ static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, i
                         else           run= INT_MAX;
                         v= 2*(get_symbol2(&s->c, b->state[0 + 2], 0-4) + 1);
                         v+=get_rac(&s->c, &b->state[0][16 + 1 + 3]);
-                        
+
                         xc->x=x;
                         (xc++)->coeff= v;
                     }else{
@@ -1862,7 +1861,7 @@ static inline void unpack_coeffs(SnowContext *s, SubBand *b, SubBand * parent, i
             (xc++)->x= w+1; //end marker
             prev_xc= prev2_xc;
             prev2_xc= xc;
-            
+
             if(parent_xc){
                 if(y&1){
                     while(parent_xc->x != parent->width+1)
@@ -1886,7 +1885,7 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli
     int qmul= qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
     int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
     int new_index = 0;
-    
+
     START_TIMER
 
     if(b->buf == s->spatial_dwt_buffer || s->qlog == LOSSLESS_QLOG){
@@ -1898,7 +1897,7 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli
     if (start_y != 0)
         new_index = save_state[0];
 
-        
+
     for(y=start_y; y<h; y++){
         int x = 0;
         int v;
@@ -1919,10 +1918,10 @@ static inline void decode_subband_slice_buffered(SnowContext *s, SubBand *b, sli
     if(w > 200 && start_y != 0/*level+1 == s->spatial_decomposition_count*/){
         STOP_TIMER("decode_subband")
     }
-        
+
     /* Save our variables for the next slice. */
     save_state[0] = new_index;
-        
+
     return;
 }
 
@@ -1943,10 +1942,10 @@ static void reset_contexts(SnowContext *s){
 static int alloc_blocks(SnowContext *s){
     int w= -((-s->avctx->width )>>LOG2_MB_SIZE);
     int h= -((-s->avctx->height)>>LOG2_MB_SIZE);
-    
+
     s->b_width = w;
     s->b_height= h;
-    
+
     s->block= av_mallocz(w * h * sizeof(BlockNode) << (s->block_max_depth*2));
     return 0;
 }
@@ -1999,7 +1998,7 @@ static inline void set_blocks(SnowContext *s, int level, int x, int y, int l, in
     const int block_w= 1<<rem_depth;
     BlockNode block;
     int i,j;
-    
+
     block.color[0]= l;
     block.color[1]= cb;
     block.color[2]= cr;
@@ -2052,13 +2051,6 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
     const int rem_depth= s->block_max_depth - level;
     const int index= (x + y*w) << rem_depth;
     const int block_w= 1<<(LOG2_MB_SIZE - level);
-    static BlockNode null_block= { //FIXME add border maybe
-        .color= {128,128,128},
-        .mx= 0,
-        .my= 0,
-        .type= 0,
-        .level= 0,
-    };
     int trx= (x+1)<<rem_depth;
     int try= (y+1)<<rem_depth;
     BlockNode *left  = x ? &s->block[index-1] : &null_block;
@@ -2073,16 +2065,12 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
     int pmx= mid_pred(left->mx, top->mx, tr->mx);
     int pmy= mid_pred(left->my, top->my, tr->my);
     int mx=0, my=0;
-    int l,cr,cb, i;
+    int l,cr,cb;
     const int stride= s->current_picture.linesize[0];
     const int uvstride= s->current_picture.linesize[1];
-    const int instride= s->input_picture.linesize[0];
-    const int uvinstride= s->input_picture.linesize[1];
-    uint8_t *new_l = s->input_picture.data[0] + (x + y*  instride)*block_w;
-    uint8_t *new_cb= s->input_picture.data[1] + (x + y*uvinstride)*block_w/2;
-    uint8_t *new_cr= s->input_picture.data[2] + (x + y*uvinstride)*block_w/2;
-    uint8_t current_mb[3][stride*block_w];
-    uint8_t *current_data[3]= {&current_mb[0][0], &current_mb[1][0], &current_mb[2][0]};
+    uint8_t *current_data[3]= { s->input_picture.data[0] + (x + y*  stride)*block_w,
+                                s->input_picture.data[1] + (x + y*uvstride)*block_w/2,
+                                s->input_picture.data[2] + (x + y*uvstride)*block_w/2};
     int P[10][2];
     int16_t last_mv[3][2];
     int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
@@ -2098,14 +2086,6 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
         return 0;
     }
 
-    //FIXME optimize
-    for(i=0; i<block_w; i++)
-        memcpy(&current_mb[0][0] +   stride*i, new_l  +   instride*i, block_w);
-    for(i=0; i<block_w>>1; i++)
-        memcpy(&current_mb[1][0] + uvstride*i, new_cb + uvinstride*i, block_w>>1);
-    for(i=0; i<block_w>>1; i++)
-        memcpy(&current_mb[2][0] + uvstride*i, new_cr + uvinstride*i, block_w>>1);
-
 //    clip predictors / edge ?
 
     P_LEFT[0]= left->mx;
@@ -2114,36 +2094,36 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
     P_TOP [1]= top->my;
     P_TOPRIGHT[0]= tr->mx;
     P_TOPRIGHT[1]= tr->my;
-    
+
     last_mv[0][0]= s->block[index].mx;
     last_mv[0][1]= s->block[index].my;
     last_mv[1][0]= right->mx;
     last_mv[1][1]= right->my;
     last_mv[2][0]= bottom->mx;
     last_mv[2][1]= bottom->my;
-    
+
     s->m.mb_stride=2;
-    s->m.mb_x= 
+    s->m.mb_x=
     s->m.mb_y= 0;
     s->m.me.skip= 0;
 
     init_ref(c, current_data, s->last_picture.data, NULL, block_w*x, block_w*y, 0);
-    
+
     assert(s->m.me.  stride ==   stride);
     assert(s->m.me.uvstride == uvstride);
-    
+
     c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
     c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
     c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
     c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
-    
+
     c->xmin = - x*block_w - 16+2;
     c->ymin = - y*block_w - 16+2;
     c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
     c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-2;
 
     if(P_LEFT[0]     > (c->xmax<<shift)) P_LEFT[0]    = (c->xmax<<shift);
-    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift); 
+    if(P_LEFT[1]     > (c->ymax<<shift)) P_LEFT[1]    = (c->ymax<<shift);
     if(P_TOP[0]      > (c->xmax<<shift)) P_TOP[0]     = (c->xmax<<shift);
     if(P_TOP[1]      > (c->ymax<<shift)) P_TOP[1]     = (c->ymax<<shift);
     if(P_TOPRIGHT[0] < (c->xmin<<shift)) P_TOPRIGHT[0]= (c->xmin<<shift);
@@ -2161,18 +2141,18 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
         c->pred_y = P_MEDIAN[1];
     }
 
-    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv, 
+    score= ff_epzs_motion_search(&s->m, &mx, &my, P, 0, /*ref_index*/ 0, last_mv,
                              (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
 
     assert(mx >= c->xmin);
     assert(mx <= c->xmax);
     assert(my >= c->ymin);
     assert(my <= c->ymax);
-    
+
     score= s->m.me.sub_motion_search(&s->m, &mx, &my, score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
     score= ff_get_mb_score(&s->m, mx, my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
     //FIXME if mb_cmp != SSE then intra cant be compared currently and mb_penalty vs. lambda2
-                             
+
   //  subpel search
     pc= s->c;
     pc.bytestream_start=
@@ -2191,15 +2171,15 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
              ))>>FF_LAMBDA_SHIFT;
 
     block_s= block_w*block_w;
-    sum = pix_sum(&current_mb[0][0], stride, block_w);
+    sum = pix_sum(current_data[0], stride, block_w);
     l= (sum + block_s/2)/block_s;
-    iscore = pix_norm1(&current_mb[0][0], stride, block_w) - 2*l*sum + l*l*block_s;
-    
+    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
+
     block_s= block_w*block_w>>2;
-    sum = pix_sum(&current_mb[1][0], uvstride, block_w>>1);
+    sum = pix_sum(current_data[1], uvstride, block_w>>1);
     cb= (sum + block_s/2)/block_s;
 //    iscore += pix_norm1(&current_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
-    sum = pix_sum(&current_mb[2][0], uvstride, block_w>>1);
+    sum = pix_sum(current_data[2], uvstride, block_w>>1);
     cr= (sum + block_s/2)/block_s;
 //    iscore += pix_norm1(&current_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
 
@@ -2233,7 +2213,7 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
         else
             c->scene_change_score+= s->m.qscale;
     }
-        
+
     if(level!=s->block_max_depth){
         put_rac(&s->c, &s->block_state[4 + s_context], 0);
         score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
@@ -2241,11 +2221,11 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
         score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
         score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
         score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
-    
+
         if(score2 < score && score2 < iscore)
             return score2;
     }
-    
+
     if(iscore < score){
         memcpy(pbbak, i_buffer, i_len);
         s->c= ic;
@@ -2266,24 +2246,75 @@ static int encode_q_branch(SnowContext *s, int level, int x, int y){
 }
 #endif
 
+static always_inline int same_block(BlockNode *a, BlockNode *b){
+    if((a->type&BLOCK_INTRA) && (b->type&BLOCK_INTRA)){
+        return !((a->color[0] - b->color[0]) | (a->color[1] - b->color[1]) | (a->color[2] - b->color[2]));
+    }else{
+        return !((a->mx - b->mx) | (a->my - b->my) | ((a->type ^ b->type)&BLOCK_INTRA));
+    }
+}
+
+static void encode_q_branch2(SnowContext *s, int level, int x, int y){
+    const int w= s->b_width  << s->block_max_depth;
+    const int rem_depth= s->block_max_depth - level;
+    const int index= (x + y*w) << rem_depth;
+    int trx= (x+1)<<rem_depth;
+    BlockNode *b= &s->block[index];
+    BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    BlockNode *top   = y ? &s->block[index-w] : &null_block;
+    BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
+    BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
+    int pl = left->color[0];
+    int pcb= left->color[1];
+    int pcr= left->color[2];
+    int pmx= mid_pred(left->mx, top->mx, tr->mx);
+    int pmy= mid_pred(left->my, top->my, tr->my);
+    int mx_context= av_log2(2*ABS(left->mx - top->mx));
+    int my_context= av_log2(2*ABS(left->my - top->my));
+    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
+
+    if(s->keyframe){
+        set_blocks(s, level, x, y, pl, pcb, pcr, pmx, pmy, BLOCK_INTRA);
+        return;
+    }
+
+    if(level!=s->block_max_depth){
+        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
+            put_rac(&s->c, &s->block_state[4 + s_context], 1);
+        }else{
+            put_rac(&s->c, &s->block_state[4 + s_context], 0);
+            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
+            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
+            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
+            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
+            return;
+        }
+    }
+    if(b->type & BLOCK_INTRA){
+        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
+        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
+        put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
+        put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
+        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, BLOCK_INTRA);
+    }else{
+        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
+        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
+        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
+        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, 0);
+    }
+}
+
 static void decode_q_branch(SnowContext *s, int level, int x, int y){
     const int w= s->b_width << s->block_max_depth;
     const int rem_depth= s->block_max_depth - level;
     const int index= (x + y*w) << rem_depth;
-    static BlockNode null_block= { //FIXME add border maybe
-        .color= {128,128,128},
-        .mx= 0,
-        .my= 0,
-        .type= 0,
-        .level= 0,
-    };
     int trx= (x+1)<<rem_depth;
     BlockNode *left  = x ? &s->block[index-1] : &null_block;
     BlockNode *top   = y ? &s->block[index-w] : &null_block;
     BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
     BlockNode *tr    = y && trx<w && ((x&1)==0 || level==0) ? &s->block[index-w+(1<<rem_depth)] : tl; //FIXME use lt
     int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
-    
+
     if(s->keyframe){
         set_blocks(s, level, x, y, null_block.color[0], null_block.color[1], null_block.color[2], null_block.mx, null_block.my, BLOCK_INTRA);
         return;
@@ -2298,7 +2329,7 @@ static void decode_q_branch(SnowContext *s, int level, int x, int y){
         int my= mid_pred(left->my, top->my, tr->my);
         int mx_context= av_log2(2*ABS(left->mx - top->mx)) + 0*av_log2(2*ABS(tr->mx - top->mx));
         int my_context= av_log2(2*ABS(left->my - top->my)) + 0*av_log2(2*ABS(tr->my - top->my));
-        
+
         type= get_rac(&s->c, &s->block_state[1 + left->type + top->type]) ? BLOCK_INTRA : 0;
 
         if(type){
@@ -2324,13 +2355,19 @@ static void encode_blocks(SnowContext *s){
     int w= s->b_width;
     int h= s->b_height;
 
+    if(s->avctx->me_method == ME_ITER && !s->keyframe)
+        iterative_me(s);
+
     for(y=0; y<h; y++){
         if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
             av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
             return;
         }
         for(x=0; x<w; x++){
-            encode_q_branch(s, 0, x, y);
+            if(s->avctx->me_method == ME_ITER)
+                encode_q_branch2(s, 0, x, y);
+            else
+                encode_q_branch (s, 0, x, y);
         }
     }
 }
@@ -2369,10 +2406,10 @@ START_TIMER
 
             if(dx<8) am = (32*a2*( 8-dx) +    am* dx    + 128)>>8;
             else     am = (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
-            
+
             /* FIXME Try increasing tmp buffer to 16 bits and not clipping here. Should give marginally better results. - Robert*/
             if(am&(~255)) am= ~(am>>31);
-            
+
             tmp[x] = am;
 
 /*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
@@ -2384,7 +2421,7 @@ START_TIMER
         src += stride;
     }
     tmp -= (b_h+5)*stride;
-    
+
     for(y=0; y < b_h; y++){
         for(x=0; x < b_w; x++){
             int a0= tmp[x + 0*stride];
@@ -2397,14 +2434,14 @@ START_TIMER
 //            int am= 18*(a2+a3) - 2*(a1+a4);
 /*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
             int aR= (-7*a3 + 105*a2 + 35*a1 - 5*a0)>>3;*/
-            
+
 //            if(b_w==16) am= 8*(a1+a2);
 
             if(dy<8) am =  (32*a2*( 8-dy) +    am* dy    + 128)>>8;
             else     am = (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
 
             if(am&(~255)) am= ~(am>>31);
-            
+
             dst[x] = am;
 /*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
             else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
@@ -2434,12 +2471,42 @@ mca( 0, 8,8)
 mca( 8, 8,8)
 
 static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int sx, int sy, int b_w, int b_h, BlockNode *block, int plane_index, int w, int h){
-    if(block->type){
+    if(block->type & BLOCK_INTRA){
         int x, y;
-        const int color= block->color[plane_index];
-        for(y=0; y < b_h; y++){
-            for(x=0; x < b_w; x++){
-                dst[x + y*stride]= color;
+        const int color = block->color[plane_index];
+        const int color4= color*0x01010101;
+        if(b_w==32){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+                *(uint32_t*)&dst[4 + y*stride]= color4;
+                *(uint32_t*)&dst[8 + y*stride]= color4;
+                *(uint32_t*)&dst[12+ y*stride]= color4;
+                *(uint32_t*)&dst[16+ y*stride]= color4;
+                *(uint32_t*)&dst[20+ y*stride]= color4;
+                *(uint32_t*)&dst[24+ y*stride]= color4;
+                *(uint32_t*)&dst[28+ y*stride]= color4;
+            }
+        }else if(b_w==16){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+                *(uint32_t*)&dst[4 + y*stride]= color4;
+                *(uint32_t*)&dst[8 + y*stride]= color4;
+                *(uint32_t*)&dst[12+ y*stride]= color4;
+            }
+        }else if(b_w==8){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+                *(uint32_t*)&dst[4 + y*stride]= color4;
+            }
+        }else if(b_w==4){
+            for(y=0; y < b_h; y++){
+                *(uint32_t*)&dst[0 + y*stride]= color4;
+            }
+        }else{
+            for(y=0; y < b_h; y++){
+                for(x=0; x < b_w; x++){
+                    dst[x + y*stride]= color;
+                }
             }
         }
     }else{
@@ -2448,6 +2515,7 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp,
         int my= block->my*scale;
         const int dx= mx&15;
         const int dy= my&15;
+        const int tab_index= 3 - (b_w>>2) + (b_w>>4);
         sx += (mx>>4) - 2;
         sy += (my>>4) - 2;
         src += sx + sy*stride;
@@ -2456,17 +2524,31 @@ static void pred_block(SnowContext *s, uint8_t *dst, uint8_t *src, uint8_t *tmp,
             ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
             src= tmp + MB_SIZE;
         }
-        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
+        assert(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h);
+        assert(!(b_w&(b_w-1)));
+        assert(b_w>1 && b_h>1);
+        assert(tab_index>=0 && tab_index<4 || b_w==32);
+        if((dx&3) || (dy&3))
             mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
-        else
-            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
+        else if(b_w==32){
+            int y;
+            for(y=0; y<b_h; y+=16){
+                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
+                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
+            }
+        }else if(b_w==b_h)
+            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
+        else if(b_w==2*b_h){
+            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 2       + 2*stride,stride);
+            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
+        }else{
+            assert(2*b_w==b_h);
+            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 2 + 2*stride           ,stride);
+            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
+        }
     }
 }
 
-static always_inline int same_block(BlockNode *a, BlockNode *b){
-    return !((a->mx - b->mx) | (a->my - b->my) | a->type | b->type);
-}
-
 //FIXME name clenup (b_w, block_w, b_width stuff)
 static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb, DWTELEM *old_dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
     DWTELEM * dst = NULL;
@@ -2477,7 +2559,7 @@ static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb,
     BlockNode *rt= lt+1;
     BlockNode *lb= lt+b_stride;
     BlockNode *rb= lb+1;
-    uint8_t *block[4]; 
+    uint8_t *block[4];
     int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
     uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
     uint8_t *ptmp;
@@ -2497,7 +2579,7 @@ static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb,
         lb= lt;
         rb= rt;
     }
-        
+
     if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
         obmc -= src_x;
         b_w += src_x;
@@ -2512,7 +2594,7 @@ static always_inline void add_yblock_buffered(SnowContext *s, slice_buffer * sb,
     }else if(src_y + b_h> h){
         b_h = h - src_y;
     }
-    
+
     if(b_w<=0 || b_h<=0) return;
 
 assert(src_stride > 2*MB_SIZE + 5);
@@ -2523,7 +2605,7 @@ assert(src_stride > 2*MB_SIZE + 5);
     ptmp= tmp + 3*tmp_step;
     block[0]= ptmp;
     ptmp+=tmp_step;
-    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
+    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
 
     if(same_block(lt, rt)){
         block[1]= block[0];
@@ -2532,7 +2614,7 @@ assert(src_stride > 2*MB_SIZE + 5);
         ptmp+=tmp_step;
         pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
     }
-        
+
     if(same_block(lt, lb)){
         block[2]= block[0];
     }else if(same_block(rt, lb)){
@@ -2590,7 +2672,7 @@ assert(src_stride > 2*MB_SIZE + 5);
 {
 
     START_TIMER
-    
+
     for(y=0; y<b_h; y++){
         //FIXME ugly missue of obmc_stride
         uint8_t *obmc1= obmc + y*obmc_stride;
@@ -2627,7 +2709,7 @@ assert(src_stride > 2*MB_SIZE + 5);
 }
 
 //FIXME name clenup (b_w, block_w, b_width stuff)
-static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){
+static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int offset_dst, int plane_index){
     const int b_width = s->b_width  << s->block_max_depth;
     const int b_height= s->b_height << s->block_max_depth;
     const int b_stride= b_width;
@@ -2635,7 +2717,7 @@ static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8
     BlockNode *rt= lt+1;
     BlockNode *lb= lt+b_stride;
     BlockNode *rb= lb+1;
-    uint8_t *block[4]; 
+    uint8_t *block[4];
     int tmp_step= src_stride >= 7*MB_SIZE ? MB_SIZE : MB_SIZE*src_stride;
     uint8_t tmp[src_stride*7*MB_SIZE]; //FIXME align
     uint8_t *ptmp;
@@ -2655,10 +2737,12 @@ static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8
         lb= lt;
         rb= rt;
     }
-        
+
     if(src_x<0){ //FIXME merge with prev & always round internal width upto *16
         obmc -= src_x;
         b_w += src_x;
+        if(!offset_dst)
+            dst -= src_x;
         src_x=0;
     }else if(src_x + b_w > w){
         b_w = w - src_x;
@@ -2666,22 +2750,25 @@ static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8
     if(src_y<0){
         obmc -= src_y*obmc_stride;
         b_h += src_y;
+        if(!offset_dst)
+            dst -= src_y*dst_stride;
         src_y=0;
     }else if(src_y + b_h> h){
         b_h = h - src_y;
     }
-    
+
     if(b_w<=0 || b_h<=0) return;
 
 assert(src_stride > 2*MB_SIZE + 5);
-    dst += src_x + src_y*dst_stride;
+    if(offset_dst)
+        dst += src_x + src_y*dst_stride;
     dst8+= src_x + src_y*src_stride;
 //    src += src_x + src_y*src_stride;
 
     ptmp= tmp + 3*tmp_step;
     block[0]= ptmp;
     ptmp+=tmp_step;
-    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);    
+    pred_block(s, block[0], src, tmp, src_stride, src_x, src_y, b_w, b_h, lt, plane_index, w, h);
 
     if(same_block(lt, rt)){
         block[1]= block[0];
@@ -2690,7 +2777,7 @@ assert(src_stride > 2*MB_SIZE + 5);
         ptmp+=tmp_step;
         pred_block(s, block[1], src, tmp, src_stride, src_x, src_y, b_w, b_h, rt, plane_index, w, h);
     }
-        
+
     if(same_block(lt, lb)){
         block[2]= block[0];
     }else if(same_block(rt, lb)){
@@ -2756,7 +2843,7 @@ assert(src_stride > 2*MB_SIZE + 5);
                     +obmc2[x] * block[2][x + y*src_stride]
                     +obmc3[x] * block[1][x + y*src_stride]
                     +obmc4[x] * block[0][x + y*src_stride];
-            
+
             v <<= 8 - LOG2_OBMC_MAX;
             if(FRAC_BITS != 8){
                 v += 1<<(7 - FRAC_BITS);
@@ -2790,7 +2877,7 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer *
     int w= p->width;
     int h= p->height;
     START_TIMER
-    
+
     if(s->keyframe || (s->avctx->debug&512)){
         if(mb_y==mb_h)
             return;
@@ -2824,11 +2911,11 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer *
 
         return;
     }
-    
+
         for(mb_x=0; mb_x<=mb_w; mb_x++){
             START_TIMER
 
-            add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc, 
+            add_yblock_buffered(s, sb, old_buffer, dst8, ref, obmc,
                        block_w*mb_x - block_w/2,
                        block_w*mb_y - block_w/2,
                        block_w, block_w,
@@ -2836,10 +2923,10 @@ static always_inline void predict_slice_buffered(SnowContext *s, slice_buffer *
                        w, ref_stride, obmc_stride,
                        mb_x - 1, mb_y - 1,
                        add, plane_index);
-            
+
             STOP_TIMER("add_yblock")
         }
-    
+
     STOP_TIMER("predict_slice")
 }
 
@@ -2851,14 +2938,14 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_
     int block_size = MB_SIZE >> s->block_max_depth;
     int block_w    = plane_index ? block_size/2 : block_size;
     const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
-    int obmc_stride= plane_index ? block_size : 2*block_size;
+    const int obmc_stride= plane_index ? block_size : 2*block_size;
     int ref_stride= s->current_picture.linesize[plane_index];
     uint8_t *ref  = s->last_picture.data[plane_index];
     uint8_t *dst8= s->current_picture.data[plane_index];
     int w= p->width;
     int h= p->height;
     START_TIMER
-    
+
     if(s->keyframe || (s->avctx->debug&512)){
         if(mb_y==mb_h)
             return;
@@ -2882,22 +2969,22 @@ static always_inline void predict_slice(SnowContext *s, DWTELEM *buf, int plane_
 
         return;
     }
-    
+
         for(mb_x=0; mb_x<=mb_w; mb_x++){
             START_TIMER
 
-            add_yblock(s, buf, dst8, ref, obmc, 
+            add_yblock(s, buf, dst8, ref, obmc,
                        block_w*mb_x - block_w/2,
                        block_w*mb_y - block_w/2,
                        block_w, block_w,
                        w, h,
                        w, ref_stride, obmc_stride,
                        mb_x - 1, mb_y - 1,
-                       add, plane_index);
-            
+                       add, 1, plane_index);
+
             STOP_TIMER("add_yblock")
         }
-    
+
     STOP_TIMER("predict_slice")
 }
 
@@ -2908,6 +2995,501 @@ static always_inline void predict_plane(SnowContext *s, DWTELEM *buf, int plane_
         predict_slice(s, buf, plane_index, add, mb_y);
 }
 
+static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){ // returns a 0..255 intra color candidate for block (mb_x,mb_y) of the given plane
+    int i, x2, y2;
+    Plane *p= &s->plane[plane_index];
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size/2 : block_size; // planes other than 0 use half-size blocks
+    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
+    const int obmc_stride= plane_index ? block_size : 2*block_size; // equals 2*block_w: the OBMC window spans two blocks per axis
+    const int ref_stride= s->current_picture.linesize[plane_index];
+    uint8_t *ref= s->   last_picture.data[plane_index];
+    uint8_t *src= s-> input_picture.data[plane_index];
+    DWTELEM *dst= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; // per-plane slice of the shared scratchpad, used as accumulator
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int w= p->width;
+    const int h= p->height;
+    int index= mb_x + mb_y*b_stride;
+    BlockNode *b= &s->block[index];
+    BlockNode backup= *b; // saved so the temporary intra marking below can be undone before returning
+    int ab=0; // running sum of (source - prediction) * weight
+    int aa=0; // running sum of weight * weight
+
+    b->type|= BLOCK_INTRA; // temporarily treat this block as intra with color 0 for this plane
+    b->color[plane_index]= 0;
+    memset(dst, 0, obmc_stride*obmc_stride*sizeof(DWTELEM));
+
+    for(i=0; i<4; i++){ // visit the four block_w x block_w quadrants of this block's window
+        int mb_x2= mb_x + (i &1) - 1;
+        int mb_y2= mb_y + (i>>1) - 1;
+        int x= block_w*mb_x2 + block_w/2;
+        int y= block_w*mb_y2 + block_w/2;
+
+        add_yblock(s, dst + ((i&1)+(i>>1)*obmc_stride)*block_w, NULL, ref, obmc, // NOTE(review): dst8 is NULL and the two 0 arguments are presumably add=0 plus the flag this patch adds -- confirm against add_yblock
+                    x, y, block_w, block_w, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
+
+        for(y2= FFMAX(y, 0); y2<FFMIN(h, y+block_w); y2++){ // only pixels of the quadrant that lie inside the picture
+            for(x2= FFMAX(x, 0); x2<FFMIN(w, x+block_w); x2++){
+                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_w*mb_y - block_w/2))*obmc_stride; // position relative to the window's top-left corner (shadows the outer index)
+                int obmc_v= obmc[index];
+                int d;
+                if(y<0) obmc_v += obmc[index + block_w*obmc_stride]; // quadrant crosses a picture edge: add the weight found block_w rows/columns away in the table
+                if(x<0) obmc_v += obmc[index + block_w];
+                if(y+block_w>h) obmc_v += obmc[index - block_w*obmc_stride];
+                if(x+block_w>w) obmc_v += obmc[index - block_w];
+                //FIXME precalc this or simplify it somehow else
+
+                d = -dst[index] + (1<<(FRAC_BITS-1)); // negated accumulator value plus a rounding offset for the >>FRAC_BITS below
+                dst[index] = d;
+                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
+                aa += obmc_v * obmc_v; //FIXME precalculate this
+            }
+        }
+    }
+    *b= backup; // undo the temporary intra/color-0 marking
+
+    return clip(((ab<<6) + aa/2)/aa, 0, 255); //FIXME we shouldn't need clipping; NOTE(review): divides by aa, which stays 0 if no in-picture pixel was visited -- confirm that cannot happen
+}
+
+static inline int get_block_bits(SnowContext *s, int x, int y, int w){ // rough bit-cost estimate for the block at (x,y); w is the horizontal offset used to find the top-right neighbor
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int b_height = s->b_height<< s->block_max_depth;
+    int index= x + y*b_stride;
+    BlockNode *b     = &s->block[index]; // NOTE(review): these pointers are formed before the range check below; they are only dereferenced after it
+    BlockNode *left  = x ? &s->block[index-1] : &null_block;
+    BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
+    BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left; // falls back to left when there is no top-left neighbor
+    BlockNode *tr    = y && x+w<b_stride ? &s->block[index-b_stride+w] : tl; // falls back to tl when there is no top-right neighbor
+    int dmx, dmy;
+//  int mx_context= av_log2(2*ABS(left->mx - top->mx));
+//  int my_context= av_log2(2*ABS(left->my - top->my));
+
+    if(x<0 || x>=b_stride || y>=b_height) // blocks outside this range cost nothing; NOTE(review): y<0 is not tested here
+        return 0;
+    dmx= b->mx - mid_pred(left->mx, top->mx, tr->mx); // motion vector difference against the mid_pred() of left, top and top-right
+    dmy= b->my - mid_pred(left->my, top->my, tr->my);
+/*
+1            0      0
+01X          1-2    1
+001XX        3-6    2-3
+0001XXX      7-14   4-7
+00001XXXX   15-30   8-15
+*/
+//FIXME try accurate rate
+//FIXME intra and inter predictors if surrounding blocks aren't the same type
+    if(b->type & BLOCK_INTRA){ // intra: cost grows with the log2 of each color component's difference to the left block
+        return 3+2*( av_log2(2*ABS(left->color[0] - b->color[0]))
+                   + av_log2(2*ABS(left->color[1] - b->color[1]))
+                   + av_log2(2*ABS(left->color[2] - b->color[2])));
+    }else // inter: cost grows with the log2 of the motion vector differences
+        return 2*(1 + av_log2(2*ABS(dmx))
+                    + av_log2(2*ABS(dmy))); //FIXME kill the 2* can be merged in lambda
+}
+
+static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, const uint8_t *obmc_edged){ // returns distortion + rate*penalty_factor for the block's current parameters; writes the blended prediction into current_picture
+    Plane *p= &s->plane[plane_index];
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size/2 : block_size;
+    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; // not referenced below; obmc_edged is used instead
+    const int obmc_stride= plane_index ? block_size : 2*block_size;
+    const int ref_stride= s->current_picture.linesize[plane_index];
+    uint8_t *ref= s->   last_picture.data[plane_index];
+    uint8_t *dst= s->current_picture.data[plane_index];
+    uint8_t *src= s->  input_picture.data[plane_index];
+    DWTELEM *pred= (DWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; // same scratchpad slice that get_dc() fills for this plane
+    uint8_t cur[ref_stride*2*MB_SIZE]; //FIXME alignment; NOTE(review): variable-length stack arrays sized by the picture stride
+    uint8_t tmp[ref_stride*(2*MB_SIZE+5)];
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int b_height = s->b_height<< s->block_max_depth; // not referenced below
+    const int w= p->width;
+    const int h= p->height;
+    int distortion;
+    int rate= 0;
+    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
+    int sx= block_w*mb_x - block_w/2; // top-left corner of the 2*block_w wide window around the block
+    int sy= block_w*mb_y - block_w/2;
+    const int x0= FFMAX(0,-sx); // window coordinates clipped to the picture area
+    const int y0= FFMAX(0,-sy);
+    const int x1= FFMIN(block_w*2, w-sx);
+    const int y1= FFMIN(block_w*2, h-sy);
+    int i,x,y;
+
+    pred_block(s, cur, ref, tmp, ref_stride, sx, sy, block_w*2, block_w*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h); // this block's own prediction for the whole window goes into cur
+
+    for(y=y0; y<y1; y++){
+        const uint8_t *obmc1= obmc_edged + y*obmc_stride;
+        const DWTELEM *pred1 = pred + y*obmc_stride;
+        uint8_t *cur1 = cur + y*ref_stride;
+        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
+        for(x=x0; x<x1; x++){
+            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX); // weight this block's prediction and bring it to FRAC_BITS precision
+            v = (v + pred1[x]) >> FRAC_BITS; // add the value stored in pred and drop the fractional bits
+            if(v&(~255)) v= ~(v>>31); // clamp: negative values become 0, values above 255 become 255 once stored as uint8_t
+            dst1[x] = v;
+        }
+    }
+
+    //FIXME sad/ssd can be broken up, but wavelet cmp should be one 32x32 block
+    if(block_w==16){ // 32x32 window: compared as four 16x16 pieces
+        distortion = 0;
+        for(i=0; i<4; i++){
+            int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
+            distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
+        }
+    }else{
+        assert(block_w==8);
+        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
+    }
+
+    if(plane_index==0){ // the rate term is only added for plane 0
+        for(i=0; i<4; i++){
+/* ..RRr
+ * .RXx.
+ * rxx..
+ */
+            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
+        }
+        if(mb_x == b_stride-2) // second-to-last block column: also count block (mb_x+1, mb_y+1)
+            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
+    }
+    return distortion + rate*penalty_factor;
+}
+
+static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){ // returns distortion + rate*penalty_factor for the 2x2 block group whose top-left block is (mb_x,mb_y)
+    int i, y2;
+    Plane *p= &s->plane[plane_index];
+    const int block_size = MB_SIZE >> s->block_max_depth;
+    const int block_w    = plane_index ? block_size/2 : block_size;
+    const uint8_t *obmc  = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth];
+    const int obmc_stride= plane_index ? block_size : 2*block_size;
+    const int ref_stride= s->current_picture.linesize[plane_index];
+    uint8_t *ref= s->   last_picture.data[plane_index];
+    uint8_t *dst= s->current_picture.data[plane_index];
+    uint8_t *src= s-> input_picture.data[plane_index];
+    const static DWTELEM zero_dst[4096]; //FIXME; static storage, so all elements are zero; NOTE(review): declared const but passed to add_yblock as its buffer argument
+    const int b_stride = s->b_width << s->block_max_depth;
+    const int b_height = s->b_height<< s->block_max_depth; // not referenced below
+    const int w= p->width;
+    const int h= p->height;
+    int distortion= 0;
+    int rate= 0;
+    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
+
+    for(i=0; i<9; i++){ // 3x3 grid of block_w x block_w areas starting half a block left of and above block (mb_x-1, mb_y-1)
+        int mb_x2= mb_x + (i%3) - 1;
+        int mb_y2= mb_y + (i/3) - 1;
+        int x= block_w*mb_x2 + block_w/2;
+        int y= block_w*mb_y2 + block_w/2;
+
+        add_yblock(s, zero_dst, dst, ref, obmc, // called with the zero buffer, stride 0 and dst8=dst; NOTE(review): presumably renders the prediction straight into dst -- confirm against add_yblock
+                   x, y, block_w, block_w, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
+
+        //FIXME find a cleaner/simpler way to skip the outside stuff
+        for(y2= y; y2<0; y2++) // rows above the picture: copy the input over dst
+            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
+        for(y2= h; y2<y+block_w; y2++) // rows below the picture
+            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
+        if(x<0){ // columns left of the picture
+            for(y2= y; y2<y+block_w; y2++)
+                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
+        }
+        if(x+block_w > w){ // columns right of the picture
+            for(y2= y; y2<y+block_w; y2++)
+                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
+        }
+
+        assert(block_w== 8 || block_w==16);
+        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_w); // me_cmp index 0 for 16-wide blocks, 1 for 8-wide blocks
+    }
+
+    if(plane_index==0){ // the rate term is only added for plane 0
+        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
+        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1); // all four blocks of the group carry identical parameters
+
+/* ..RRRr
+ * .RXXx.
+ * .RXXx.
+ * rxxx.
+ */
+        if(merged) // a merged group is costed once with w=2; the first four dxy entries (the group itself) are then skipped
+            rate = get_block_bits(s, mb_x, mb_y, 2);
+        for(i=merged?4:0; i<9; i++){
+            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
+            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
+        }
+    }
+    return distortion + rate*penalty_factor;
+}
+
+static always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, const uint8_t *obmc_edged, int *best_rd){ // tries candidate p (3 colors if intra, else mx,my) on one block; returns 1 and keeps it if it lowers *best_rd, else restores the block and returns 0
+    const int b_stride= s->b_width << s->block_max_depth;
+    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
+    BlockNode backup= *block;
+    int rd, index, value;
+
+    assert(mb_x>=0 && mb_y>=0);
+    assert(mb_x<b_stride);
+
+    if(intra){
+        block->color[0] = p[0];
+        block->color[1] = p[1];
+        block->color[2] = p[2];
+        block->type |= BLOCK_INTRA;
+    }else{ // only p[0] and p[1] are read on this path
+        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1); // hash of the motion vector into the cache
+        value= s->me_cache_generation + (p[0]>>10) + (p[1]<<6);
+        if(s->me_cache[index] == value) // matching cache entry: skip the candidate; the block has not been modified yet at this point
+            return 0;
+        s->me_cache[index]= value;
+
+        block->mx= p[0];
+        block->my= p[1];
+        block->type &= ~BLOCK_INTRA;
+    }
+
+    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged); // scored on plane 0 only
+
+//FIXME chroma
+    if(rd < *best_rd){
+        *best_rd= rd;
+        return 1;
+    }else{
+        *block= backup;
+        return 0;
+    }
+}
+
+/* special case for int[2] args we discard afterward, fixes a compilation problem with gcc 2.95 */
+static always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int intra, const uint8_t *obmc_edged, int *best_rd){ // wraps (p0,p1) in a local array and forwards everything, including intra, to check_block()
+    int p[2] = {p0, p1}; // two elements only; every caller visible in this hunk passes intra=0, where check_block() reads just p[0] and p[1]
+    return check_block(s, mb_x, mb_y, p, intra, obmc_edged, best_rd);
+}
+
+static always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int *best_rd){ // tries motion vector (p0,p1) on all four blocks of a 2x2 group; returns 1 and keeps it if it lowers *best_rd, else restores the group and returns 0
+    const int b_stride= s->b_width << s->block_max_depth;
+    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
+    BlockNode backup[4]= {block[0], block[1], block[b_stride], block[b_stride+1]}; // copies of the four blocks for the restore path
+    int rd, index, value;
+
+    assert(mb_x>=0 && mb_y>=0);
+    assert(mb_x<b_stride);
+    assert(((mb_x|mb_y)&1) == 0); // both coordinates must be even
+
+    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1); // same hash and tag as in check_block()
+    value= s->me_cache_generation + (p0>>10) + (p1<<6);
+    if(s->me_cache[index] == value) // matching cache entry: skip the candidate before touching any block
+        return 0;
+    s->me_cache[index]= value;
+
+    block->mx= p0;
+    block->my= p1;
+    block->type &= ~BLOCK_INTRA;
+    block[1]= block[b_stride]= block[b_stride+1]= *block; // replicate the top-left block into the other three
+
+    rd= get_4block_rd(s, mb_x, mb_y, 0); // scored on plane 0 only
+
+//FIXME chroma
+    if(rd < *best_rd){
+        *best_rd= rd;
+        return 1;
+    }else{
+        block[0]= backup[0];
+        block[1]= backup[1];
+        block[b_stride]= backup[2];
+        block[b_stride+1]= backup[3];
+        return 0;
+    }
+}
+
+static void iterative_me(SnowContext *s){ // repeatedly re-optimizes every block's parameters with check_block*(), then optionally tries to unify 2x2 block groups
+    int pass, mb_x, mb_y;
+    const int b_width = s->b_width  << s->block_max_depth;
+    const int b_height= s->b_height << s->block_max_depth;
+    const int b_stride= b_width;
+    int color[3];
+
+    for(pass=0; pass<50; pass++){ // at most 50 passes; stops early once a pass changes no block
+        int change= 0;
+
+        for(mb_y= 0; mb_y<b_height; mb_y++){
+            for(mb_x= 0; mb_x<b_width; mb_x++){
+                int dia_change, i, j;
+                int best_rd= INT_MAX;
+                BlockNode backup;
+                const int index= mb_x + mb_y * b_stride;
+                BlockNode *block= &s->block[index];
+                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : &null_block; // the eight neighbors; null_block stands in at the borders
+                BlockNode *lb = mb_x                              ? &s->block[index         -1] : &null_block;
+                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : &null_block;
+                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : &null_block;
+                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : &null_block;
+                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : &null_block;
+                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : &null_block;
+                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : &null_block;
+                const int b_w= (MB_SIZE >> s->block_max_depth);
+                uint8_t obmc_edged[b_w*2][b_w*2]; // per-block copy of the OBMC weights, adjusted below at picture borders
+
+                if(pass && (block->type & BLOCK_OPT)) // after the first pass, skip blocks still flagged BLOCK_OPT
+                    continue;
+                block->type |= BLOCK_OPT;
+
+                backup= *block; // compared against the final choice to detect a change
+
+                if(!s->me_cache_generation) // the cache is cleared whenever the generation counter is 0
+                    memset(s->me_cache, 0, sizeof(s->me_cache));
+                s->me_cache_generation += 1<<22; // new generation per block, which changes the tag check_block() compares against
+
+                //FIXME precalc
+                {
+                    int x, y;
+                    memcpy(obmc_edged, obmc_tab[s->block_max_depth], b_w*b_w*4);
+                    if(mb_x==0) // leftmost column: left half of each row becomes the sum of its first and (b_w-1)th weight
+                        for(y=0; y<b_w*2; y++)
+                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
+                    if(mb_x==b_stride-1) // rightmost column: same for the right half
+                        for(y=0; y<b_w*2; y++)
+                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
+                    if(mb_y==0){ // top row: row 0 absorbs row b_w-1 and is replicated over rows 1..b_w-1
+                        for(x=0; x<b_w*2; x++)
+                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
+                        for(y=1; y<b_w; y++)
+                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
+                    }
+                    if(mb_y==b_height-1){ // bottom row: last row absorbs row b_w and is replicated over rows b_w..2*b_w-2
+                        for(x=0; x<b_w*2; x++)
+                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
+                        for(y=b_w; y<b_w*2-1; y++)
+                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
+                    }
+                }
+
+                //skip stuff outside the picture
+                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1)
+                { // copies the input over current_picture where the window lies outside w x h; NOTE(review): addresses rows/columns outside the picture, so both buffers presumably have padding there -- confirm
+                    uint8_t *src= s->  input_picture.data[0];
+                    uint8_t *dst= s->current_picture.data[0];
+                    const int stride= s->current_picture.linesize[0];
+                    const int block_w= MB_SIZE >> s->block_max_depth;
+                    const int sx= block_w*mb_x - block_w/2;
+                    const int sy= block_w*mb_y - block_w/2;
+                    const int w= s->plane[0].width;
+                    const int h= s->plane[0].height;
+                    int y;
+
+                    for(y=sy; y<0; y++)
+                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
+                    for(y=h; y<sy+block_w*2; y++)
+                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
+                    if(sx<0){
+                        for(y=sy; y<sy+block_w*2; y++)
+                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
+                    }
+                    if(sx+block_w*2 > w){
+                        for(y=sy; y<sy+block_w*2; y++)
+                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
+                    }
+                }
+
+                // intra(black) = neighbors' contribution to the current block
+                for(i=0; i<3; i++)
+                    color[i]= get_dc(s, mb_x, mb_y, i);
+
+                // get previous score (can't be cached due to OBMC)
+                if(pass > 0 && (block->type&BLOCK_INTRA)){
+                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
+                    check_block(s, mb_x, mb_y, color0, 1, *obmc_edged, &best_rd);
+                }else
+                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, 0, *obmc_edged, &best_rd);
+
+                check_block_inter(s, mb_x, mb_y, 0, 0, 0, *obmc_edged, &best_rd); // candidates: the zero vector and the vectors of the four direct neighbors
+                check_block_inter(s, mb_x, mb_y, tb->mx, tb->my, 0, *obmc_edged, &best_rd);
+                check_block_inter(s, mb_x, mb_y, lb->mx, lb->my, 0, *obmc_edged, &best_rd);
+                check_block_inter(s, mb_x, mb_y, rb->mx, rb->my, 0, *obmc_edged, &best_rd);
+                check_block_inter(s, mb_x, mb_y, bb->mx, bb->my, 0, *obmc_edged, &best_rd);
+
+                /* fullpel ME */
+                //FIXME avoid subpel interpol / round to nearest integer
+                do{ // repeat until no candidate improves; NOTE(review): with dia_size <= 1 the outer loop only runs i=0, so the inner loop never executes and nothing is tested here
+                    dia_change=0;
+                    for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
+                        for(j=0; j<i; j++){
+                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd); // steps of 4 in mv units; presumably one full pel -- confirm
+                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd);
+                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), 0, *obmc_edged, &best_rd);
+                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), 0, *obmc_edged, &best_rd);
+                        }
+                    }
+                }while(dia_change);
+                /* subpel ME */
+                do{ // try the eight vectors at distance 1 around the current one until none improves
+                    static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
+                    dia_change=0;
+                    for(i=0; i<8; i++)
+                        dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], 0, *obmc_edged, &best_rd);
+                }while(dia_change);
+                //FIXME or try the standard 2 pass qpel or similar
+#if 1
+                check_block(s, mb_x, mb_y, color, 1, *obmc_edged, &best_rd); // finally try intra with the colors from get_dc()
+                //FIXME RD style color selection
+#endif
+                if(!same_block(block, &backup)){ // the block changed: clear BLOCK_OPT on all eight neighbors so later passes revisit them
+                    if(tb != &null_block) tb ->type &= ~BLOCK_OPT;
+                    if(lb != &null_block) lb ->type &= ~BLOCK_OPT;
+                    if(rb != &null_block) rb ->type &= ~BLOCK_OPT;
+                    if(bb != &null_block) bb ->type &= ~BLOCK_OPT;
+                    if(tlb!= &null_block) tlb->type &= ~BLOCK_OPT;
+                    if(trb!= &null_block) trb->type &= ~BLOCK_OPT;
+                    if(blb!= &null_block) blb->type &= ~BLOCK_OPT;
+                    if(brb!= &null_block) brb->type &= ~BLOCK_OPT;
+                    change ++;
+                }
+            }
+        }
+        av_log(NULL, AV_LOG_ERROR, "pass:%d changed:%d\n", pass, change); // NOTE(review): progress output sent at ERROR level with a NULL context
+        if(!change)
+            break;
+    }
+
+    if(s->block_max_depth == 1){ // extra pass over 2x2 groups: try giving all four blocks one common motion vector
+        int change= 0;
+        for(mb_y= 0; mb_y<b_height; mb_y+=2){
+            for(mb_x= 0; mb_x<b_width; mb_x+=2){
+                int dia_change, i, j; // dia_change and j are not used in this loop body
+                int best_rd, init_rd;
+                const int index= mb_x + mb_y * b_stride;
+                BlockNode *b[4];
+
+                b[0]= &s->block[index];
+                b[1]= b[0]+1;
+                b[2]= b[0]+b_stride;
+                b[3]= b[2]+1;
+                if(same_block(b[0], b[1]) && // nothing to do when the four blocks are already identical
+                   same_block(b[0], b[2]) &&
+                   same_block(b[0], b[3]))
+                    continue;
+
+                if(!s->me_cache_generation)
+                    memset(s->me_cache, 0, sizeof(s->me_cache));
+                s->me_cache_generation += 1<<22;
+
+                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0); // score of the group as it currently is
+
+                check_4block_inter(s, mb_x, mb_y, // candidate: rounded average of the four motion vectors
+                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
+                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, &best_rd);
+
+                for(i=0; i<4; i++) // candidates: the vector of each member that is not intra
+                    if(!(b[i]->type&BLOCK_INTRA))
+                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, &best_rd);
+
+                if(init_rd != best_rd)
+                    change++;
+            }
+        }
+        av_log(NULL, AV_LOG_ERROR, "pass:4mv changed:%d\n", change*4); // reports four blocks per changed group; NOTE(review): also ERROR level with a NULL context
+    }
+}
+
 static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int bias){
     const int level= b->level;
     const int w= b->width;
@@ -2918,16 +3500,16 @@ static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int b
 //    START_TIMER
 
     if(s->qlog == LOSSLESS_QLOG) return;
- 
+
     bias= bias ? 0 : (3*qmul)>>3;
     thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
     thres2= 2*thres1;
-    
+
     if(!bias){
         for(y=0; y<h; y++){
             for(x=0; x<w; x++){
                 int i= src[x + y*stride];
-                
+
                 if((unsigned)(i+thres1) > thres2){
                     if(i>=0){
                         i<<= QEXPSHIFT;
@@ -2946,8 +3528,8 @@ static void quantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int b
     }else{
         for(y=0; y<h; y++){
             for(x=0; x<w; x++){
-                int i= src[x + y*stride]; 
-                
+                int i= src[x + y*stride];
+
                 if((unsigned)(i+thres1) > thres2){
                     if(i>=0){
                         i<<= QEXPSHIFT;
@@ -2976,9 +3558,9 @@ static void dequantize_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand
     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
     int x,y;
     START_TIMER
-    
+
     if(s->qlog == LOSSLESS_QLOG) return;
-    
+
     for(y=start_y; y<end_y; y++){
 //        DWTELEM * line = slice_buffer_get_line_from_address(sb, src + (y * stride));
         DWTELEM * line = slice_buffer_get_line(sb, (y * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
@@ -3004,9 +3586,9 @@ static void dequantize(SnowContext *s, SubBand *b, DWTELEM *src, int stride){
     const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
     int x,y;
     START_TIMER
-    
+
     if(s->qlog == LOSSLESS_QLOG) return;
-    
+
     for(y=0; y<h; y++){
         for(x=0; x<w; x++){
             int i= src[x + y*stride];
@@ -3026,11 +3608,11 @@ static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, in
     const int w= b->width;
     const int h= b->height;
     int x,y;
-    
+
     for(y=h-1; y>=0; y--){
         for(x=w-1; x>=0; x--){
             int i= x + y*stride;
-            
+
             if(x){
                 if(use_median){
                     if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
@@ -3049,15 +3631,15 @@ static void decorrelate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, in
 static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand *b, DWTELEM *src, int stride, int inverse, int use_median, int start_y, int end_y){
     const int w= b->width;
     int x,y;
-    
+
 //    START_TIMER
-    
+
     DWTELEM * line;
     DWTELEM * prev;
-    
+
     if (start_y != 0)
         line = slice_buffer_get_line(sb, ((start_y - 1) * b->stride_line) + b->buf_y_offset) + b->buf_x_offset;
-    
+
     for(y=start_y; y<end_y; y++){
         prev = line;
 //        line = slice_buffer_get_line_from_address(sb, src + (y * stride));
@@ -3076,7 +3658,7 @@ static void correlate_slice_buffered(SnowContext *s, slice_buffer * sb, SubBand
             }
         }
     }
-    
+
 //    STOP_TIMER("correlate")
 }
 
@@ -3084,11 +3666,11 @@ static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int
     const int w= b->width;
     const int h= b->height;
     int x,y;
-    
+
     for(y=0; y<h; y++){
         for(x=0; x<w; x++){
             int i= x + y*stride;
-            
+
             if(x){
                 if(use_median){
                     if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
@@ -3106,9 +3688,9 @@ static void correlate(SnowContext *s, SubBand *b, DWTELEM *src, int stride, int
 
 static void encode_header(SnowContext *s){
     int plane_index, level, orientation;
-    uint8_t kstate[32]; 
-    
-    memset(kstate, MID_STATE, sizeof(kstate));   
+    uint8_t kstate[32];
+
+    memset(kstate, MID_STATE, sizeof(kstate));
 
     put_rac(&s->c, kstate, s->keyframe);
     if(s->keyframe || s->always_reset)
@@ -3135,8 +3717,8 @@ static void encode_header(SnowContext *s){
         }
     }
     put_symbol(&s->c, s->header_state, s->spatial_decomposition_type, 0);
-    put_symbol(&s->c, s->header_state, s->qlog, 1); 
-    put_symbol(&s->c, s->header_state, s->mv_scale, 0); 
+    put_symbol(&s->c, s->header_state, s->qlog, 1);
+    put_symbol(&s->c, s->header_state, s->mv_scale, 0);
     put_symbol(&s->c, s->header_state, s->qbias, 1);
     put_symbol(&s->c, s->header_state, s->block_max_depth, 0);
 }
@@ -3145,7 +3727,7 @@ static int decode_header(SnowContext *s){
     int plane_index, level, orientation;
     uint8_t kstate[32];
 
-    memset(kstate, MID_STATE, sizeof(kstate));   
+    memset(kstate, MID_STATE, sizeof(kstate));
 
     s->keyframe= get_rac(&s->c, kstate);
     if(s->keyframe || s->always_reset)
@@ -3178,28 +3760,33 @@ static int decode_header(SnowContext *s){
             }
         }
     }
-    
+
     s->spatial_decomposition_type= get_symbol(&s->c, s->header_state, 0);
     if(s->spatial_decomposition_type > 2){
         av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
         return -1;
     }
-    
+
     s->qlog= get_symbol(&s->c, s->header_state, 1);
     s->mv_scale= get_symbol(&s->c, s->header_state, 0);
     s->qbias= get_symbol(&s->c, s->header_state, 1);
     s->block_max_depth= get_symbol(&s->c, s->header_state, 0);
+    if(s->block_max_depth > 1){
+        av_log(s->avctx, AV_LOG_ERROR, "block_max_depth= %d is too large", s->block_max_depth);
+        s->block_max_depth= 0;
+        return -1;
+    }
 
     return 0;
 }
 
-static void init_qexp(){
+static void init_qexp(void){
     int i;
     double v=128;
 
     for(i=0; i<QROOT; i++){
         qexp[i]= lrintf(v);
-        v *= pow(2, 1.0 / QROOT); 
+        v *= pow(2, 1.0 / QROOT);
     }
 }
 
@@ -3209,7 +3796,7 @@ static int common_init(AVCodecContext *avctx){
     int level, orientation, plane_index, dec;
 
     s->avctx= avctx;
-        
+
     dsputil_init(&s->dsp, avctx);
 
 #define mcf(dx,dy)\
@@ -3255,21 +3842,21 @@ static int common_init(AVCodecContext *avctx){
 
     dec= s->spatial_decomposition_count= 5;
     s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
-    
+
     s->chroma_h_shift= 1; //FIXME XXX
     s->chroma_v_shift= 1;
-    
+
 //    dec += FFMAX(s->chroma_h_shift, s->chroma_v_shift);
-    
+
     width= s->avctx->width;
     height= s->avctx->height;
 
     s->spatial_dwt_buffer= av_mallocz(width*height*sizeof(DWTELEM));
-    
+
     s->mv_scale= (s->avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
     s->block_max_depth= (s->avctx->flags & CODEC_FLAG_4MV) ? 1 : 0;
-    
-    for(plane_index=0; plane_index<3; plane_index++){    
+
+    for(plane_index=0; plane_index<3; plane_index++){
         int w= s->avctx->width;
         int h= s->avctx->height;
 
@@ -3283,17 +3870,17 @@ static int common_init(AVCodecContext *avctx){
         for(level=s->spatial_decomposition_count-1; level>=0; level--){
             for(orientation=level ? 1 : 0; orientation<4; orientation++){
                 SubBand *b= &s->plane[plane_index].band[level][orientation];
-                
+
                 b->buf= s->spatial_dwt_buffer;
                 b->level= level;
                 b->stride= s->plane[plane_index].width << (s->spatial_decomposition_count - level);
                 b->width = (w + !(orientation&1))>>1;
                 b->height= (h + !(orientation>1))>>1;
-                
+
                 b->stride_line = 1 << (s->spatial_decomposition_count - level);
                 b->buf_x_offset = 0;
                 b->buf_y_offset = 0;
-                
+
                 if(orientation&1){
                     b->buf += (w+1)>>1;
                     b->buf_x_offset = (w+1)>>1;
@@ -3302,7 +3889,7 @@ static int common_init(AVCodecContext *avctx){
                     b->buf += b->stride>>1;
                     b->buf_y_offset = b->stride_line >> 1;
                 }
-                
+
                 if(level)
                     b->parent= &s->plane[plane_index].band[level-1][orientation];
                 b->x_coeff=av_mallocz(((b->width+1) * b->height+1)*sizeof(x_and_coeff));
@@ -3311,16 +3898,16 @@ static int common_init(AVCodecContext *avctx){
             h= (h+1)>>1;
         }
     }
-    
+
     reset_contexts(s);
-/*    
+/*
     width= s->width= avctx->width;
     height= s->height= avctx->height;
-    
+
     assert(width && height);
 */
     s->avctx->get_buffer(s->avctx, &s->mconly_picture);
-    
+
     return 0;
 }
 
@@ -3335,7 +3922,7 @@ static void calculate_vissual_weight(SnowContext *s, Plane *p){
             SubBand *b= &p->band[level][orientation];
             DWTELEM *buf= b->buf;
             int64_t error=0;
-            
+
             memset(s->spatial_dwt_buffer, 0, sizeof(int)*width*height);
             buf[b->width/2 + b->height/2*b->stride]= 256*256;
             ff_spatial_idwt(s->spatial_dwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
@@ -3363,12 +3950,12 @@ static int encode_init(AVCodecContext *avctx)
                "use vstrict=-2 / -strict -2 to use it anyway\n");
         return -1;
     }
- 
+
     common_init(avctx);
     alloc_blocks(s);
- 
+
     s->version=0;
-    
+
     s->m.avctx   = avctx;
     s->m.flags   = avctx->flags;
     s->m.bit_rate= avctx->bit_rate;
@@ -3376,6 +3963,7 @@ static int encode_init(AVCodecContext *avctx)
     s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
+    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
     h263_encode_init(&s->m); //mv_penalty
 
     if(avctx->flags&CODEC_FLAG_PASS1){
@@ -3390,8 +3978,8 @@ static int encode_init(AVCodecContext *avctx)
     for(plane_index=0; plane_index<3; plane_index++){
         calculate_vissual_weight(s, &s->plane[plane_index]);
     }
-    
-    
+
+
     avctx->coded_frame= &s->current_picture;
     switch(avctx->pix_fmt){
 //    case PIX_FMT_YUV444P:
@@ -3412,6 +4000,12 @@ static int encode_init(AVCodecContext *avctx)
 //    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
     s->chroma_h_shift= 1;
     s->chroma_v_shift= 1;
+
+    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
+    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
+
+    s->avctx->get_buffer(s->avctx, &s->input_picture);
+
     return 0;
 }
 #endif
@@ -3430,13 +4024,13 @@ static int frame_start(SnowContext *s){
     tmp= s->last_picture;
     s->last_picture= s->current_picture;
     s->current_picture= tmp;
-    
+
     s->current_picture.reference= 1;
     if(s->avctx->get_buffer(s->avctx, &s->current_picture) < 0){
         av_log(s->avctx, AV_LOG_ERROR, "get_buffer() failed\n");
         return -1;
     }
-    
+
     return 0;
 }
 
@@ -3447,24 +4041,31 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
     AVFrame *pict = data;
     const int width= s->avctx->width;
     const int height= s->avctx->height;
-    int level, orientation, plane_index;
+    int level, orientation, plane_index, i, y;
 
     ff_init_range_encoder(c, buf, buf_size);
     ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
-    
-    s->input_picture = *pict;
+
+    for(i=0; i<3; i++){
+        int shift= !!i;
+        for(y=0; y<(height>>shift); y++)
+            memcpy(&s->input_picture.data[i][y * s->input_picture.linesize[i]],
+                   &pict->data[i][y * pict->linesize[i]],
+                   width>>shift);
+    }
+    s->new_picture = *pict;
 
     if(avctx->flags&CODEC_FLAG_PASS2){
         s->m.pict_type =
         pict->pict_type= s->m.rc_context.entry[avctx->frame_number].new_pict_type;
         s->keyframe= pict->pict_type==FF_I_TYPE;
         s->m.picture_number= avctx->frame_number;
-        pict->quality= ff_rate_estimate_qscale(&s->m);
+        pict->quality= ff_rate_estimate_qscale(&s->m, 0);
     }else{
         s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
         pict->pict_type= s->keyframe ? FF_I_TYPE : FF_P_TYPE;
     }
-    
+
     if(pict->quality){
         s->qlog= rint(QROOT*log(pict->quality / (float)FF_QP2LAMBDA)/log(2));
         //<64 >60
@@ -3481,10 +4082,10 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
         int block_width = (width +15)>>4;
         int block_height= (height+15)>>4;
         int stride= s->current_picture.linesize[0];
-        
+
         assert(s->current_picture.data[0]);
         assert(s->last_picture.data[0]);
-     
+
         s->m.avctx= s->avctx;
         s->m.current_picture.data[0]= s->current_picture.data[0];
         s->m.   last_picture.data[0]= s->   last_picture.data[0];
@@ -3516,17 +4117,18 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
 
         s->m.dsp= s->dsp; //move
         ff_init_me(&s->m);
+        s->dsp= s->m.dsp;
     }
-    
+
 redo_frame:
-        
+
     s->qbias= pict->pict_type == P_TYPE ? 2 : 0;
 
     encode_header(s);
     s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
     encode_blocks(s);
     s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
-      
+
     for(plane_index=0; plane_index<3; plane_index++){
         Plane *p= &s->plane[plane_index];
         int w= p->width;
@@ -3542,9 +4144,9 @@ redo_frame:
             }
         }
         predict_plane(s, s->spatial_dwt_buffer, plane_index, 0);
-        
-        if(   plane_index==0 
-           && pict->pict_type == P_TYPE 
+
+        if(   plane_index==0
+           && pict->pict_type == P_TYPE
            && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
             ff_init_range_encoder(c, buf, buf_size);
             ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
@@ -3553,7 +4155,7 @@ redo_frame:
             reset_contexts(s);
             goto redo_frame;
         }
-        
+
         if(s->qlog == LOSSLESS_QLOG){
             for(y=0; y<h; y++){
                 for(x=0; x<w; x++){
@@ -3561,13 +4163,13 @@ redo_frame:
                 }
             }
         }
- 
+
         ff_spatial_dwt(s->spatial_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
 
         for(level=0; level<s->spatial_decomposition_count; level++){
             for(orientation=level ? 1 : 0; orientation<4; orientation++){
                 SubBand *b= &p->band[level][orientation];
-                
+
                 quantize(s, b, b->buf, b->stride, s->qbias);
                 if(orientation==0)
                     decorrelate(s, b, b->buf, b->stride, pict->pict_type == P_TYPE, 0);
@@ -3600,7 +4202,7 @@ redo_frame:
 STOP_TIMER("pred-conv")}
         if(s->avctx->flags&CODEC_FLAG_PSNR){
             int64_t error= 0;
-            
+
     if(pict->data[plane_index]) //FIXME gray hack
             for(y=0; y<h; y++){
                 for(x=0; x<w; x++){
@@ -3632,7 +4234,7 @@ STOP_TIMER("pred-conv")}
     }
 
     emms_c();
-    
+
     return ff_rac_terminate(c);
 }
 #endif
@@ -3642,17 +4244,18 @@ static void common_end(SnowContext *s){
 
     av_freep(&s->spatial_dwt_buffer);
 
-    av_freep(&s->m.me.scratchpad);    
+    av_freep(&s->m.me.scratchpad);
     av_freep(&s->m.me.map);
     av_freep(&s->m.me.score_map);
- 
+    av_freep(&s->m.obmc_scratchpad);
+
     av_freep(&s->block);
 
-    for(plane_index=0; plane_index<3; plane_index++){    
+    for(plane_index=0; plane_index<3; plane_index++){
         for(level=s->spatial_decomposition_count-1; level>=0; level--){
             for(orientation=level ? 1 : 0; orientation<4; orientation++){
                 SubBand *b= &s->plane[plane_index].band[level][orientation];
-                
+
                 av_freep(&b->x_coeff);
             }
         }
@@ -3675,14 +4278,14 @@ static int decode_init(AVCodecContext *avctx)
 {
     SnowContext *s = avctx->priv_data;
     int block_size;
-    
+
     avctx->pix_fmt= PIX_FMT_YUV420P;
 
     common_init(avctx);
-    
+
     block_size = MB_SIZE >> s->block_max_depth;
-    slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 2)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
-    
+    slice_buffer_init(&s->sb, s->plane[0].height, (block_size) + (s->spatial_decomposition_count * (s->spatial_decomposition_count + 3)) + 1, s->plane[0].width, s->spatial_dwt_buffer);
+
     return 0;
 }
 
@@ -3704,7 +4307,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
     //keyframe flag dupliaction mess FIXME
     if(avctx->debug&FF_DEBUG_PICT_INFO)
         av_log(avctx, AV_LOG_ERROR, "keyframe:%d qlog:%d\n", s->keyframe, s->qlog);
-    
+
     decode_blocks(s);
 
     for(plane_index=0; plane_index<3; plane_index++){
@@ -3713,7 +4316,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
         int h= p->height;
         int x, y;
         int decode_state[MAX_DECOMPOSITIONS][4][1]; /* Stored state info for unpack_coeffs. 1 variable per instance. */
-        
+
 if(s->avctx->debug&2048){
         memset(s->spatial_dwt_buffer, 0, sizeof(DWTELEM)*w*h);
         predict_plane(s, s->spatial_dwt_buffer, plane_index, 1);
@@ -3748,7 +4351,7 @@ if(s->avctx->debug&2048){
 
     ff_spatial_idwt_buffered_init(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count);
     for(mb_y=0; mb_y<=mb_h; mb_y++){
-        
+
         int slice_starty = block_w*mb_y;
         int slice_h = block_w*(mb_y+1);
         if (!(s->keyframe || s->avctx->debug&512)){
@@ -3756,7 +4359,7 @@ if(s->avctx->debug&2048){
             slice_h -= (block_w >> 1);
         }
 
-        {        
+        {
         START_TIMER
         for(level=0; level<s->spatial_decomposition_count; level++){
             for(orientation=level ? 1 : 0; orientation<4; orientation++){
@@ -3765,15 +4368,16 @@ if(s->avctx->debug&2048){
                 int end_y;
                 int our_mb_start = mb_y;
                 int our_mb_end = (mb_y + 1);
-                start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2: 0);
-                end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + 2);
+                const int extra= 3;
+                start_y = (mb_y ? ((block_w * our_mb_start) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra: 0);
+                end_y = (((block_w * our_mb_end) >> (s->spatial_decomposition_count - level)) + s->spatial_decomposition_count - level + extra);
                 if (!(s->keyframe || s->avctx->debug&512)){
                     start_y = FFMAX(0, start_y - (block_w >> (1+s->spatial_decomposition_count - level)));
                     end_y = FFMAX(0, end_y - (block_w >> (1+s->spatial_decomposition_count - level)));
                 }
                 start_y = FFMIN(b->height, start_y);
                 end_y = FFMIN(b->height, end_y);
-                
+
                 if (start_y != end_y){
                     if (orientation == 0){
                         SubBand * correlate_band = &p->band[0][0];
@@ -3790,14 +4394,14 @@ if(s->avctx->debug&2048){
         }
         STOP_TIMER("decode_subband_slice");
         }
-        
+
 {   START_TIMER
         for(; yd<slice_h; yd+=4){
             ff_spatial_idwt_buffered_slice(cs, &s->sb, w, h, 1, s->spatial_decomposition_type, s->spatial_decomposition_count, yd);
         }
     STOP_TIMER("idwt slice");}
 
-        
+
         if(s->qlog == LOSSLESS_QLOG){
             for(; yq<slice_h && yq<h; yq++){
                 DWTELEM * line = slice_buffer_get_line(&s->sb, yq);
@@ -3808,30 +4412,30 @@ if(s->avctx->debug&2048){
         }
 
         predict_slice_buffered(s, &s->sb, s->spatial_dwt_buffer, plane_index, 1, mb_y);
-        
+
         y = FFMIN(p->height, slice_starty);
         end_y = FFMIN(p->height, slice_h);
         while(y < end_y)
             slice_buffer_release(&s->sb, y++);
     }
-    
+
     slice_buffer_flush(&s->sb);
-    
+
 STOP_TIMER("idwt + predict_slices")}
     }
-            
+
     emms_c();
 
     if(s->last_picture.data[0])
         avctx->release_buffer(avctx, &s->last_picture);
 
-if(!(s->avctx->debug&2048))        
+if(!(s->avctx->debug&2048))
     *picture= s->current_picture;
 else
     *picture= s->mconly_picture;
-    
+
     *data_size = sizeof(AVFrame);
-    
+
     bytes_read= c->bytestream - c->bytestream_start;
     if(bytes_read ==0) av_log(s->avctx, AV_LOG_ERROR, "error at end of frame\n"); //FIXME
 
@@ -3843,7 +4447,7 @@ static int decode_end(AVCodecContext *avctx)
     SnowContext *s = avctx->priv_data;
 
     slice_buffer_destroy(&s->sb);
-    
+
     common_end(s);
 
     return 0;
@@ -3888,14 +4492,14 @@ int main(){
     int i;
     s.spatial_decomposition_count=6;
     s.spatial_decomposition_type=1;
-    
+
     printf("testing 5/3 DWT\n");
     for(i=0; i<width*height; i++)
         buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
-    
-    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
-    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
-    
+
+    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+
     for(i=0; i<width*height; i++)
         if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
 
@@ -3903,18 +4507,19 @@ int main(){
     s.spatial_decomposition_type=0;
     for(i=0; i<width*height; i++)
         buffer[0][i]= buffer[1][i]= random()%54321 - 12345;
-    
-    ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
-    ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
-    
+
+    ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+    ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
+
     for(i=0; i<width*height; i++)
-        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
-        
+        if(ABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %d %d %d\n",i, buffer[0][i], buffer[1][i]);
+
+#if 0
     printf("testing AC coder\n");
     memset(s.header_state, 0, sizeof(s.header_state));
     ff_init_range_encoder(&s.c, buffer[0], 256*256);
     ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
-        
+
     for(i=-256; i<256; i++){
 START_TIMER
         put_symbol(&s.c, s.header_state, i*i*i/3*ABS(i), 1);
@@ -3925,7 +4530,7 @@ STOP_TIMER("put_symbol")
     memset(s.header_state, 0, sizeof(s.header_state));
     ff_init_range_decoder(&s.c, buffer[0], 256*256);
     ff_init_cabac_states(&s.c, ff_h264_lps_range, ff_h264_mps_state, ff_h264_lps_state, 64);
-    
+
     for(i=-256; i<256; i++){
         int j;
 START_TIMER
@@ -3933,6 +4538,7 @@ START_TIMER
 STOP_TIMER("get_symbol")
         if(j!=i*i*i/3*ABS(i)) printf("fsck: %d != %d\n", i, j);
     }
+#endif
 {
 int level, orientation, x, y;
 int64_t errors[8][4];
@@ -3951,10 +4557,10 @@ int64_t g=0;
 
             if(orientation&1) buf+=w;
             if(orientation>1) buf+=stride>>1;
-            
+
             memset(buffer[0], 0, sizeof(int)*width*height);
             buf[w/2 + h/2*stride]= 256*256;
-            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
+            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
             for(y=0; y<height; y++){
                 for(x=0; x<width; x++){
                     int64_t d= buffer[0][x + y*width];
@@ -3989,7 +4595,7 @@ int64_t g=0;
 
             buf+=w;
             buf+=stride>>1;
-            
+
             memset(buffer[0], 0, sizeof(int)*width*height);
 #if 1
             for(y=0; y<height; y++){
@@ -3998,7 +4604,7 @@ int64_t g=0;
                     buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
                 }
             }
-            ff_spatial_dwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
+            ff_spatial_dwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
 #else
             for(y=0; y<h; y++){
                 for(x=0; x<w; x++){
@@ -4006,7 +4612,7 @@ int64_t g=0;
                     buf[x + y*stride-w]=64;
                 }
             }
-            ff_spatial_idwt(buffer[0], width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
+            ff_spatial_idwt(buffer[0], width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
 #endif
             for(y=0; y<height; y++){
                 for(x=0; x<width; x++){
diff --git a/src/libffmpeg/libavcodec/sp5x.h b/src/libffmpeg/libavcodec/sp5x.h
index dee3591bc..72ae1cab1 100644
--- a/src/libffmpeg/libavcodec/sp5x.h
+++ b/src/libffmpeg/libavcodec/sp5x.h
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 #ifndef SP5X_H
@@ -22,12 +22,12 @@
 
 static const uint8_t sp5x_data_sof[] =
 {
-    0xFF, 0xC0,	/* SOF */
-    0x00, 0x11,	/* len */
-    0x08,	/* bits */
-    0x00, 0xf0,	/* height (default: 240) */
-    0x01, 0x40,	/* width (default: 240) */
-    0x03,	/* nb components */
+    0xFF, 0xC0,       /* SOF */
+    0x00, 0x11,       /* len */
+    0x08,             /* bits */
+    0x00, 0xf0,       /* height (default: 240) */
+    0x01, 0x40,       /* width (default: 320) */
+    0x03,             /* nb components */
     0x01, 0x22, 0x00, /* 21 vs 22 ? */
     0x02, 0x11, 0x01,
     0x03, 0x11, 0x01
@@ -35,15 +35,15 @@ static const uint8_t sp5x_data_sof[] =
 
 static const uint8_t sp5x_data_sos[] =
 {
-    0xFF, 0xDA,	/* SOS */
-    0x00, 0x0C,	/* len */
-    0x03,	/* nb components */
+    0xFF, 0xDA,       /* SOS */
+    0x00, 0x0C,       /* len */
+    0x03,             /* nb components */
     0x01, 0x00,
     0x02, 0x11,
     0x03, 0x11,
-    0x00,	/* Ss */
-    0x3F,	/* Se */
-    0x00	/* Ah/Al */
+    0x00,             /* Ss */
+    0x3F,             /* Se */
+    0x00              /* Ah/Al */
 };
 
 static const uint8_t sp5x_data_dqt[] =
@@ -70,9 +70,9 @@ static const uint8_t sp5x_data_dqt[] =
     0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22, 0x22
 };
 
-static const uint8_t sp5x_data_dht[] = {   
-    0xFF, 0xC4,	/* DHT */
-    0x01, 0xA2,	/* len */
+static const uint8_t sp5x_data_dht[] = {
+    0xFF, 0xC4, /* DHT */
+    0x01, 0xA2, /* len */
     0x00, 0x00, 0x01, 0x05, 0x01, 0x01, 0x01, 0x01,
     0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
     0x00, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
@@ -233,7 +233,7 @@ static const uint8_t sp5x_quant_table[20][64]=
 
 #if 0
 /* 4NF-M, not ZigZag */
-static const uint8_t sp5x_quant_table_orig[18][64] = 
+static const uint8_t sp5x_quant_table_orig[18][64] =
 {
     /* index 0, Q50 */
     {  16, 11, 10, 16, 24, 40, 51, 61, 12, 12, 14, 19, 26, 58, 60, 55,
@@ -257,7 +257,7 @@ static const uint8_t sp5x_quant_table_orig[18][64] =
 
     /* index 2, Q80 */
     {   6,  4,  4,  6, 10, 16, 20, 24,  5,  5,  6,  8, 10, 23, 24, 22,
-	6,  5,  6, 10, 16, 23, 28, 22,  6,  7,  9, 12, 20, 35, 32, 25,
+        6,  5,  6, 10, 16, 23, 28, 22,  6,  7,  9, 12, 20, 35, 32, 25,
         7,  9, 15, 22, 27, 44, 41, 31, 10, 14, 22, 26, 32, 42, 45, 37,
        20, 26, 31, 35, 41, 48, 48, 40, 29, 37, 38, 39, 45, 40, 41, 40 },
     {   7,  7, 10, 19, 40, 40, 40, 40,  7,  8, 10, 26, 40, 40, 40, 40,
@@ -267,7 +267,7 @@ static const uint8_t sp5x_quant_table_orig[18][64] =
 
     /* index 3, Q85 */
     {   5,  3,  3,  5,  7, 12, 15, 18,  4,  4,  4,  6,  8, 17, 18, 17,
-	4,  4,  5,  7, 12, 17, 21, 17,  4,  5,  7,  9, 15, 26, 24, 19,
+        4,  4,  5,  7, 12, 17, 21, 17,  4,  5,  7,  9, 15, 26, 24, 19,
         5,  7, 11, 17, 20, 33, 31, 23,  7, 11, 17, 19, 24, 31, 34, 28,
        15, 19, 23, 26, 31, 36, 36, 30, 22, 28, 29, 29, 34, 30, 31, 30 },
     {   5,  5,  7, 14, 30, 30, 30, 30,  5,  6,  8, 20, 30, 30, 30, 30,
diff --git a/src/libffmpeg/libavcodec/sparc/dsputil_vis.c b/src/libffmpeg/libavcodec/sparc/dsputil_vis.c
index 53f38b2aa..f4ac3883d 100644
--- a/src/libffmpeg/libavcodec/sparc/dsputil_vis.c
+++ b/src/libffmpeg/libavcodec/sparc/dsputil_vis.c
@@ -17,7 +17,7 @@
  *
  * You should have received a copy of the Lesser GNU General Public License
  * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /* The *no_round* functions have been added by James A. Morrison, 2003,2004.
@@ -48,12 +48,12 @@
  * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and
  * the value 0x80808080 is in f8):
  *
- *	fxor		f0, f2, f10
- *	fand		f10, f4, f10
- *	fmul8x16	f8, f10, f10
- *	fand		f10, f6, f10
- *	for		f0, f2, f12
- *	fpsub16		f12, f10, f10
+ *      fxor            f0,   f2, f10
+ *      fand            f10,  f4, f10
+ *      fmul8x16        f8,  f10, f10
+ *      fand            f10,  f6, f10
+ *      for             f0,   f2, f12
+ *      fpsub16         f12, f10, f10
  */
 
 #define ATTR_ALIGN(alignd) __attribute__ ((aligned(alignd)))
@@ -68,1979 +68,1979 @@ static const int8_t constants_fe[] ATTR_ALIGN(8) = DUP8 (0xfe);
 static const int8_t constants_7f[] ATTR_ALIGN(8) = DUP8 (0x7f);
 static const int8_t constants128[] ATTR_ALIGN(8) = DUP8 (128);
 static const int16_t constants256_512[] ATTR_ALIGN(8) =
-	{256, 512, 256, 512};
+        {256, 512, 256, 512};
 static const int16_t constants256_1024[] ATTR_ALIGN(8) =
-	{256, 1024, 256, 1024};
-
-#define REF_0		0
-#define REF_0_1		1
-#define REF_2		2
-#define REF_2_1		3
-#define REF_4		4
-#define REF_4_1		5
-#define REF_6		6
-#define REF_6_1		7
-#define REF_S0		8
-#define REF_S0_1	9
-#define REF_S2		10
-#define REF_S2_1	11
-#define REF_S4		12
-#define REF_S4_1	13
-#define REF_S6		14
-#define REF_S6_1	15
-#define DST_0		16
-#define DST_1		17
-#define DST_2		18
-#define DST_3		19
-#define CONST_1		20
-#define CONST_2		20
-#define CONST_3		20
-#define CONST_6		20
-#define MASK_fe		20
-#define CONST_128	22
-#define CONST_256	22
-#define CONST_512	22
-#define CONST_1024	22
-#define TMP0		24
-#define TMP1		25
-#define TMP2		26
-#define TMP3		27
-#define TMP4		28
-#define TMP5		29
-#define ZERO		30
-#define MASK_7f		30
-
-#define TMP6		32
-#define TMP8		34
-#define TMP10		36
-#define TMP12		38
-#define TMP14		40
-#define TMP16		42
-#define TMP18		44
-#define TMP20		46
-#define TMP22		48
-#define TMP24		50
-#define TMP26		52
-#define TMP28		54
-#define TMP30		56
-#define TMP32		58
+        {256, 1024, 256, 1024};
+
+#define REF_0           0
+#define REF_0_1         1
+#define REF_2           2
+#define REF_2_1         3
+#define REF_4           4
+#define REF_4_1         5
+#define REF_6           6
+#define REF_6_1         7
+#define REF_S0          8
+#define REF_S0_1        9
+#define REF_S2          10
+#define REF_S2_1        11
+#define REF_S4          12
+#define REF_S4_1        13
+#define REF_S6          14
+#define REF_S6_1        15
+#define DST_0           16
+#define DST_1           17
+#define DST_2           18
+#define DST_3           19
+#define CONST_1         20
+#define CONST_2         20
+#define CONST_3         20
+#define CONST_6         20
+#define MASK_fe         20
+#define CONST_128       22
+#define CONST_256       22
+#define CONST_512       22
+#define CONST_1024      22
+#define TMP0            24
+#define TMP1            25
+#define TMP2            26
+#define TMP3            27
+#define TMP4            28
+#define TMP5            29
+#define ZERO            30
+#define MASK_7f         30
+
+#define TMP6            32
+#define TMP8            34
+#define TMP10           36
+#define TMP12           38
+#define TMP14           40
+#define TMP16           42
+#define TMP18           44
+#define TMP20           46
+#define TMP22           48
+#define TMP24           50
+#define TMP26           52
+#define TMP28           54
+#define TMP30           56
+#define TMP32           58
 
 static void MC_put_o_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
+        uint8_t *ref = (uint8_t *) _ref;
 
-	ref = vis_alignaddr(ref);
-	do {	/* 5 cycles */
-		vis_ld64(ref[0], TMP0);
+        ref = vis_alignaddr(ref);
+        do {    /* 5 cycles */
+                vis_ld64(ref[0], TMP0);
 
-		vis_ld64_2(ref, 8, TMP2);
+                vis_ld64_2(ref, 8, TMP2);
 
-		vis_ld64_2(ref, 16, TMP4);
-		ref += stride;
+                vis_ld64_2(ref, 16, TMP4);
+                ref += stride;
 
-		vis_faligndata(TMP0, TMP2, REF_0);
-		vis_st64(REF_0, dest[0]);
+                vis_faligndata(TMP0, TMP2, REF_0);
+                vis_st64(REF_0, dest[0]);
 
-		vis_faligndata(TMP2, TMP4, REF_2);
-		vis_st64_2(REF_2, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_faligndata(TMP2, TMP4, REF_2);
+                vis_st64_2(REF_2, dest, 8);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_put_o_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
+                            const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
+        uint8_t *ref = (uint8_t *) _ref;
 
-	ref = vis_alignaddr(ref);
-	do {	/* 4 cycles */
-		vis_ld64(ref[0], TMP0);
+        ref = vis_alignaddr(ref);
+        do {    /* 4 cycles */
+                vis_ld64(ref[0], TMP0);
 
-		vis_ld64(ref[8], TMP2);
-		ref += stride;
+                vis_ld64(ref[8], TMP2);
+                ref += stride;
 
-		/* stall */
+                /* stall */
 
-		vis_faligndata(TMP0, TMP2, REF_0);
-		vis_st64(REF_0, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_faligndata(TMP0, TMP2, REF_0);
+                vis_st64(REF_0, dest[0]);
+                dest += stride;
+        } while (--height);
 }
 
 
 static void MC_avg_o_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	int stride_8 = stride + 8;
+        uint8_t *ref = (uint8_t *) _ref;
+        int stride_8 = stride + 8;
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[0], TMP0);
+        vis_ld64(ref[0], TMP0);
 
-	vis_ld64(ref[8], TMP2);
+        vis_ld64(ref[8], TMP2);
 
-	vis_ld64(ref[16], TMP4);
+        vis_ld64(ref[16], TMP4);
 
-	vis_ld64(dest[0], DST_0);
+        vis_ld64(dest[0], DST_0);
 
-	vis_ld64(dest[8], DST_2);
+        vis_ld64(dest[8], DST_2);
 
-	vis_ld64(constants_fe[0], MASK_fe);
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_ld64(constants_fe[0], MASK_fe);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP2, TMP4, REF_2);
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP2, TMP4, REF_2);
 
-	vis_ld64(constants128[0], CONST_128);
+        vis_ld64(constants128[0], CONST_128);
 
-	ref += stride;
-	height = (height >> 1) - 1;
+        ref += stride;
+        height = (height >> 1) - 1;
 
-	do {	/* 24 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(DST_0, REF_0, TMP6);
+        do {    /* 24 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(DST_0, REF_0, TMP6);
 
-		vis_ld64_2(ref, 8, TMP2);
-		vis_and(TMP6, MASK_fe, TMP6);
+                vis_ld64_2(ref, 8, TMP2);
+                vis_and(TMP6, MASK_fe, TMP6);
 
-		vis_ld64_2(ref, 16, TMP4);
-		ref += stride;
-		vis_mul8x16(CONST_128, TMP6, TMP6);
-		vis_xor(DST_2, REF_2, TMP8);
+                vis_ld64_2(ref, 16, TMP4);
+                ref += stride;
+                vis_mul8x16(CONST_128, TMP6, TMP6);
+                vis_xor(DST_2, REF_2, TMP8);
 
-		vis_and(TMP8, MASK_fe, TMP8);
+                vis_and(TMP8, MASK_fe, TMP8);
 
-		vis_or(DST_0, REF_0, TMP10);
-		vis_ld64_2(dest, stride, DST_0);
-		vis_mul8x16(CONST_128, TMP8, TMP8);
+                vis_or(DST_0, REF_0, TMP10);
+                vis_ld64_2(dest, stride, DST_0);
+                vis_mul8x16(CONST_128, TMP8, TMP8);
 
-		vis_or(DST_2, REF_2, TMP12);
-		vis_ld64_2(dest, stride_8, DST_2);
+                vis_or(DST_2, REF_2, TMP12);
+                vis_ld64_2(dest, stride_8, DST_2);
 
-		vis_ld64(ref[0], TMP14);
-		vis_and(TMP6, MASK_7f, TMP6);
+                vis_ld64(ref[0], TMP14);
+                vis_and(TMP6, MASK_7f, TMP6);
 
-		vis_and(TMP8, MASK_7f, TMP8);
+                vis_and(TMP8, MASK_7f, TMP8);
 
-		vis_psub16(TMP10, TMP6, TMP6);
-		vis_st64(TMP6, dest[0]);
+                vis_psub16(TMP10, TMP6, TMP6);
+                vis_st64(TMP6, dest[0]);
 
-		vis_psub16(TMP12, TMP8, TMP8);
-		vis_st64_2(TMP8, dest, 8);
+                vis_psub16(TMP12, TMP8, TMP8);
+                vis_st64_2(TMP8, dest, 8);
 
-		dest += stride;
-		vis_ld64_2(ref, 8, TMP16);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                dest += stride;
+                vis_ld64_2(ref, 8, TMP16);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(ref, 16, TMP18);
-		vis_faligndata(TMP2, TMP4, REF_2);
-		ref += stride;
+                vis_ld64_2(ref, 16, TMP18);
+                vis_faligndata(TMP2, TMP4, REF_2);
+                ref += stride;
 
-		vis_xor(DST_0, REF_0, TMP20);
+                vis_xor(DST_0, REF_0, TMP20);
 
-		vis_and(TMP20, MASK_fe, TMP20);
+                vis_and(TMP20, MASK_fe, TMP20);
 
-		vis_xor(DST_2, REF_2, TMP22);
-		vis_mul8x16(CONST_128, TMP20, TMP20);
+                vis_xor(DST_2, REF_2, TMP22);
+                vis_mul8x16(CONST_128, TMP20, TMP20);
 
-		vis_and(TMP22, MASK_fe, TMP22);
+                vis_and(TMP22, MASK_fe, TMP22);
 
-		vis_or(DST_0, REF_0, TMP24);
-		vis_mul8x16(CONST_128, TMP22, TMP22);
+                vis_or(DST_0, REF_0, TMP24);
+                vis_mul8x16(CONST_128, TMP22, TMP22);
 
-		vis_or(DST_2, REF_2, TMP26);
+                vis_or(DST_2, REF_2, TMP26);
 
-		vis_ld64_2(dest, stride, DST_0);
-		vis_faligndata(TMP14, TMP16, REF_0);
+                vis_ld64_2(dest, stride, DST_0);
+                vis_faligndata(TMP14, TMP16, REF_0);
 
-		vis_ld64_2(dest, stride_8, DST_2);
-		vis_faligndata(TMP16, TMP18, REF_2);
+                vis_ld64_2(dest, stride_8, DST_2);
+                vis_faligndata(TMP16, TMP18, REF_2);
 
-		vis_and(TMP20, MASK_7f, TMP20);
+                vis_and(TMP20, MASK_7f, TMP20);
 
-		vis_and(TMP22, MASK_7f, TMP22);
+                vis_and(TMP22, MASK_7f, TMP22);
 
-		vis_psub16(TMP24, TMP20, TMP20);
-		vis_st64(TMP20, dest[0]);
+                vis_psub16(TMP24, TMP20, TMP20);
+                vis_st64(TMP20, dest[0]);
 
-		vis_psub16(TMP26, TMP22, TMP22);
-		vis_st64_2(TMP22, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_psub16(TMP26, TMP22, TMP22);
+                vis_st64_2(TMP22, dest, 8);
+                dest += stride;
+        } while (--height);
 
-	vis_ld64(ref[0], TMP0);
-	vis_xor(DST_0, REF_0, TMP6);
+        vis_ld64(ref[0], TMP0);
+        vis_xor(DST_0, REF_0, TMP6);
 
-	vis_ld64_2(ref, 8, TMP2);
-	vis_and(TMP6, MASK_fe, TMP6);
+        vis_ld64_2(ref, 8, TMP2);
+        vis_and(TMP6, MASK_fe, TMP6);
 
-	vis_ld64_2(ref, 16, TMP4);
-	vis_mul8x16(CONST_128, TMP6, TMP6);
-	vis_xor(DST_2, REF_2, TMP8);
+        vis_ld64_2(ref, 16, TMP4);
+        vis_mul8x16(CONST_128, TMP6, TMP6);
+        vis_xor(DST_2, REF_2, TMP8);
 
-	vis_and(TMP8, MASK_fe, TMP8);
+        vis_and(TMP8, MASK_fe, TMP8);
 
-	vis_or(DST_0, REF_0, TMP10);
-	vis_ld64_2(dest, stride, DST_0);
-	vis_mul8x16(CONST_128, TMP8, TMP8);
+        vis_or(DST_0, REF_0, TMP10);
+        vis_ld64_2(dest, stride, DST_0);
+        vis_mul8x16(CONST_128, TMP8, TMP8);
 
-	vis_or(DST_2, REF_2, TMP12);
-	vis_ld64_2(dest, stride_8, DST_2);
+        vis_or(DST_2, REF_2, TMP12);
+        vis_ld64_2(dest, stride_8, DST_2);
 
-	vis_ld64(ref[0], TMP14);
-	vis_and(TMP6, MASK_7f, TMP6);
+        vis_ld64(ref[0], TMP14);
+        vis_and(TMP6, MASK_7f, TMP6);
 
-	vis_and(TMP8, MASK_7f, TMP8);
+        vis_and(TMP8, MASK_7f, TMP8);
 
-	vis_psub16(TMP10, TMP6, TMP6);
-	vis_st64(TMP6, dest[0]);
+        vis_psub16(TMP10, TMP6, TMP6);
+        vis_st64(TMP6, dest[0]);
 
-	vis_psub16(TMP12, TMP8, TMP8);
-	vis_st64_2(TMP8, dest, 8);
+        vis_psub16(TMP12, TMP8, TMP8);
+        vis_st64_2(TMP8, dest, 8);
 
-	dest += stride;
-	vis_faligndata(TMP0, TMP2, REF_0);
+        dest += stride;
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_faligndata(TMP2, TMP4, REF_2);
+        vis_faligndata(TMP2, TMP4, REF_2);
 
-	vis_xor(DST_0, REF_0, TMP20);
+        vis_xor(DST_0, REF_0, TMP20);
 
-	vis_and(TMP20, MASK_fe, TMP20);
+        vis_and(TMP20, MASK_fe, TMP20);
 
-	vis_xor(DST_2, REF_2, TMP22);
-	vis_mul8x16(CONST_128, TMP20, TMP20);
+        vis_xor(DST_2, REF_2, TMP22);
+        vis_mul8x16(CONST_128, TMP20, TMP20);
 
-	vis_and(TMP22, MASK_fe, TMP22);
+        vis_and(TMP22, MASK_fe, TMP22);
 
-	vis_or(DST_0, REF_0, TMP24);
-	vis_mul8x16(CONST_128, TMP22, TMP22);
+        vis_or(DST_0, REF_0, TMP24);
+        vis_mul8x16(CONST_128, TMP22, TMP22);
 
-	vis_or(DST_2, REF_2, TMP26);
+        vis_or(DST_2, REF_2, TMP26);
 
-	vis_and(TMP20, MASK_7f, TMP20);
+        vis_and(TMP20, MASK_7f, TMP20);
 
-	vis_and(TMP22, MASK_7f, TMP22);
+        vis_and(TMP22, MASK_7f, TMP22);
 
-	vis_psub16(TMP24, TMP20, TMP20);
-	vis_st64(TMP20, dest[0]);
+        vis_psub16(TMP24, TMP20, TMP20);
+        vis_st64(TMP20, dest[0]);
 
-	vis_psub16(TMP26, TMP22, TMP22);
-	vis_st64_2(TMP22, dest, 8);
+        vis_psub16(TMP26, TMP22, TMP22);
+        vis_st64_2(TMP22, dest, 8);
 }
 
 static void MC_avg_o_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
+                            const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
+        uint8_t *ref = (uint8_t *) _ref;
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[0], TMP0);
+        vis_ld64(ref[0], TMP0);
 
-	vis_ld64(ref[8], TMP2);
+        vis_ld64(ref[8], TMP2);
 
-	vis_ld64(dest[0], DST_0);
+        vis_ld64(dest[0], DST_0);
 
-	vis_ld64(constants_fe[0], MASK_fe);
+        vis_ld64(constants_fe[0], MASK_fe);
 
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_ld64(constants128[0], CONST_128);
+        vis_ld64(constants128[0], CONST_128);
 
-	ref += stride;
-	height = (height >> 1) - 1;
+        ref += stride;
+        height = (height >> 1) - 1;
 
-	do {	/* 12 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(DST_0, REF_0, TMP4);
+        do {    /* 12 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(DST_0, REF_0, TMP4);
 
-		vis_ld64(ref[8], TMP2);
-		vis_and(TMP4, MASK_fe, TMP4);
+                vis_ld64(ref[8], TMP2);
+                vis_and(TMP4, MASK_fe, TMP4);
 
-		vis_or(DST_0, REF_0, TMP6);
-		vis_ld64_2(dest, stride, DST_0);
-		ref += stride;
-		vis_mul8x16(CONST_128, TMP4, TMP4);
+                vis_or(DST_0, REF_0, TMP6);
+                vis_ld64_2(dest, stride, DST_0);
+                ref += stride;
+                vis_mul8x16(CONST_128, TMP4, TMP4);
 
-		vis_ld64(ref[0], TMP12);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64(ref[0], TMP12);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64(ref[8], TMP2);
-		vis_xor(DST_0, REF_0, TMP0);
-		ref += stride;
+                vis_ld64(ref[8], TMP2);
+                vis_xor(DST_0, REF_0, TMP0);
+                ref += stride;
 
-		vis_and(TMP0, MASK_fe, TMP0);
+                vis_and(TMP0, MASK_fe, TMP0);
 
-		vis_and(TMP4, MASK_7f, TMP4);
+                vis_and(TMP4, MASK_7f, TMP4);
 
-		vis_psub16(TMP6, TMP4, TMP4);
-		vis_st64(TMP4, dest[0]);
-		dest += stride;
-		vis_mul8x16(CONST_128, TMP0, TMP0);
+                vis_psub16(TMP6, TMP4, TMP4);
+                vis_st64(TMP4, dest[0]);
+                dest += stride;
+                vis_mul8x16(CONST_128, TMP0, TMP0);
 
-		vis_or(DST_0, REF_0, TMP6);
-		vis_ld64_2(dest, stride, DST_0);
+                vis_or(DST_0, REF_0, TMP6);
+                vis_ld64_2(dest, stride, DST_0);
 
-		vis_faligndata(TMP12, TMP2, REF_0);
+                vis_faligndata(TMP12, TMP2, REF_0);
 
-		vis_and(TMP0, MASK_7f, TMP0);
+                vis_and(TMP0, MASK_7f, TMP0);
 
-		vis_psub16(TMP6, TMP0, TMP4);
-		vis_st64(TMP4, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_psub16(TMP6, TMP0, TMP4);
+                vis_st64(TMP4, dest[0]);
+                dest += stride;
+        } while (--height);
 
-	vis_ld64(ref[0], TMP0);
-	vis_xor(DST_0, REF_0, TMP4);
+        vis_ld64(ref[0], TMP0);
+        vis_xor(DST_0, REF_0, TMP4);
 
-	vis_ld64(ref[8], TMP2);
-	vis_and(TMP4, MASK_fe, TMP4);
+        vis_ld64(ref[8], TMP2);
+        vis_and(TMP4, MASK_fe, TMP4);
 
-	vis_or(DST_0, REF_0, TMP6);
-	vis_ld64_2(dest, stride, DST_0);
-	vis_mul8x16(CONST_128, TMP4, TMP4);
+        vis_or(DST_0, REF_0, TMP6);
+        vis_ld64_2(dest, stride, DST_0);
+        vis_mul8x16(CONST_128, TMP4, TMP4);
 
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_xor(DST_0, REF_0, TMP0);
+        vis_xor(DST_0, REF_0, TMP0);
 
-	vis_and(TMP0, MASK_fe, TMP0);
+        vis_and(TMP0, MASK_fe, TMP0);
 
-	vis_and(TMP4, MASK_7f, TMP4);
+        vis_and(TMP4, MASK_7f, TMP4);
 
-	vis_psub16(TMP6, TMP4, TMP4);
-	vis_st64(TMP4, dest[0]);
-	dest += stride;
-	vis_mul8x16(CONST_128, TMP0, TMP0);
+        vis_psub16(TMP6, TMP4, TMP4);
+        vis_st64(TMP4, dest[0]);
+        dest += stride;
+        vis_mul8x16(CONST_128, TMP0, TMP0);
 
-	vis_or(DST_0, REF_0, TMP6);
+        vis_or(DST_0, REF_0, TMP6);
 
-	vis_and(TMP0, MASK_7f, TMP0);
+        vis_and(TMP0, MASK_7f, TMP0);
 
-	vis_psub16(TMP6, TMP0, TMP4);
-	vis_st64(TMP4, dest[0]);
+        vis_psub16(TMP6, TMP0, TMP4);
+        vis_st64(TMP4, dest[0]);
 }
 
 static void MC_put_x_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[0],    TMP0);
+        vis_ld64(ref[0],    TMP0);
 
-	vis_ld64_2(ref, 8,  TMP2);
+        vis_ld64_2(ref, 8,  TMP2);
 
-	vis_ld64_2(ref, 16, TMP4);
+        vis_ld64_2(ref, 16, TMP4);
 
-	vis_ld64(constants_fe[0], MASK_fe);
+        vis_ld64(constants_fe[0], MASK_fe);
 
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_ld64(constants128[0], CONST_128);
-	vis_faligndata(TMP2, TMP4, REF_4);
+        vis_ld64(constants128[0], CONST_128);
+        vis_faligndata(TMP2, TMP4, REF_4);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_2);
-		vis_faligndata(TMP2, TMP4, REF_6);
-	} else {
-		vis_src1(TMP2, REF_2);
-		vis_src1(TMP4, REF_6);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+                vis_faligndata(TMP2, TMP4, REF_6);
+        } else {
+                vis_src1(TMP2, REF_2);
+                vis_src1(TMP4, REF_6);
+        }
 
-	ref += stride;
-	height = (height >> 1) - 1;
+        ref += stride;
+        height = (height >> 1) - 1;
 
-	do {	/* 34 cycles */
-		vis_ld64(ref[0],    TMP0);
-		vis_xor(REF_0, REF_2, TMP6);
+        do {    /* 34 cycles */
+                vis_ld64(ref[0],    TMP0);
+                vis_xor(REF_0, REF_2, TMP6);
 
-		vis_ld64_2(ref, 8,  TMP2);
-		vis_xor(REF_4, REF_6, TMP8);
+                vis_ld64_2(ref, 8,  TMP2);
+                vis_xor(REF_4, REF_6, TMP8);
 
-		vis_ld64_2(ref, 16, TMP4);
-		vis_and(TMP6, MASK_fe, TMP6);
-		ref += stride;
+                vis_ld64_2(ref, 16, TMP4);
+                vis_and(TMP6, MASK_fe, TMP6);
+                ref += stride;
 
-		vis_ld64(ref[0],    TMP14);
-		vis_mul8x16(CONST_128, TMP6, TMP6);
-		vis_and(TMP8, MASK_fe, TMP8);
+                vis_ld64(ref[0],    TMP14);
+                vis_mul8x16(CONST_128, TMP6, TMP6);
+                vis_and(TMP8, MASK_fe, TMP8);
 
-		vis_ld64_2(ref, 8,  TMP16);
-		vis_mul8x16(CONST_128, TMP8, TMP8);
-		vis_or(REF_0, REF_2, TMP10);
+                vis_ld64_2(ref, 8,  TMP16);
+                vis_mul8x16(CONST_128, TMP8, TMP8);
+                vis_or(REF_0, REF_2, TMP10);
 
-		vis_ld64_2(ref, 16, TMP18);
-		ref += stride;
-		vis_or(REF_4, REF_6, TMP12);
+                vis_ld64_2(ref, 16, TMP18);
+                ref += stride;
+                vis_or(REF_4, REF_6, TMP12);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_faligndata(TMP2, TMP4, REF_4);
+                vis_faligndata(TMP2, TMP4, REF_4);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-			vis_faligndata(TMP2, TMP4, REF_6);
-		} else {
-			vis_src1(TMP2, REF_2);
-			vis_src1(TMP4, REF_6);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                }
 
-		vis_and(TMP6, MASK_7f, TMP6);
+                vis_and(TMP6, MASK_7f, TMP6);
 
-		vis_and(TMP8, MASK_7f, TMP8);
+                vis_and(TMP8, MASK_7f, TMP8);
 
-		vis_psub16(TMP10, TMP6, TMP6);
-		vis_st64(TMP6, dest[0]);
+                vis_psub16(TMP10, TMP6, TMP6);
+                vis_st64(TMP6, dest[0]);
 
-		vis_psub16(TMP12, TMP8, TMP8);
-		vis_st64_2(TMP8, dest, 8);
-		dest += stride;
+                vis_psub16(TMP12, TMP8, TMP8);
+                vis_st64_2(TMP8, dest, 8);
+                dest += stride;
 
-		vis_xor(REF_0, REF_2, TMP6);
+                vis_xor(REF_0, REF_2, TMP6);
 
-		vis_xor(REF_4, REF_6, TMP8);
+                vis_xor(REF_4, REF_6, TMP8);
 
-		vis_and(TMP6, MASK_fe, TMP6);
+                vis_and(TMP6, MASK_fe, TMP6);
 
-		vis_mul8x16(CONST_128, TMP6, TMP6);
-		vis_and(TMP8, MASK_fe, TMP8);
+                vis_mul8x16(CONST_128, TMP6, TMP6);
+                vis_and(TMP8, MASK_fe, TMP8);
 
-		vis_mul8x16(CONST_128, TMP8, TMP8);
-		vis_or(REF_0, REF_2, TMP10);
+                vis_mul8x16(CONST_128, TMP8, TMP8);
+                vis_or(REF_0, REF_2, TMP10);
 
-		vis_or(REF_4, REF_6, TMP12);
+                vis_or(REF_4, REF_6, TMP12);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_faligndata(TMP14, TMP16, REF_0);
+                vis_faligndata(TMP14, TMP16, REF_0);
 
-		vis_faligndata(TMP16, TMP18, REF_4);
+                vis_faligndata(TMP16, TMP18, REF_4);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP14, TMP16, REF_2);
-			vis_faligndata(TMP16, TMP18, REF_6);
-		} else {
-			vis_src1(TMP16, REF_2);
-			vis_src1(TMP18, REF_6);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP14, TMP16, REF_2);
+                        vis_faligndata(TMP16, TMP18, REF_6);
+                } else {
+                        vis_src1(TMP16, REF_2);
+                        vis_src1(TMP18, REF_6);
+                }
 
-		vis_and(TMP6, MASK_7f, TMP6);
+                vis_and(TMP6, MASK_7f, TMP6);
 
-		vis_and(TMP8, MASK_7f, TMP8);
+                vis_and(TMP8, MASK_7f, TMP8);
 
-		vis_psub16(TMP10, TMP6, TMP6);
-		vis_st64(TMP6, dest[0]);
+                vis_psub16(TMP10, TMP6, TMP6);
+                vis_st64(TMP6, dest[0]);
 
-		vis_psub16(TMP12, TMP8, TMP8);
-		vis_st64_2(TMP8, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_psub16(TMP12, TMP8, TMP8);
+                vis_st64_2(TMP8, dest, 8);
+                dest += stride;
+        } while (--height);
 
-	vis_ld64(ref[0],    TMP0);
-	vis_xor(REF_0, REF_2, TMP6);
+        vis_ld64(ref[0],    TMP0);
+        vis_xor(REF_0, REF_2, TMP6);
 
-	vis_ld64_2(ref, 8,  TMP2);
-	vis_xor(REF_4, REF_6, TMP8);
+        vis_ld64_2(ref, 8,  TMP2);
+        vis_xor(REF_4, REF_6, TMP8);
 
-	vis_ld64_2(ref, 16, TMP4);
-	vis_and(TMP6, MASK_fe, TMP6);
+        vis_ld64_2(ref, 16, TMP4);
+        vis_and(TMP6, MASK_fe, TMP6);
 
-	vis_mul8x16(CONST_128, TMP6, TMP6);
-	vis_and(TMP8, MASK_fe, TMP8);
+        vis_mul8x16(CONST_128, TMP6, TMP6);
+        vis_and(TMP8, MASK_fe, TMP8);
 
-	vis_mul8x16(CONST_128, TMP8, TMP8);
-	vis_or(REF_0, REF_2, TMP10);
+        vis_mul8x16(CONST_128, TMP8, TMP8);
+        vis_or(REF_0, REF_2, TMP10);
 
-	vis_or(REF_4, REF_6, TMP12);
+        vis_or(REF_4, REF_6, TMP12);
 
-	vis_alignaddr_g0((void *)off);
+        vis_alignaddr_g0((void *)off);
 
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_faligndata(TMP2, TMP4, REF_4);
+        vis_faligndata(TMP2, TMP4, REF_4);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_2);
-		vis_faligndata(TMP2, TMP4, REF_6);
-	} else {
-		vis_src1(TMP2, REF_2);
-		vis_src1(TMP4, REF_6);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+                vis_faligndata(TMP2, TMP4, REF_6);
+        } else {
+                vis_src1(TMP2, REF_2);
+                vis_src1(TMP4, REF_6);
+        }
 
-	vis_and(TMP6, MASK_7f, TMP6);
+        vis_and(TMP6, MASK_7f, TMP6);
 
-	vis_and(TMP8, MASK_7f, TMP8);
+        vis_and(TMP8, MASK_7f, TMP8);
 
-	vis_psub16(TMP10, TMP6, TMP6);
-	vis_st64(TMP6, dest[0]);
+        vis_psub16(TMP10, TMP6, TMP6);
+        vis_st64(TMP6, dest[0]);
 
-	vis_psub16(TMP12, TMP8, TMP8);
-	vis_st64_2(TMP8, dest, 8);
-	dest += stride;
+        vis_psub16(TMP12, TMP8, TMP8);
+        vis_st64_2(TMP8, dest, 8);
+        dest += stride;
 
-	vis_xor(REF_0, REF_2, TMP6);
+        vis_xor(REF_0, REF_2, TMP6);
 
-	vis_xor(REF_4, REF_6, TMP8);
+        vis_xor(REF_4, REF_6, TMP8);
 
-	vis_and(TMP6, MASK_fe, TMP6);
+        vis_and(TMP6, MASK_fe, TMP6);
 
-	vis_mul8x16(CONST_128, TMP6, TMP6);
-	vis_and(TMP8, MASK_fe, TMP8);
+        vis_mul8x16(CONST_128, TMP6, TMP6);
+        vis_and(TMP8, MASK_fe, TMP8);
 
-	vis_mul8x16(CONST_128, TMP8, TMP8);
-	vis_or(REF_0, REF_2, TMP10);
+        vis_mul8x16(CONST_128, TMP8, TMP8);
+        vis_or(REF_0, REF_2, TMP10);
 
-	vis_or(REF_4, REF_6, TMP12);
+        vis_or(REF_4, REF_6, TMP12);
 
-	vis_and(TMP6, MASK_7f, TMP6);
+        vis_and(TMP6, MASK_7f, TMP6);
 
-	vis_and(TMP8, MASK_7f, TMP8);
+        vis_and(TMP8, MASK_7f, TMP8);
 
-	vis_psub16(TMP10, TMP6, TMP6);
-	vis_st64(TMP6, dest[0]);
+        vis_psub16(TMP10, TMP6, TMP6);
+        vis_st64(TMP6, dest[0]);
 
-	vis_psub16(TMP12, TMP8, TMP8);
-	vis_st64_2(TMP8, dest, 8);
+        vis_psub16(TMP12, TMP8, TMP8);
+        vis_st64_2(TMP8, dest, 8);
 }
 
 static void MC_put_x_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
+                            const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[0], TMP0);
+        vis_ld64(ref[0], TMP0);
 
-	vis_ld64(ref[8], TMP2);
+        vis_ld64(ref[8], TMP2);
 
-	vis_ld64(constants_fe[0], MASK_fe);
+        vis_ld64(constants_fe[0], MASK_fe);
 
-	vis_ld64(constants_7f[0], MASK_7f);
+        vis_ld64(constants_7f[0], MASK_7f);
 
-	vis_ld64(constants128[0], CONST_128);
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_ld64(constants128[0], CONST_128);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_2);
-	} else {
-		vis_src1(TMP2, REF_2);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+        } else {
+                vis_src1(TMP2, REF_2);
+        }
 
-	ref += stride;
-	height = (height >> 1) - 1;
+        ref += stride;
+        height = (height >> 1) - 1;
 
-	do {	/* 20 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(REF_0, REF_2, TMP4);
+        do {    /* 20 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(REF_0, REF_2, TMP4);
 
-		vis_ld64_2(ref, 8, TMP2);
-		vis_and(TMP4, MASK_fe, TMP4);
-		ref += stride;
+                vis_ld64_2(ref, 8, TMP2);
+                vis_and(TMP4, MASK_fe, TMP4);
+                ref += stride;
 
-		vis_ld64(ref[0], TMP8);
-		vis_or(REF_0, REF_2, TMP6);
-		vis_mul8x16(CONST_128, TMP4, TMP4);
+                vis_ld64(ref[0], TMP8);
+                vis_or(REF_0, REF_2, TMP6);
+                vis_mul8x16(CONST_128, TMP4, TMP4);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64_2(ref, 8, TMP10);
-		ref += stride;
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64_2(ref, 8, TMP10);
+                ref += stride;
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-		} else {
-			vis_src1(TMP2, REF_2);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                }
 
-		vis_and(TMP4, MASK_7f, TMP4);
+                vis_and(TMP4, MASK_7f, TMP4);
 
-		vis_psub16(TMP6, TMP4, DST_0);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
+                vis_psub16(TMP6, TMP4, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
 
-		vis_xor(REF_0, REF_2, TMP12);
+                vis_xor(REF_0, REF_2, TMP12);
 
-		vis_and(TMP12, MASK_fe, TMP12);
+                vis_and(TMP12, MASK_fe, TMP12);
 
-		vis_or(REF_0, REF_2, TMP14);
-		vis_mul8x16(CONST_128, TMP12, TMP12);
+                vis_or(REF_0, REF_2, TMP14);
+                vis_mul8x16(CONST_128, TMP12, TMP12);
 
-		vis_alignaddr_g0((void *)off);
-		vis_faligndata(TMP8, TMP10, REF_0);
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP8, TMP10, REF_2);
-		} else {
-			vis_src1(TMP10, REF_2);
-		}
+                vis_alignaddr_g0((void *)off);
+                vis_faligndata(TMP8, TMP10, REF_0);
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP8, TMP10, REF_2);
+                } else {
+                        vis_src1(TMP10, REF_2);
+                }
 
-		vis_and(TMP12, MASK_7f, TMP12);
+                vis_and(TMP12, MASK_7f, TMP12);
 
-		vis_psub16(TMP14, TMP12, DST_0);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_psub16(TMP14, TMP12, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+        } while (--height);
 
-	vis_ld64(ref[0], TMP0);
-	vis_xor(REF_0, REF_2, TMP4);
+        vis_ld64(ref[0], TMP0);
+        vis_xor(REF_0, REF_2, TMP4);
 
-	vis_ld64_2(ref, 8, TMP2);
-	vis_and(TMP4, MASK_fe, TMP4);
+        vis_ld64_2(ref, 8, TMP2);
+        vis_and(TMP4, MASK_fe, TMP4);
 
-	vis_or(REF_0, REF_2, TMP6);
-	vis_mul8x16(CONST_128, TMP4, TMP4);
+        vis_or(REF_0, REF_2, TMP6);
+        vis_mul8x16(CONST_128, TMP4, TMP4);
 
-	vis_alignaddr_g0((void *)off);
+        vis_alignaddr_g0((void *)off);
 
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_2);
-	} else {
-		vis_src1(TMP2, REF_2);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+        } else {
+                vis_src1(TMP2, REF_2);
+        }
 
-	vis_and(TMP4, MASK_7f, TMP4);
+        vis_and(TMP4, MASK_7f, TMP4);
 
-	vis_psub16(TMP6, TMP4, DST_0);
-	vis_st64(DST_0, dest[0]);
-	dest += stride;
+        vis_psub16(TMP6, TMP4, DST_0);
+        vis_st64(DST_0, dest[0]);
+        dest += stride;
 
-	vis_xor(REF_0, REF_2, TMP12);
+        vis_xor(REF_0, REF_2, TMP12);
 
-	vis_and(TMP12, MASK_fe, TMP12);
+        vis_and(TMP12, MASK_fe, TMP12);
 
-	vis_or(REF_0, REF_2, TMP14);
-	vis_mul8x16(CONST_128, TMP12, TMP12);
+        vis_or(REF_0, REF_2, TMP14);
+        vis_mul8x16(CONST_128, TMP12, TMP12);
 
-	vis_and(TMP12, MASK_7f, TMP12);
+        vis_and(TMP12, MASK_7f, TMP12);
 
-	vis_psub16(TMP14, TMP12, DST_0);
-	vis_st64(DST_0, dest[0]);
-	dest += stride;
+        vis_psub16(TMP14, TMP12, DST_0);
+        vis_st64(DST_0, dest[0]);
+        dest += stride;
 }
 
 static void MC_avg_x_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
 
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
 
-	vis_ld64(constants3[0], CONST_3);
-	vis_fzero(ZERO);
-	vis_ld64(constants256_512[0], CONST_256);
+        vis_ld64(constants3[0], CONST_3);
+        vis_fzero(ZERO);
+        vis_ld64(constants256_512[0], CONST_256);
 
-	ref = vis_alignaddr(ref);
-	do {	/* 26 cycles */
-		vis_ld64(ref[0], TMP0);
+        ref = vis_alignaddr(ref);
+        do {    /* 26 cycles */
+                vis_ld64(ref[0], TMP0);
 
-		vis_ld64(ref[8], TMP2);
+                vis_ld64(ref[8], TMP2);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64(ref[16], TMP4);
+                vis_ld64(ref[16], TMP4);
 
-		vis_ld64(dest[0], DST_0);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64(dest[8], DST_2);
-		vis_faligndata(TMP2, TMP4, REF_4);
+                vis_ld64(dest[8], DST_2);
+                vis_faligndata(TMP2, TMP4, REF_4);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-			vis_faligndata(TMP2, TMP4, REF_6);
-		} else {
-			vis_src1(TMP2, REF_2);
-			vis_src1(TMP4, REF_6);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                }
 
-		vis_mul8x16au(REF_0,   CONST_256, TMP0);
+                vis_mul8x16au(REF_0,   CONST_256, TMP0);
 
-		vis_pmerge(ZERO,     REF_2,     TMP4);
-		vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+                vis_pmerge(ZERO,     REF_2,     TMP4);
+                vis_mul8x16au(REF_0_1, CONST_256, TMP2);
 
-		vis_pmerge(ZERO, REF_2_1, TMP6);
+                vis_pmerge(ZERO, REF_2_1, TMP6);
 
-		vis_padd16(TMP0, TMP4, TMP0);
+                vis_padd16(TMP0, TMP4, TMP0);
 
-		vis_mul8x16al(DST_0,   CONST_512, TMP4);
-		vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16al(DST_0,   CONST_512, TMP4);
+                vis_padd16(TMP2, TMP6, TMP2);
 
-		vis_mul8x16al(DST_1,   CONST_512, TMP6);
+                vis_mul8x16al(DST_1,   CONST_512, TMP6);
 
-		vis_mul8x16au(REF_6,   CONST_256, TMP12);
+                vis_mul8x16au(REF_6,   CONST_256, TMP12);
 
-		vis_padd16(TMP0, TMP4, TMP0);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP14);
+                vis_padd16(TMP0, TMP4, TMP0);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP14);
 
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_mul8x16au(REF_4,   CONST_256, TMP16);
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_4,   CONST_256, TMP16);
 
-		vis_padd16(TMP0, CONST_3, TMP8);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP18);
+                vis_padd16(TMP0, CONST_3, TMP8);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP18);
 
-		vis_padd16(TMP2, CONST_3, TMP10);
-		vis_pack16(TMP8, DST_0);
+                vis_padd16(TMP2, CONST_3, TMP10);
+                vis_pack16(TMP8, DST_0);
 
-		vis_pack16(TMP10, DST_1);
-		vis_padd16(TMP16, TMP12, TMP0);
+                vis_pack16(TMP10, DST_1);
+                vis_padd16(TMP16, TMP12, TMP0);
 
-		vis_st64(DST_0, dest[0]);
-		vis_mul8x16al(DST_2,   CONST_512, TMP4);
-		vis_padd16(TMP18, TMP14, TMP2);
+                vis_st64(DST_0, dest[0]);
+                vis_mul8x16al(DST_2,   CONST_512, TMP4);
+                vis_padd16(TMP18, TMP14, TMP2);
 
-		vis_mul8x16al(DST_3,   CONST_512, TMP6);
-		vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16al(DST_3,   CONST_512, TMP6);
+                vis_padd16(TMP0, CONST_3, TMP0);
 
-		vis_padd16(TMP2, CONST_3, TMP2);
+                vis_padd16(TMP2, CONST_3, TMP2);
 
-		vis_padd16(TMP0, TMP4, TMP0);
+                vis_padd16(TMP0, TMP4, TMP0);
 
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_pack16(TMP0, DST_2);
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_pack16(TMP0, DST_2);
 
-		vis_pack16(TMP2, DST_3);
-		vis_st64(DST_2, dest[8]);
+                vis_pack16(TMP2, DST_3);
+                vis_st64(DST_2, dest[8]);
 
-		ref += stride;
-		dest += stride;
-	} while (--height);
+                ref += stride;
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_avg_x_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
+                            const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_times_2 = stride << 1;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_times_2 = stride << 1;
 
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
 
-	vis_ld64(constants3[0], CONST_3);
-	vis_fzero(ZERO);
-	vis_ld64(constants256_512[0], CONST_256);
+        vis_ld64(constants3[0], CONST_3);
+        vis_fzero(ZERO);
+        vis_ld64(constants256_512[0], CONST_256);
 
-	ref = vis_alignaddr(ref);
-	height >>= 2;
-	do {	/* 47 cycles */
-		vis_ld64(ref[0],   TMP0);
+        ref = vis_alignaddr(ref);
+        height >>= 2;
+        do {    /* 47 cycles */
+                vis_ld64(ref[0],   TMP0);
 
-		vis_ld64_2(ref, 8, TMP2);
-		ref += stride;
+                vis_ld64_2(ref, 8, TMP2);
+                ref += stride;
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64(ref[0],   TMP4);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64(ref[0],   TMP4);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(ref, 8, TMP6);
-		ref += stride;
+                vis_ld64_2(ref, 8, TMP6);
+                ref += stride;
 
-		vis_ld64(ref[0],   TMP8);
+                vis_ld64(ref[0],   TMP8);
 
-		vis_ld64_2(ref, 8, TMP10);
-		ref += stride;
-		vis_faligndata(TMP4, TMP6, REF_4);
+                vis_ld64_2(ref, 8, TMP10);
+                ref += stride;
+                vis_faligndata(TMP4, TMP6, REF_4);
 
-		vis_ld64(ref[0],   TMP12);
+                vis_ld64(ref[0],   TMP12);
 
-		vis_ld64_2(ref, 8, TMP14);
-		ref += stride;
-		vis_faligndata(TMP8, TMP10, REF_S0);
+                vis_ld64_2(ref, 8, TMP14);
+                ref += stride;
+                vis_faligndata(TMP8, TMP10, REF_S0);
 
-		vis_faligndata(TMP12, TMP14, REF_S4);
+                vis_faligndata(TMP12, TMP14, REF_S4);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
 
-			vis_ld64(dest[0], DST_0);
-			vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_ld64(dest[0], DST_0);
+                        vis_faligndata(TMP0, TMP2, REF_2);
 
-			vis_ld64_2(dest, stride, DST_2);
-			vis_faligndata(TMP4, TMP6, REF_6);
+                        vis_ld64_2(dest, stride, DST_2);
+                        vis_faligndata(TMP4, TMP6, REF_6);
 
-			vis_faligndata(TMP8, TMP10, REF_S2);
+                        vis_faligndata(TMP8, TMP10, REF_S2);
 
-			vis_faligndata(TMP12, TMP14, REF_S6);
-		} else {
-			vis_ld64(dest[0], DST_0);
-			vis_src1(TMP2, REF_2);
+                        vis_faligndata(TMP12, TMP14, REF_S6);
+                } else {
+                        vis_ld64(dest[0], DST_0);
+                        vis_src1(TMP2, REF_2);
 
-			vis_ld64_2(dest, stride, DST_2);
-			vis_src1(TMP6, REF_6);
+                        vis_ld64_2(dest, stride, DST_2);
+                        vis_src1(TMP6, REF_6);
 
-			vis_src1(TMP10, REF_S2);
+                        vis_src1(TMP10, REF_S2);
 
-			vis_src1(TMP14, REF_S6);
-		}
+                        vis_src1(TMP14, REF_S6);
+                }
 
-		vis_pmerge(ZERO,     REF_0,     TMP0);
-		vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+                vis_pmerge(ZERO,     REF_0,     TMP0);
+                vis_mul8x16au(REF_0_1, CONST_256, TMP2);
 
-		vis_pmerge(ZERO,     REF_2,     TMP4);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP6);
+                vis_pmerge(ZERO,     REF_2,     TMP4);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP6);
 
-		vis_padd16(TMP0, CONST_3, TMP0);
-		vis_mul8x16al(DST_0,   CONST_512, TMP16);
+                vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16al(DST_0,   CONST_512, TMP16);
 
-		vis_padd16(TMP2, CONST_3, TMP2);
-		vis_mul8x16al(DST_1,   CONST_512, TMP18);
+                vis_padd16(TMP2, CONST_3, TMP2);
+                vis_mul8x16al(DST_1,   CONST_512, TMP18);
 
-		vis_padd16(TMP0, TMP4, TMP0);
-		vis_mul8x16au(REF_4, CONST_256, TMP8);
+                vis_padd16(TMP0, TMP4, TMP0);
+                vis_mul8x16au(REF_4, CONST_256, TMP8);
 
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP10);
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP10);
 
-		vis_padd16(TMP0, TMP16, TMP0);
-		vis_mul8x16au(REF_6, CONST_256, TMP12);
+                vis_padd16(TMP0, TMP16, TMP0);
+                vis_mul8x16au(REF_6, CONST_256, TMP12);
 
-		vis_padd16(TMP2, TMP18, TMP2);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP14);
+                vis_padd16(TMP2, TMP18, TMP2);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP14);
 
-		vis_padd16(TMP8, CONST_3, TMP8);
-		vis_mul8x16al(DST_2, CONST_512, TMP16);
+                vis_padd16(TMP8, CONST_3, TMP8);
+                vis_mul8x16al(DST_2, CONST_512, TMP16);
 
-		vis_padd16(TMP8, TMP12, TMP8);
-		vis_mul8x16al(DST_3, CONST_512, TMP18);
+                vis_padd16(TMP8, TMP12, TMP8);
+                vis_mul8x16al(DST_3, CONST_512, TMP18);
 
-		vis_padd16(TMP10, TMP14, TMP10);
-		vis_pack16(TMP0, DST_0);
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_pack16(TMP0, DST_0);
 
-		vis_pack16(TMP2, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-		vis_padd16(TMP10, CONST_3, TMP10);
+                vis_pack16(TMP2, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+                vis_padd16(TMP10, CONST_3, TMP10);
 
-		vis_ld64_2(dest, stride, DST_0);
-		vis_padd16(TMP8, TMP16, TMP8);
+                vis_ld64_2(dest, stride, DST_0);
+                vis_padd16(TMP8, TMP16, TMP8);
 
-		vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/);
-		vis_padd16(TMP10, TMP18, TMP10);
-		vis_pack16(TMP8, DST_2);
+                vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/);
+                vis_padd16(TMP10, TMP18, TMP10);
+                vis_pack16(TMP8, DST_2);
 
-		vis_pack16(TMP10, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
+                vis_pack16(TMP10, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
 
-		vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
-		vis_pmerge(ZERO,     REF_S0,     TMP0);
+                vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
+                vis_pmerge(ZERO,     REF_S0,     TMP0);
 
-		vis_pmerge(ZERO,     REF_S2,     TMP24);
-		vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
+                vis_pmerge(ZERO,     REF_S2,     TMP24);
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
 
-		vis_padd16(TMP0, CONST_3, TMP0);
-		vis_mul8x16au(REF_S4, CONST_256, TMP8);
+                vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16au(REF_S4, CONST_256, TMP8);
 
-		vis_padd16(TMP2, CONST_3, TMP2);
-		vis_mul8x16au(REF_S4_1, CONST_256, TMP10);
+                vis_padd16(TMP2, CONST_3, TMP2);
+                vis_mul8x16au(REF_S4_1, CONST_256, TMP10);
 
-		vis_padd16(TMP0, TMP24, TMP0);
-		vis_mul8x16au(REF_S6, CONST_256, TMP12);
+                vis_padd16(TMP0, TMP24, TMP0);
+                vis_mul8x16au(REF_S6, CONST_256, TMP12);
 
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_mul8x16au(REF_S6_1, CONST_256, TMP14);
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_S6_1, CONST_256, TMP14);
 
-		vis_padd16(TMP8, CONST_3, TMP8);
-		vis_mul8x16al(DST_0,   CONST_512, TMP16);
+                vis_padd16(TMP8, CONST_3, TMP8);
+                vis_mul8x16al(DST_0,   CONST_512, TMP16);
 
-		vis_padd16(TMP10, CONST_3, TMP10);
-		vis_mul8x16al(DST_1,   CONST_512, TMP18);
+                vis_padd16(TMP10, CONST_3, TMP10);
+                vis_mul8x16al(DST_1,   CONST_512, TMP18);
 
-		vis_padd16(TMP8, TMP12, TMP8);
-		vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20);
+                vis_padd16(TMP8, TMP12, TMP8);
+                vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20);
 
-		vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22);
-		vis_padd16(TMP0, TMP16, TMP0);
+                vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22);
+                vis_padd16(TMP0, TMP16, TMP0);
 
-		vis_padd16(TMP2, TMP18, TMP2);
-		vis_pack16(TMP0, DST_0);
+                vis_padd16(TMP2, TMP18, TMP2);
+                vis_pack16(TMP0, DST_0);
 
-		vis_padd16(TMP10, TMP14, TMP10);
-		vis_pack16(TMP2, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_pack16(TMP2, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
 
-		vis_padd16(TMP8, TMP20, TMP8);
+                vis_padd16(TMP8, TMP20, TMP8);
 
-		vis_padd16(TMP10, TMP22, TMP10);
-		vis_pack16(TMP8, DST_2);
+                vis_padd16(TMP10, TMP22, TMP10);
+                vis_pack16(TMP8, DST_2);
 
-		vis_pack16(TMP10, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_pack16(TMP10, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_put_y_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
+        uint8_t *ref = (uint8_t *) _ref;
 
-	ref = vis_alignaddr(ref);
-	vis_ld64(ref[0], TMP0);
+        ref = vis_alignaddr(ref);
+        vis_ld64(ref[0], TMP0);
 
-	vis_ld64_2(ref, 8, TMP2);
+        vis_ld64_2(ref, 8, TMP2);
 
-	vis_ld64_2(ref, 16, TMP4);
-	ref += stride;
+        vis_ld64_2(ref, 16, TMP4);
+        ref += stride;
 
-	vis_ld64(ref[0], TMP6);
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_ld64(ref[0], TMP6);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_ld64_2(ref, 8, TMP8);
-	vis_faligndata(TMP2, TMP4, REF_4);
+        vis_ld64_2(ref, 8, TMP8);
+        vis_faligndata(TMP2, TMP4, REF_4);
 
-	vis_ld64_2(ref, 16, TMP10);
-	ref += stride;
+        vis_ld64_2(ref, 16, TMP10);
+        ref += stride;
 
-	vis_ld64(constants_fe[0], MASK_fe);
-	vis_faligndata(TMP6, TMP8, REF_2);
+        vis_ld64(constants_fe[0], MASK_fe);
+        vis_faligndata(TMP6, TMP8, REF_2);
 
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP8, TMP10, REF_6);
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP8, TMP10, REF_6);
 
-	vis_ld64(constants128[0], CONST_128);
-	height = (height >> 1) - 1;
-	do {	/* 24 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(REF_0, REF_2, TMP12);
+        vis_ld64(constants128[0], CONST_128);
+        height = (height >> 1) - 1;
+        do {    /* 24 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(REF_0, REF_2, TMP12);
 
-		vis_ld64_2(ref, 8, TMP2);
-		vis_xor(REF_4, REF_6, TMP16);
+                vis_ld64_2(ref, 8, TMP2);
+                vis_xor(REF_4, REF_6, TMP16);
 
-		vis_ld64_2(ref, 16, TMP4);
-		ref += stride;
-		vis_or(REF_0, REF_2, TMP14);
+                vis_ld64_2(ref, 16, TMP4);
+                ref += stride;
+                vis_or(REF_0, REF_2, TMP14);
 
-		vis_ld64(ref[0], TMP6);
-		vis_or(REF_4, REF_6, TMP18);
+                vis_ld64(ref[0], TMP6);
+                vis_or(REF_4, REF_6, TMP18);
 
-		vis_ld64_2(ref, 8, TMP8);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64_2(ref, 8, TMP8);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(ref, 16, TMP10);
-		ref += stride;
-		vis_faligndata(TMP2, TMP4, REF_4);
+                vis_ld64_2(ref, 16, TMP10);
+                ref += stride;
+                vis_faligndata(TMP2, TMP4, REF_4);
 
-		vis_and(TMP12, MASK_fe, TMP12);
+                vis_and(TMP12, MASK_fe, TMP12);
 
-		vis_and(TMP16, MASK_fe, TMP16);
-		vis_mul8x16(CONST_128, TMP12, TMP12);
+                vis_and(TMP16, MASK_fe, TMP16);
+                vis_mul8x16(CONST_128, TMP12, TMP12);
 
-		vis_mul8x16(CONST_128, TMP16, TMP16);
-		vis_xor(REF_0, REF_2, TMP0);
+                vis_mul8x16(CONST_128, TMP16, TMP16);
+                vis_xor(REF_0, REF_2, TMP0);
 
-		vis_xor(REF_4, REF_6, TMP2);
+                vis_xor(REF_4, REF_6, TMP2);
 
-		vis_or(REF_0, REF_2, TMP20);
+                vis_or(REF_0, REF_2, TMP20);
 
-		vis_and(TMP12, MASK_7f, TMP12);
+                vis_and(TMP12, MASK_7f, TMP12);
 
-		vis_and(TMP16, MASK_7f, TMP16);
+                vis_and(TMP16, MASK_7f, TMP16);
 
-		vis_psub16(TMP14, TMP12, TMP12);
-		vis_st64(TMP12, dest[0]);
+                vis_psub16(TMP14, TMP12, TMP12);
+                vis_st64(TMP12, dest[0]);
 
-		vis_psub16(TMP18, TMP16, TMP16);
-		vis_st64_2(TMP16, dest, 8);
-		dest += stride;
+                vis_psub16(TMP18, TMP16, TMP16);
+                vis_st64_2(TMP16, dest, 8);
+                dest += stride;
 
-		vis_or(REF_4, REF_6, TMP18);
+                vis_or(REF_4, REF_6, TMP18);
 
-		vis_and(TMP0, MASK_fe, TMP0);
+                vis_and(TMP0, MASK_fe, TMP0);
 
-		vis_and(TMP2, MASK_fe, TMP2);
-		vis_mul8x16(CONST_128, TMP0, TMP0);
+                vis_and(TMP2, MASK_fe, TMP2);
+                vis_mul8x16(CONST_128, TMP0, TMP0);
 
-		vis_faligndata(TMP6, TMP8, REF_2);
-		vis_mul8x16(CONST_128, TMP2, TMP2);
+                vis_faligndata(TMP6, TMP8, REF_2);
+                vis_mul8x16(CONST_128, TMP2, TMP2);
 
-		vis_faligndata(TMP8, TMP10, REF_6);
+                vis_faligndata(TMP8, TMP10, REF_6);
 
-		vis_and(TMP0, MASK_7f, TMP0);
+                vis_and(TMP0, MASK_7f, TMP0);
 
-		vis_and(TMP2, MASK_7f, TMP2);
+                vis_and(TMP2, MASK_7f, TMP2);
 
-		vis_psub16(TMP20, TMP0, TMP0);
-		vis_st64(TMP0, dest[0]);
+                vis_psub16(TMP20, TMP0, TMP0);
+                vis_st64(TMP0, dest[0]);
 
-		vis_psub16(TMP18, TMP2, TMP2);
-		vis_st64_2(TMP2, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_psub16(TMP18, TMP2, TMP2);
+                vis_st64_2(TMP2, dest, 8);
+                dest += stride;
+        } while (--height);
 
-	vis_ld64(ref[0], TMP0);
-	vis_xor(REF_0, REF_2, TMP12);
+        vis_ld64(ref[0], TMP0);
+        vis_xor(REF_0, REF_2, TMP12);
 
-	vis_ld64_2(ref, 8, TMP2);
-	vis_xor(REF_4, REF_6, TMP16);
+        vis_ld64_2(ref, 8, TMP2);
+        vis_xor(REF_4, REF_6, TMP16);
 
-	vis_ld64_2(ref, 16, TMP4);
-	vis_or(REF_0, REF_2, TMP14);
+        vis_ld64_2(ref, 16, TMP4);
+        vis_or(REF_0, REF_2, TMP14);
 
-	vis_or(REF_4, REF_6, TMP18);
+        vis_or(REF_4, REF_6, TMP18);
 
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_faligndata(TMP2, TMP4, REF_4);
+        vis_faligndata(TMP2, TMP4, REF_4);
 
-	vis_and(TMP12, MASK_fe, TMP12);
+        vis_and(TMP12, MASK_fe, TMP12);
 
-	vis_and(TMP16, MASK_fe, TMP16);
-	vis_mul8x16(CONST_128, TMP12, TMP12);
+        vis_and(TMP16, MASK_fe, TMP16);
+        vis_mul8x16(CONST_128, TMP12, TMP12);
 
-	vis_mul8x16(CONST_128, TMP16, TMP16);
-	vis_xor(REF_0, REF_2, TMP0);
+        vis_mul8x16(CONST_128, TMP16, TMP16);
+        vis_xor(REF_0, REF_2, TMP0);
 
-	vis_xor(REF_4, REF_6, TMP2);
+        vis_xor(REF_4, REF_6, TMP2);
 
-	vis_or(REF_0, REF_2, TMP20);
+        vis_or(REF_0, REF_2, TMP20);
 
-	vis_and(TMP12, MASK_7f, TMP12);
+        vis_and(TMP12, MASK_7f, TMP12);
 
-	vis_and(TMP16, MASK_7f, TMP16);
+        vis_and(TMP16, MASK_7f, TMP16);
 
-	vis_psub16(TMP14, TMP12, TMP12);
-	vis_st64(TMP12, dest[0]);
+        vis_psub16(TMP14, TMP12, TMP12);
+        vis_st64(TMP12, dest[0]);
 
-	vis_psub16(TMP18, TMP16, TMP16);
-	vis_st64_2(TMP16, dest, 8);
-	dest += stride;
+        vis_psub16(TMP18, TMP16, TMP16);
+        vis_st64_2(TMP16, dest, 8);
+        dest += stride;
 
-	vis_or(REF_4, REF_6, TMP18);
+        vis_or(REF_4, REF_6, TMP18);
 
-	vis_and(TMP0, MASK_fe, TMP0);
+        vis_and(TMP0, MASK_fe, TMP0);
 
-	vis_and(TMP2, MASK_fe, TMP2);
-	vis_mul8x16(CONST_128, TMP0, TMP0);
+        vis_and(TMP2, MASK_fe, TMP2);
+        vis_mul8x16(CONST_128, TMP0, TMP0);
 
-	vis_mul8x16(CONST_128, TMP2, TMP2);
+        vis_mul8x16(CONST_128, TMP2, TMP2);
 
-	vis_and(TMP0, MASK_7f, TMP0);
+        vis_and(TMP0, MASK_7f, TMP0);
 
-	vis_and(TMP2, MASK_7f, TMP2);
+        vis_and(TMP2, MASK_7f, TMP2);
 
-	vis_psub16(TMP20, TMP0, TMP0);
-	vis_st64(TMP0, dest[0]);
+        vis_psub16(TMP20, TMP0, TMP0);
+        vis_st64(TMP0, dest[0]);
 
-	vis_psub16(TMP18, TMP2, TMP2);
-	vis_st64_2(TMP2, dest, 8);
+        vis_psub16(TMP18, TMP2, TMP2);
+        vis_st64_2(TMP2, dest, 8);
 }
 
 static void MC_put_y_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
+                            const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
+        uint8_t *ref = (uint8_t *) _ref;
 
-	ref = vis_alignaddr(ref);
-	vis_ld64(ref[0], TMP0);
+        ref = vis_alignaddr(ref);
+        vis_ld64(ref[0], TMP0);
 
-	vis_ld64_2(ref, 8, TMP2);
-	ref += stride;
+        vis_ld64_2(ref, 8, TMP2);
+        ref += stride;
 
-	vis_ld64(ref[0], TMP4);
+        vis_ld64(ref[0], TMP4);
 
-	vis_ld64_2(ref, 8, TMP6);
-	ref += stride;
+        vis_ld64_2(ref, 8, TMP6);
+        ref += stride;
 
-	vis_ld64(constants_fe[0], MASK_fe);
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_ld64(constants_fe[0], MASK_fe);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP4, TMP6, REF_2);
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP4, TMP6, REF_2);
 
-	vis_ld64(constants128[0], CONST_128);
-	height = (height >> 1) - 1;
-	do {	/* 12 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(REF_0, REF_2, TMP4);
+        vis_ld64(constants128[0], CONST_128);
+        height = (height >> 1) - 1;
+        do {    /* 12 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(REF_0, REF_2, TMP4);
 
-		vis_ld64_2(ref, 8, TMP2);
-		ref += stride;
-		vis_and(TMP4, MASK_fe, TMP4);
+                vis_ld64_2(ref, 8, TMP2);
+                ref += stride;
+                vis_and(TMP4, MASK_fe, TMP4);
 
-		vis_or(REF_0, REF_2, TMP6);
-		vis_mul8x16(CONST_128, TMP4, TMP4);
+                vis_or(REF_0, REF_2, TMP6);
+                vis_mul8x16(CONST_128, TMP4, TMP4);
 
-		vis_faligndata(TMP0, TMP2, REF_0);
-		vis_ld64(ref[0], TMP0);
+                vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64(ref[0], TMP0);
 
-		vis_ld64_2(ref, 8, TMP2);
-		ref += stride;
-		vis_xor(REF_0, REF_2, TMP12);
+                vis_ld64_2(ref, 8, TMP2);
+                ref += stride;
+                vis_xor(REF_0, REF_2, TMP12);
 
-		vis_and(TMP4, MASK_7f, TMP4);
+                vis_and(TMP4, MASK_7f, TMP4);
 
-		vis_and(TMP12, MASK_fe, TMP12);
+                vis_and(TMP12, MASK_fe, TMP12);
 
-		vis_mul8x16(CONST_128, TMP12, TMP12);
-		vis_or(REF_0, REF_2, TMP14);
+                vis_mul8x16(CONST_128, TMP12, TMP12);
+                vis_or(REF_0, REF_2, TMP14);
 
-		vis_psub16(TMP6, TMP4, DST_0);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
+                vis_psub16(TMP6, TMP4, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
 
-		vis_faligndata(TMP0, TMP2, REF_2);
+                vis_faligndata(TMP0, TMP2, REF_2);
 
-		vis_and(TMP12, MASK_7f, TMP12);
+                vis_and(TMP12, MASK_7f, TMP12);
 
-		vis_psub16(TMP14, TMP12, DST_0);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_psub16(TMP14, TMP12, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+        } while (--height);
 
-	vis_ld64(ref[0], TMP0);
-	vis_xor(REF_0, REF_2, TMP4);
+        vis_ld64(ref[0], TMP0);
+        vis_xor(REF_0, REF_2, TMP4);
 
-	vis_ld64_2(ref, 8, TMP2);
-	vis_and(TMP4, MASK_fe, TMP4);
+        vis_ld64_2(ref, 8, TMP2);
+        vis_and(TMP4, MASK_fe, TMP4);
 
-	vis_or(REF_0, REF_2, TMP6);
-	vis_mul8x16(CONST_128, TMP4, TMP4);
+        vis_or(REF_0, REF_2, TMP6);
+        vis_mul8x16(CONST_128, TMP4, TMP4);
 
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_xor(REF_0, REF_2, TMP12);
+        vis_xor(REF_0, REF_2, TMP12);
 
-	vis_and(TMP4, MASK_7f, TMP4);
+        vis_and(TMP4, MASK_7f, TMP4);
 
-	vis_and(TMP12, MASK_fe, TMP12);
+        vis_and(TMP12, MASK_fe, TMP12);
 
-	vis_mul8x16(CONST_128, TMP12, TMP12);
-	vis_or(REF_0, REF_2, TMP14);
+        vis_mul8x16(CONST_128, TMP12, TMP12);
+        vis_or(REF_0, REF_2, TMP14);
 
-	vis_psub16(TMP6, TMP4, DST_0);
-	vis_st64(DST_0, dest[0]);
-	dest += stride;
+        vis_psub16(TMP6, TMP4, DST_0);
+        vis_st64(DST_0, dest[0]);
+        dest += stride;
 
-	vis_and(TMP12, MASK_7f, TMP12);
+        vis_and(TMP12, MASK_7f, TMP12);
 
-	vis_psub16(TMP14, TMP12, DST_0);
-	vis_st64(DST_0, dest[0]);
+        vis_psub16(TMP14, TMP12, DST_0);
+        vis_st64(DST_0, dest[0]);
 }
 
 static void MC_avg_y_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	int stride_8 = stride + 8;
-	int stride_16 = stride + 16;
+        uint8_t *ref = (uint8_t *) _ref;
+        int stride_8 = stride + 8;
+        int stride_16 = stride + 16;
 
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
 
-	vis_ld64(ref[ 8], TMP2);
+        vis_ld64(ref[ 8], TMP2);
 
-	vis_ld64(ref[16], TMP4);
+        vis_ld64(ref[16], TMP4);
 
-	vis_ld64(constants3[0], CONST_3);
-	vis_faligndata(TMP0, TMP2, REF_2);
+        vis_ld64(constants3[0], CONST_3);
+        vis_faligndata(TMP0, TMP2, REF_2);
 
-	vis_ld64(constants256_512[0], CONST_256);
-	vis_faligndata(TMP2, TMP4, REF_6);
-	height >>= 1;
+        vis_ld64(constants256_512[0], CONST_256);
+        vis_faligndata(TMP2, TMP4, REF_6);
+        height >>= 1;
 
-	do {	/* 31 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_pmerge(ZERO,       REF_2,     TMP12);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP14);
+        do {    /* 31 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_pmerge(ZERO,       REF_2,     TMP12);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP14);
 
-		vis_ld64_2(ref, stride_8, TMP2);
-		vis_pmerge(ZERO,       REF_6,     TMP16);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP18);
+                vis_ld64_2(ref, stride_8, TMP2);
+                vis_pmerge(ZERO,       REF_6,     TMP16);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP18);
 
-		vis_ld64_2(ref, stride_16, TMP4);
-		ref += stride;
+                vis_ld64_2(ref, stride_16, TMP4);
+                ref += stride;
 
-		vis_ld64(dest[0], DST_0);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(dest, 8, DST_2);
-		vis_faligndata(TMP2, TMP4, REF_4);
+                vis_ld64_2(dest, 8, DST_2);
+                vis_faligndata(TMP2, TMP4, REF_4);
 
-		vis_ld64_2(ref, stride, TMP6);
-		vis_pmerge(ZERO,     REF_0,     TMP0);
-		vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+                vis_ld64_2(ref, stride, TMP6);
+                vis_pmerge(ZERO,     REF_0,     TMP0);
+                vis_mul8x16au(REF_0_1, CONST_256, TMP2);
 
-		vis_ld64_2(ref, stride_8, TMP8);
-		vis_pmerge(ZERO,     REF_4,     TMP4);
+                vis_ld64_2(ref, stride_8, TMP8);
+                vis_pmerge(ZERO,     REF_4,     TMP4);
 
-		vis_ld64_2(ref, stride_16, TMP10);
-		ref += stride;
+                vis_ld64_2(ref, stride_16, TMP10);
+                ref += stride;
 
-		vis_ld64_2(dest, stride, REF_S0/*DST_4*/);
-		vis_faligndata(TMP6, TMP8, REF_2);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP6);
+                vis_ld64_2(dest, stride, REF_S0/*DST_4*/);
+                vis_faligndata(TMP6, TMP8, REF_2);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP6);
 
-		vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/);
-		vis_faligndata(TMP8, TMP10, REF_6);
-		vis_mul8x16al(DST_0,   CONST_512, TMP20);
+                vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/);
+                vis_faligndata(TMP8, TMP10, REF_6);
+                vis_mul8x16al(DST_0,   CONST_512, TMP20);
 
-		vis_padd16(TMP0, CONST_3, TMP0);
-		vis_mul8x16al(DST_1,   CONST_512, TMP22);
+                vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16al(DST_1,   CONST_512, TMP22);
 
-		vis_padd16(TMP2, CONST_3, TMP2);
-		vis_mul8x16al(DST_2,   CONST_512, TMP24);
+                vis_padd16(TMP2, CONST_3, TMP2);
+                vis_mul8x16al(DST_2,   CONST_512, TMP24);
 
-		vis_padd16(TMP4, CONST_3, TMP4);
-		vis_mul8x16al(DST_3,   CONST_512, TMP26);
+                vis_padd16(TMP4, CONST_3, TMP4);
+                vis_mul8x16al(DST_3,   CONST_512, TMP26);
 
-		vis_padd16(TMP6, CONST_3, TMP6);
+                vis_padd16(TMP6, CONST_3, TMP6);
 
-		vis_padd16(TMP12, TMP20, TMP12);
-		vis_mul8x16al(REF_S0,   CONST_512, TMP20);
+                vis_padd16(TMP12, TMP20, TMP12);
+                vis_mul8x16al(REF_S0,   CONST_512, TMP20);
 
-		vis_padd16(TMP14, TMP22, TMP14);
-		vis_mul8x16al(REF_S0_1, CONST_512, TMP22);
+                vis_padd16(TMP14, TMP22, TMP14);
+                vis_mul8x16al(REF_S0_1, CONST_512, TMP22);
 
-		vis_padd16(TMP16, TMP24, TMP16);
-		vis_mul8x16al(REF_S2,   CONST_512, TMP24);
+                vis_padd16(TMP16, TMP24, TMP16);
+                vis_mul8x16al(REF_S2,   CONST_512, TMP24);
 
-		vis_padd16(TMP18, TMP26, TMP18);
-		vis_mul8x16al(REF_S2_1, CONST_512, TMP26);
+                vis_padd16(TMP18, TMP26, TMP18);
+                vis_mul8x16al(REF_S2_1, CONST_512, TMP26);
 
-		vis_padd16(TMP12, TMP0, TMP12);
-		vis_mul8x16au(REF_2,   CONST_256, TMP28);
+                vis_padd16(TMP12, TMP0, TMP12);
+                vis_mul8x16au(REF_2,   CONST_256, TMP28);
 
-		vis_padd16(TMP14, TMP2, TMP14);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP30);
+                vis_padd16(TMP14, TMP2, TMP14);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP30);
 
-		vis_padd16(TMP16, TMP4, TMP16);
-		vis_mul8x16au(REF_6,   CONST_256, REF_S4);
+                vis_padd16(TMP16, TMP4, TMP16);
+                vis_mul8x16au(REF_6,   CONST_256, REF_S4);
 
-		vis_padd16(TMP18, TMP6, TMP18);
-		vis_mul8x16au(REF_6_1, CONST_256, REF_S6);
+                vis_padd16(TMP18, TMP6, TMP18);
+                vis_mul8x16au(REF_6_1, CONST_256, REF_S6);
 
-		vis_pack16(TMP12, DST_0);
-		vis_padd16(TMP28, TMP0, TMP12);
+                vis_pack16(TMP12, DST_0);
+                vis_padd16(TMP28, TMP0, TMP12);
 
-		vis_pack16(TMP14, DST_1);
-		vis_st64(DST_0, dest[0]);
-		vis_padd16(TMP30, TMP2, TMP14);
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP30, TMP2, TMP14);
 
-		vis_pack16(TMP16, DST_2);
-		vis_padd16(REF_S4, TMP4, TMP16);
+                vis_pack16(TMP16, DST_2);
+                vis_padd16(REF_S4, TMP4, TMP16);
 
-		vis_pack16(TMP18, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-		vis_padd16(REF_S6, TMP6, TMP18);
+                vis_pack16(TMP18, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+                vis_padd16(REF_S6, TMP6, TMP18);
 
-		vis_padd16(TMP12, TMP20, TMP12);
+                vis_padd16(TMP12, TMP20, TMP12);
 
-		vis_padd16(TMP14, TMP22, TMP14);
-		vis_pack16(TMP12, DST_0);
+                vis_padd16(TMP14, TMP22, TMP14);
+                vis_pack16(TMP12, DST_0);
 
-		vis_padd16(TMP16, TMP24, TMP16);
-		vis_pack16(TMP14, DST_1);
-		vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP16, TMP24, TMP16);
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
 
-		vis_padd16(TMP18, TMP26, TMP18);
-		vis_pack16(TMP16, DST_2);
+                vis_padd16(TMP18, TMP26, TMP18);
+                vis_pack16(TMP16, DST_2);
 
-		vis_pack16(TMP18, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_pack16(TMP18, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_avg_y_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
+                            const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	int stride_8 = stride + 8;
+        uint8_t *ref = (uint8_t *) _ref;
+        int stride_8 = stride + 8;
 
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
 
-	vis_ld64(ref[ 8], TMP2);
+        vis_ld64(ref[ 8], TMP2);
 
-	vis_ld64(constants3[0], CONST_3);
-	vis_faligndata(TMP0, TMP2, REF_2);
+        vis_ld64(constants3[0], CONST_3);
+        vis_faligndata(TMP0, TMP2, REF_2);
 
-	vis_ld64(constants256_512[0], CONST_256);
+        vis_ld64(constants256_512[0], CONST_256);
 
-	height >>= 1;
-	do {	/* 20 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_pmerge(ZERO,       REF_2,     TMP8);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP10);
+        height >>= 1;
+        do {    /* 20 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_pmerge(ZERO,       REF_2,     TMP8);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP10);
 
-		vis_ld64_2(ref, stride_8, TMP2);
-		ref += stride;
+                vis_ld64_2(ref, stride_8, TMP2);
+                ref += stride;
 
-		vis_ld64(dest[0], DST_0);
+                vis_ld64(dest[0], DST_0);
 
-		vis_ld64_2(dest, stride, DST_2);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64_2(dest, stride, DST_2);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(ref, stride, TMP4);
-		vis_mul8x16al(DST_0,   CONST_512, TMP16);
-		vis_pmerge(ZERO,       REF_0,     TMP12);
+                vis_ld64_2(ref, stride, TMP4);
+                vis_mul8x16al(DST_0,   CONST_512, TMP16);
+                vis_pmerge(ZERO,       REF_0,     TMP12);
 
-		vis_ld64_2(ref, stride_8, TMP6);
-		ref += stride;
-		vis_mul8x16al(DST_1,   CONST_512, TMP18);
-		vis_pmerge(ZERO,       REF_0_1,   TMP14);
+                vis_ld64_2(ref, stride_8, TMP6);
+                ref += stride;
+                vis_mul8x16al(DST_1,   CONST_512, TMP18);
+                vis_pmerge(ZERO,       REF_0_1,   TMP14);
 
-		vis_padd16(TMP12, CONST_3, TMP12);
-		vis_mul8x16al(DST_2,   CONST_512, TMP24);
+                vis_padd16(TMP12, CONST_3, TMP12);
+                vis_mul8x16al(DST_2,   CONST_512, TMP24);
 
-		vis_padd16(TMP14, CONST_3, TMP14);
-		vis_mul8x16al(DST_3,   CONST_512, TMP26);
+                vis_padd16(TMP14, CONST_3, TMP14);
+                vis_mul8x16al(DST_3,   CONST_512, TMP26);
 
-		vis_faligndata(TMP4, TMP6, REF_2);
+                vis_faligndata(TMP4, TMP6, REF_2);
 
-		vis_padd16(TMP8, TMP12, TMP8);
+                vis_padd16(TMP8, TMP12, TMP8);
 
-		vis_padd16(TMP10, TMP14, TMP10);
-		vis_mul8x16au(REF_2,   CONST_256, TMP20);
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_mul8x16au(REF_2,   CONST_256, TMP20);
 
-		vis_padd16(TMP8, TMP16, TMP0);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP22);
+                vis_padd16(TMP8, TMP16, TMP0);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP22);
 
-		vis_padd16(TMP10, TMP18, TMP2);
-		vis_pack16(TMP0, DST_0);
+                vis_padd16(TMP10, TMP18, TMP2);
+                vis_pack16(TMP0, DST_0);
 
-		vis_pack16(TMP2, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-		vis_padd16(TMP12, TMP20, TMP12);
+                vis_pack16(TMP2, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+                vis_padd16(TMP12, TMP20, TMP12);
 
-		vis_padd16(TMP14, TMP22, TMP14);
+                vis_padd16(TMP14, TMP22, TMP14);
 
-		vis_padd16(TMP12, TMP24, TMP0);
+                vis_padd16(TMP12, TMP24, TMP0);
 
-		vis_padd16(TMP14, TMP26, TMP2);
-		vis_pack16(TMP0, DST_2);
+                vis_padd16(TMP14, TMP26, TMP2);
+                vis_pack16(TMP0, DST_2);
 
-		vis_pack16(TMP2, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_pack16(TMP2, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_put_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
-			      const int stride, int height)
+                              const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_8 = stride + 8;
-	int stride_16 = stride + 16;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
+        int stride_16 = stride + 16;
 
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
 
-	vis_ld64(ref[ 8], TMP2);
+        vis_ld64(ref[ 8], TMP2);
 
-	vis_ld64(ref[16], TMP4);
+        vis_ld64(ref[16], TMP4);
 
-	vis_ld64(constants2[0], CONST_2);
-	vis_faligndata(TMP0, TMP2, REF_S0);
+        vis_ld64(constants2[0], CONST_2);
+        vis_faligndata(TMP0, TMP2, REF_S0);
 
-	vis_ld64(constants256_512[0], CONST_256);
-	vis_faligndata(TMP2, TMP4, REF_S4);
+        vis_ld64(constants256_512[0], CONST_256);
+        vis_faligndata(TMP2, TMP4, REF_S4);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_S2);
-		vis_faligndata(TMP2, TMP4, REF_S6);
-	} else {
-		vis_src1(TMP2, REF_S2);
-		vis_src1(TMP4, REF_S6);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+                vis_faligndata(TMP2, TMP4, REF_S6);
+        } else {
+                vis_src1(TMP2, REF_S2);
+                vis_src1(TMP4, REF_S6);
+        }
 
-	height >>= 1;
-	do {
-		vis_ld64_2(ref, stride, TMP0);
-		vis_mul8x16au(REF_S0, CONST_256, TMP12);
-		vis_pmerge(ZERO,      REF_S0_1,  TMP14);
+        height >>= 1;
+        do {
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0, CONST_256, TMP12);
+                vis_pmerge(ZERO,      REF_S0_1,  TMP14);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64_2(ref, stride_8, TMP2);
-		vis_mul8x16au(REF_S2, CONST_256, TMP16);
-		vis_pmerge(ZERO,      REF_S2_1,  TMP18);
+                vis_ld64_2(ref, stride_8, TMP2);
+                vis_mul8x16au(REF_S2, CONST_256, TMP16);
+                vis_pmerge(ZERO,      REF_S2_1,  TMP18);
 
-		vis_ld64_2(ref, stride_16, TMP4);
-		ref += stride;
-		vis_mul8x16au(REF_S4, CONST_256, TMP20);
-		vis_pmerge(ZERO,      REF_S4_1,  TMP22);
+                vis_ld64_2(ref, stride_16, TMP4);
+                ref += stride;
+                vis_mul8x16au(REF_S4, CONST_256, TMP20);
+                vis_pmerge(ZERO,      REF_S4_1,  TMP22);
 
-		vis_ld64_2(ref, stride, TMP6);
-		vis_mul8x16au(REF_S6, CONST_256, TMP24);
-		vis_pmerge(ZERO,      REF_S6_1,  TMP26);
+                vis_ld64_2(ref, stride, TMP6);
+                vis_mul8x16au(REF_S6, CONST_256, TMP24);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP26);
 
-		vis_ld64_2(ref, stride_8, TMP8);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64_2(ref, stride_8, TMP8);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(ref, stride_16, TMP10);
-		ref += stride;
-		vis_faligndata(TMP2, TMP4, REF_4);
+                vis_ld64_2(ref, stride_16, TMP10);
+                ref += stride;
+                vis_faligndata(TMP2, TMP4, REF_4);
 
-		vis_faligndata(TMP6, TMP8, REF_S0);
+                vis_faligndata(TMP6, TMP8, REF_S0);
 
-		vis_faligndata(TMP8, TMP10, REF_S4);
+                vis_faligndata(TMP8, TMP10, REF_S4);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-			vis_faligndata(TMP2, TMP4, REF_6);
-			vis_faligndata(TMP6, TMP8, REF_S2);
-			vis_faligndata(TMP8, TMP10, REF_S6);
-		} else {
-			vis_src1(TMP2, REF_2);
-			vis_src1(TMP4, REF_6);
-			vis_src1(TMP8, REF_S2);
-			vis_src1(TMP10, REF_S6);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                        vis_faligndata(TMP6, TMP8, REF_S2);
+                        vis_faligndata(TMP8, TMP10, REF_S6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                        vis_src1(TMP8, REF_S2);
+                        vis_src1(TMP10, REF_S6);
+                }
 
-		vis_mul8x16au(REF_0, CONST_256, TMP0);
-		vis_pmerge(ZERO,      REF_0_1,  TMP2);
+                vis_mul8x16au(REF_0, CONST_256, TMP0);
+                vis_pmerge(ZERO,      REF_0_1,  TMP2);
 
-		vis_mul8x16au(REF_2, CONST_256, TMP4);
-		vis_pmerge(ZERO,      REF_2_1,  TMP6);
+                vis_mul8x16au(REF_2, CONST_256, TMP4);
+                vis_pmerge(ZERO,      REF_2_1,  TMP6);
 
-		vis_padd16(TMP0, CONST_2, TMP8);
-		vis_mul8x16au(REF_4, CONST_256, TMP0);
+                vis_padd16(TMP0, CONST_2, TMP8);
+                vis_mul8x16au(REF_4, CONST_256, TMP0);
 
-		vis_padd16(TMP2, CONST_2, TMP10);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP2);
+                vis_padd16(TMP2, CONST_2, TMP10);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP2);
 
-		vis_padd16(TMP8, TMP4, TMP8);
-		vis_mul8x16au(REF_6, CONST_256, TMP4);
+                vis_padd16(TMP8, TMP4, TMP8);
+                vis_mul8x16au(REF_6, CONST_256, TMP4);
 
-		vis_padd16(TMP10, TMP6, TMP10);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP6);
+                vis_padd16(TMP10, TMP6, TMP10);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP6);
 
-		vis_padd16(TMP12, TMP8, TMP12);
+                vis_padd16(TMP12, TMP8, TMP12);
 
-		vis_padd16(TMP14, TMP10, TMP14);
+                vis_padd16(TMP14, TMP10, TMP14);
 
-		vis_padd16(TMP12, TMP16, TMP12);
+                vis_padd16(TMP12, TMP16, TMP12);
 
-		vis_padd16(TMP14, TMP18, TMP14);
-		vis_pack16(TMP12, DST_0);
+                vis_padd16(TMP14, TMP18, TMP14);
+                vis_pack16(TMP12, DST_0);
 
-		vis_pack16(TMP14, DST_1);
-		vis_st64(DST_0, dest[0]);
-		vis_padd16(TMP0, CONST_2, TMP12);
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP0, CONST_2, TMP12);
 
-		vis_mul8x16au(REF_S0, CONST_256, TMP0);
-		vis_padd16(TMP2, CONST_2, TMP14);
+                vis_mul8x16au(REF_S0, CONST_256, TMP0);
+                vis_padd16(TMP2, CONST_2, TMP14);
 
-		vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
-		vis_padd16(TMP12, TMP4, TMP12);
+                vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
+                vis_padd16(TMP12, TMP4, TMP12);
 
-		vis_mul8x16au(REF_S2, CONST_256, TMP4);
-		vis_padd16(TMP14, TMP6, TMP14);
+                vis_mul8x16au(REF_S2, CONST_256, TMP4);
+                vis_padd16(TMP14, TMP6, TMP14);
 
-		vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
-		vis_padd16(TMP20, TMP12, TMP20);
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
+                vis_padd16(TMP20, TMP12, TMP20);
 
-		vis_padd16(TMP22, TMP14, TMP22);
+                vis_padd16(TMP22, TMP14, TMP22);
 
-		vis_padd16(TMP20, TMP24, TMP20);
+                vis_padd16(TMP20, TMP24, TMP20);
 
-		vis_padd16(TMP22, TMP26, TMP22);
-		vis_pack16(TMP20, DST_2);
+                vis_padd16(TMP22, TMP26, TMP22);
+                vis_pack16(TMP20, DST_2);
 
-		vis_pack16(TMP22, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-		vis_padd16(TMP0, TMP4, TMP24);
+                vis_pack16(TMP22, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+                vis_padd16(TMP0, TMP4, TMP24);
 
-		vis_mul8x16au(REF_S4, CONST_256, TMP0);
-		vis_padd16(TMP2, TMP6, TMP26);
+                vis_mul8x16au(REF_S4, CONST_256, TMP0);
+                vis_padd16(TMP2, TMP6, TMP26);
 
-		vis_mul8x16au(REF_S4_1, CONST_256, TMP2);
-		vis_padd16(TMP24, TMP8, TMP24);
+                vis_mul8x16au(REF_S4_1, CONST_256, TMP2);
+                vis_padd16(TMP24, TMP8, TMP24);
 
-		vis_padd16(TMP26, TMP10, TMP26);
-		vis_pack16(TMP24, DST_0);
+                vis_padd16(TMP26, TMP10, TMP26);
+                vis_pack16(TMP24, DST_0);
 
-		vis_pack16(TMP26, DST_1);
-		vis_st64(DST_0, dest[0]);
-		vis_pmerge(ZERO, REF_S6, TMP4);
+                vis_pack16(TMP26, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_pmerge(ZERO, REF_S6, TMP4);
 
-		vis_pmerge(ZERO,      REF_S6_1,  TMP6);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP6);
 
-		vis_padd16(TMP0, TMP4, TMP0);
+                vis_padd16(TMP0, TMP4, TMP0);
 
-		vis_padd16(TMP2, TMP6, TMP2);
+                vis_padd16(TMP2, TMP6, TMP2);
 
-		vis_padd16(TMP0, TMP12, TMP0);
+                vis_padd16(TMP0, TMP12, TMP0);
 
-		vis_padd16(TMP2, TMP14, TMP2);
-		vis_pack16(TMP0, DST_2);
+                vis_padd16(TMP2, TMP14, TMP2);
+                vis_pack16(TMP0, DST_2);
 
-		vis_pack16(TMP2, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_pack16(TMP2, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_put_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_8 = stride + 8;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
 
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
 
-	vis_ld64(ref[ 8], TMP2);
+        vis_ld64(ref[ 8], TMP2);
 
-	vis_ld64(constants2[0], CONST_2);
+        vis_ld64(constants2[0], CONST_2);
 
-	vis_ld64(constants256_512[0], CONST_256);
-	vis_faligndata(TMP0, TMP2, REF_S0);
+        vis_ld64(constants256_512[0], CONST_256);
+        vis_faligndata(TMP0, TMP2, REF_S0);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_S2);
-	} else {
-		vis_src1(TMP2, REF_S2);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+        } else {
+                vis_src1(TMP2, REF_S2);
+        }
 
-	height >>= 1;
-	do {	/* 26 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_mul8x16au(REF_S0,   CONST_256, TMP8);
-		vis_pmerge(ZERO,        REF_S2,    TMP12);
+        height >>= 1;
+        do {    /* 26 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0,   CONST_256, TMP8);
+                vis_pmerge(ZERO,        REF_S2,    TMP12);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64_2(ref, stride_8, TMP2);
-		ref += stride;
-		vis_mul8x16au(REF_S0_1, CONST_256, TMP10);
-		vis_pmerge(ZERO,        REF_S2_1,  TMP14);
+                vis_ld64_2(ref, stride_8, TMP2);
+                ref += stride;
+                vis_mul8x16au(REF_S0_1, CONST_256, TMP10);
+                vis_pmerge(ZERO,        REF_S2_1,  TMP14);
 
-		vis_ld64_2(ref, stride, TMP4);
+                vis_ld64_2(ref, stride, TMP4);
 
-		vis_ld64_2(ref, stride_8, TMP6);
-		ref += stride;
-		vis_faligndata(TMP0, TMP2, REF_S4);
+                vis_ld64_2(ref, stride_8, TMP6);
+                ref += stride;
+                vis_faligndata(TMP0, TMP2, REF_S4);
 
-		vis_pmerge(ZERO, REF_S4, TMP18);
+                vis_pmerge(ZERO, REF_S4, TMP18);
 
-		vis_pmerge(ZERO, REF_S4_1, TMP20);
+                vis_pmerge(ZERO, REF_S4_1, TMP20);
 
-		vis_faligndata(TMP4, TMP6, REF_S0);
+                vis_faligndata(TMP4, TMP6, REF_S0);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_S6);
-			vis_faligndata(TMP4, TMP6, REF_S2);
-		} else {
-			vis_src1(TMP2, REF_S6);
-			vis_src1(TMP6, REF_S2);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_S6);
+                        vis_faligndata(TMP4, TMP6, REF_S2);
+                } else {
+                        vis_src1(TMP2, REF_S6);
+                        vis_src1(TMP6, REF_S2);
+                }
 
-		vis_padd16(TMP18, CONST_2, TMP18);
-		vis_mul8x16au(REF_S6,   CONST_256, TMP22);
+                vis_padd16(TMP18, CONST_2, TMP18);
+                vis_mul8x16au(REF_S6,   CONST_256, TMP22);
 
-		vis_padd16(TMP20, CONST_2, TMP20);
-		vis_mul8x16au(REF_S6_1, CONST_256, TMP24);
+                vis_padd16(TMP20, CONST_2, TMP20);
+                vis_mul8x16au(REF_S6_1, CONST_256, TMP24);
 
-		vis_mul8x16au(REF_S0,   CONST_256, TMP26);
-		vis_pmerge(ZERO, REF_S0_1, TMP28);
+                vis_mul8x16au(REF_S0,   CONST_256, TMP26);
+                vis_pmerge(ZERO, REF_S0_1, TMP28);
 
-		vis_mul8x16au(REF_S2,   CONST_256, TMP30);
-		vis_padd16(TMP18, TMP22, TMP18);
+                vis_mul8x16au(REF_S2,   CONST_256, TMP30);
+                vis_padd16(TMP18, TMP22, TMP18);
 
-		vis_mul8x16au(REF_S2_1, CONST_256, TMP32);
-		vis_padd16(TMP20, TMP24, TMP20);
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP32);
+                vis_padd16(TMP20, TMP24, TMP20);
 
-		vis_padd16(TMP8,  TMP18, TMP8);
+                vis_padd16(TMP8,  TMP18, TMP8);
 
-		vis_padd16(TMP10, TMP20, TMP10);
+                vis_padd16(TMP10, TMP20, TMP10);
 
-		vis_padd16(TMP8,  TMP12, TMP8);
+                vis_padd16(TMP8,  TMP12, TMP8);
 
-		vis_padd16(TMP10, TMP14, TMP10);
-		vis_pack16(TMP8,  DST_0);
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_pack16(TMP8,  DST_0);
 
-		vis_pack16(TMP10, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-		vis_padd16(TMP18, TMP26, TMP18);
+                vis_pack16(TMP10, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+                vis_padd16(TMP18, TMP26, TMP18);
 
-		vis_padd16(TMP20, TMP28, TMP20);
+                vis_padd16(TMP20, TMP28, TMP20);
 
-		vis_padd16(TMP18, TMP30, TMP18);
+                vis_padd16(TMP18, TMP30, TMP18);
 
-		vis_padd16(TMP20, TMP32, TMP20);
-		vis_pack16(TMP18, DST_2);
+                vis_padd16(TMP20, TMP32, TMP20);
+                vis_pack16(TMP18, DST_2);
 
-		vis_pack16(TMP20, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_pack16(TMP20, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_avg_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
-			      const int stride, int height)
+                              const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_8 = stride + 8;
-	int stride_16 = stride + 16;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
+        int stride_16 = stride + 16;
 
-	vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
 
-	vis_ld64(ref[ 8], TMP2);
+        vis_ld64(ref[ 8], TMP2);
 
-	vis_ld64(ref[16], TMP4);
+        vis_ld64(ref[16], TMP4);
 
-	vis_ld64(constants6[0], CONST_6);
-	vis_faligndata(TMP0, TMP2, REF_S0);
+        vis_ld64(constants6[0], CONST_6);
+        vis_faligndata(TMP0, TMP2, REF_S0);
 
-	vis_ld64(constants256_1024[0], CONST_256);
-	vis_faligndata(TMP2, TMP4, REF_S4);
+        vis_ld64(constants256_1024[0], CONST_256);
+        vis_faligndata(TMP2, TMP4, REF_S4);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_S2);
-		vis_faligndata(TMP2, TMP4, REF_S6);
-	} else {
-		vis_src1(TMP2, REF_S2);
-		vis_src1(TMP4, REF_S6);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+                vis_faligndata(TMP2, TMP4, REF_S6);
+        } else {
+                vis_src1(TMP2, REF_S2);
+                vis_src1(TMP4, REF_S6);
+        }
 
-	height >>= 1;
-	do {	/* 55 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_mul8x16au(REF_S0, CONST_256, TMP12);
-		vis_pmerge(ZERO,      REF_S0_1,  TMP14);
+        height >>= 1;
+        do {    /* 55 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0, CONST_256, TMP12);
+                vis_pmerge(ZERO,      REF_S0_1,  TMP14);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64_2(ref, stride_8, TMP2);
-		vis_mul8x16au(REF_S2, CONST_256, TMP16);
-		vis_pmerge(ZERO,      REF_S2_1,  TMP18);
+                vis_ld64_2(ref, stride_8, TMP2);
+                vis_mul8x16au(REF_S2, CONST_256, TMP16);
+                vis_pmerge(ZERO,      REF_S2_1,  TMP18);
 
-		vis_ld64_2(ref, stride_16, TMP4);
-		ref += stride;
-		vis_mul8x16au(REF_S4, CONST_256, TMP20);
-		vis_pmerge(ZERO,      REF_S4_1,  TMP22);
+                vis_ld64_2(ref, stride_16, TMP4);
+                ref += stride;
+                vis_mul8x16au(REF_S4, CONST_256, TMP20);
+                vis_pmerge(ZERO,      REF_S4_1,  TMP22);
 
-		vis_ld64_2(ref, stride, TMP6);
-		vis_mul8x16au(REF_S6, CONST_256, TMP24);
-		vis_pmerge(ZERO,      REF_S6_1,  TMP26);
+                vis_ld64_2(ref, stride, TMP6);
+                vis_mul8x16au(REF_S6, CONST_256, TMP24);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP26);
 
-		vis_ld64_2(ref, stride_8, TMP8);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64_2(ref, stride_8, TMP8);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(ref, stride_16, TMP10);
-		ref += stride;
-		vis_faligndata(TMP2, TMP4, REF_4);
+                vis_ld64_2(ref, stride_16, TMP10);
+                ref += stride;
+                vis_faligndata(TMP2, TMP4, REF_4);
 
-		vis_ld64(dest[0], DST_0);
-		vis_faligndata(TMP6, TMP8, REF_S0);
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP6, TMP8, REF_S0);
 
-		vis_ld64_2(dest, 8, DST_2);
-		vis_faligndata(TMP8, TMP10, REF_S4);
+                vis_ld64_2(dest, 8, DST_2);
+                vis_faligndata(TMP8, TMP10, REF_S4);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-			vis_faligndata(TMP2, TMP4, REF_6);
-			vis_faligndata(TMP6, TMP8, REF_S2);
-			vis_faligndata(TMP8, TMP10, REF_S6);
-		} else {
-			vis_src1(TMP2, REF_2);
-			vis_src1(TMP4, REF_6);
-			vis_src1(TMP8, REF_S2);
-			vis_src1(TMP10, REF_S6);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                        vis_faligndata(TMP6, TMP8, REF_S2);
+                        vis_faligndata(TMP8, TMP10, REF_S6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                        vis_src1(TMP8, REF_S2);
+                        vis_src1(TMP10, REF_S6);
+                }
 
-		vis_mul8x16al(DST_0,   CONST_1024, TMP30);
-		vis_pmerge(ZERO, REF_0, TMP0);
+                vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+                vis_pmerge(ZERO, REF_0, TMP0);
 
-		vis_mul8x16al(DST_1,   CONST_1024, TMP32);
-		vis_pmerge(ZERO,      REF_0_1,  TMP2);
+                vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+                vis_pmerge(ZERO,      REF_0_1,  TMP2);
 
-		vis_mul8x16au(REF_2, CONST_256, TMP4);
-		vis_pmerge(ZERO,      REF_2_1,  TMP6);
+                vis_mul8x16au(REF_2, CONST_256, TMP4);
+                vis_pmerge(ZERO,      REF_2_1,  TMP6);
 
-		vis_mul8x16al(DST_2,   CONST_1024, REF_0);
-		vis_padd16(TMP0, CONST_6, TMP0);
+                vis_mul8x16al(DST_2,   CONST_1024, REF_0);
+                vis_padd16(TMP0, CONST_6, TMP0);
 
-		vis_mul8x16al(DST_3,   CONST_1024, REF_2);
-		vis_padd16(TMP2, CONST_6, TMP2);
+                vis_mul8x16al(DST_3,   CONST_1024, REF_2);
+                vis_padd16(TMP2, CONST_6, TMP2);
 
-		vis_padd16(TMP0, TMP4, TMP0);
-		vis_mul8x16au(REF_4, CONST_256, TMP4);
+                vis_padd16(TMP0, TMP4, TMP0);
+                vis_mul8x16au(REF_4, CONST_256, TMP4);
 
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP6);
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP6);
 
-		vis_padd16(TMP12, TMP0, TMP12);
-		vis_mul8x16au(REF_6, CONST_256, TMP8);
+                vis_padd16(TMP12, TMP0, TMP12);
+                vis_mul8x16au(REF_6, CONST_256, TMP8);
 
-		vis_padd16(TMP14, TMP2, TMP14);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP10);
+                vis_padd16(TMP14, TMP2, TMP14);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP10);
 
-		vis_padd16(TMP12, TMP16, TMP12);
-		vis_mul8x16au(REF_S0, CONST_256, REF_4);
+                vis_padd16(TMP12, TMP16, TMP12);
+                vis_mul8x16au(REF_S0, CONST_256, REF_4);
 
-		vis_padd16(TMP14, TMP18, TMP14);
-		vis_mul8x16au(REF_S0_1, CONST_256, REF_6);
+                vis_padd16(TMP14, TMP18, TMP14);
+                vis_mul8x16au(REF_S0_1, CONST_256, REF_6);
 
-		vis_padd16(TMP12, TMP30, TMP12);
+                vis_padd16(TMP12, TMP30, TMP12);
 
-		vis_padd16(TMP14, TMP32, TMP14);
-		vis_pack16(TMP12, DST_0);
+                vis_padd16(TMP14, TMP32, TMP14);
+                vis_pack16(TMP12, DST_0);
 
-		vis_pack16(TMP14, DST_1);
-		vis_st64(DST_0, dest[0]);
-		vis_padd16(TMP4, CONST_6, TMP4);
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP4, CONST_6, TMP4);
 
-		vis_ld64_2(dest, stride, DST_0);
-		vis_padd16(TMP6, CONST_6, TMP6);
-		vis_mul8x16au(REF_S2, CONST_256, TMP12);
+                vis_ld64_2(dest, stride, DST_0);
+                vis_padd16(TMP6, CONST_6, TMP6);
+                vis_mul8x16au(REF_S2, CONST_256, TMP12);
 
-		vis_padd16(TMP4, TMP8, TMP4);
-		vis_mul8x16au(REF_S2_1, CONST_256,  TMP14);
+                vis_padd16(TMP4, TMP8, TMP4);
+                vis_mul8x16au(REF_S2_1, CONST_256,  TMP14);
 
-		vis_padd16(TMP6, TMP10, TMP6);
+                vis_padd16(TMP6, TMP10, TMP6);
 
-		vis_padd16(TMP20, TMP4, TMP20);
+                vis_padd16(TMP20, TMP4, TMP20);
 
-		vis_padd16(TMP22, TMP6, TMP22);
+                vis_padd16(TMP22, TMP6, TMP22);
 
-		vis_padd16(TMP20, TMP24, TMP20);
+                vis_padd16(TMP20, TMP24, TMP20);
 
-		vis_padd16(TMP22, TMP26, TMP22);
+                vis_padd16(TMP22, TMP26, TMP22);
 
-		vis_padd16(TMP20, REF_0, TMP20);
-		vis_mul8x16au(REF_S4, CONST_256, REF_0);
+                vis_padd16(TMP20, REF_0, TMP20);
+                vis_mul8x16au(REF_S4, CONST_256, REF_0);
 
-		vis_padd16(TMP22, REF_2, TMP22);
-		vis_pack16(TMP20, DST_2);
+                vis_padd16(TMP22, REF_2, TMP22);
+                vis_pack16(TMP20, DST_2);
 
-		vis_pack16(TMP22, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
+                vis_pack16(TMP22, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
 
-		vis_ld64_2(dest, 8, DST_2);
-		vis_mul8x16al(DST_0,   CONST_1024, TMP30);
-		vis_pmerge(ZERO,      REF_S4_1,  REF_2);
+                vis_ld64_2(dest, 8, DST_2);
+                vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+                vis_pmerge(ZERO,      REF_S4_1,  REF_2);
 
-		vis_mul8x16al(DST_1,   CONST_1024, TMP32);
-		vis_padd16(REF_4, TMP0, TMP8);
+                vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+                vis_padd16(REF_4, TMP0, TMP8);
 
-		vis_mul8x16au(REF_S6, CONST_256, REF_4);
-		vis_padd16(REF_6, TMP2, TMP10);
+                vis_mul8x16au(REF_S6, CONST_256, REF_4);
+                vis_padd16(REF_6, TMP2, TMP10);
 
-		vis_mul8x16au(REF_S6_1, CONST_256, REF_6);
-		vis_padd16(TMP8, TMP12, TMP8);
+                vis_mul8x16au(REF_S6_1, CONST_256, REF_6);
+                vis_padd16(TMP8, TMP12, TMP8);
 
-		vis_padd16(TMP10, TMP14, TMP10);
+                vis_padd16(TMP10, TMP14, TMP10);
 
-		vis_padd16(TMP8, TMP30, TMP8);
+                vis_padd16(TMP8, TMP30, TMP8);
 
-		vis_padd16(TMP10, TMP32, TMP10);
-		vis_pack16(TMP8, DST_0);
+                vis_padd16(TMP10, TMP32, TMP10);
+                vis_pack16(TMP8, DST_0);
 
-		vis_pack16(TMP10, DST_1);
-		vis_st64(DST_0, dest[0]);
+                vis_pack16(TMP10, DST_1);
+                vis_st64(DST_0, dest[0]);
 
-		vis_padd16(REF_0, TMP4, REF_0);
+                vis_padd16(REF_0, TMP4, REF_0);
 
-		vis_mul8x16al(DST_2,   CONST_1024, TMP30);
-		vis_padd16(REF_2, TMP6, REF_2);
+                vis_mul8x16al(DST_2,   CONST_1024, TMP30);
+                vis_padd16(REF_2, TMP6, REF_2);
 
-		vis_mul8x16al(DST_3,   CONST_1024, TMP32);
-		vis_padd16(REF_0, REF_4, REF_0);
+                vis_mul8x16al(DST_3,   CONST_1024, TMP32);
+                vis_padd16(REF_0, REF_4, REF_0);
 
-		vis_padd16(REF_2, REF_6, REF_2);
+                vis_padd16(REF_2, REF_6, REF_2);
 
-		vis_padd16(REF_0, TMP30, REF_0);
+                vis_padd16(REF_0, TMP30, REF_0);
 
-		/* stall */
+                /* stall */
 
-		vis_padd16(REF_2, TMP32, REF_2);
-		vis_pack16(REF_0, DST_2);
+                vis_padd16(REF_2, TMP32, REF_2);
+                vis_pack16(REF_0, DST_2);
 
-		vis_pack16(REF_2, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_pack16(REF_2, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_8 = stride + 8;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
 
-	vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[0], TMP0);
-	vis_fzero(ZERO);
+        vis_ld64(ref[0], TMP0);
+        vis_fzero(ZERO);
 
-	vis_ld64_2(ref, 8, TMP2);
+        vis_ld64_2(ref, 8, TMP2);
 
-	vis_ld64(constants6[0], CONST_6);
+        vis_ld64(constants6[0], CONST_6);
 
-	vis_ld64(constants256_1024[0], CONST_256);
-	vis_faligndata(TMP0, TMP2, REF_S0);
+        vis_ld64(constants256_1024[0], CONST_256);
+        vis_faligndata(TMP0, TMP2, REF_S0);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_S2);
-	} else {
-		vis_src1(TMP2, REF_S2);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+        } else {
+                vis_src1(TMP2, REF_S2);
+        }
 
-	height >>= 1;
-	do {	/* 31 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_mul8x16au(REF_S0, CONST_256, TMP8);
-		vis_pmerge(ZERO,      REF_S0_1,  TMP10);
+        height >>= 1;
+        do {    /* 31 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0, CONST_256, TMP8);
+                vis_pmerge(ZERO,      REF_S0_1,  TMP10);
 
-		vis_ld64_2(ref, stride_8, TMP2);
-		ref += stride;
-		vis_mul8x16au(REF_S2, CONST_256, TMP12);
-		vis_pmerge(ZERO,      REF_S2_1,  TMP14);
+                vis_ld64_2(ref, stride_8, TMP2);
+                ref += stride;
+                vis_mul8x16au(REF_S2, CONST_256, TMP12);
+                vis_pmerge(ZERO,      REF_S2_1,  TMP14);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64_2(ref, stride, TMP4);
-		vis_faligndata(TMP0, TMP2, REF_S4);
+                vis_ld64_2(ref, stride, TMP4);
+                vis_faligndata(TMP0, TMP2, REF_S4);
 
-		vis_ld64_2(ref, stride_8, TMP6);
-		ref += stride;
+                vis_ld64_2(ref, stride_8, TMP6);
+                ref += stride;
 
-		vis_ld64(dest[0], DST_0);
-		vis_faligndata(TMP4, TMP6, REF_S0);
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP4, TMP6, REF_S0);
 
-		vis_ld64_2(dest, stride, DST_2);
+                vis_ld64_2(dest, stride, DST_2);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_S6);
-			vis_faligndata(TMP4, TMP6, REF_S2);
-		} else {
-			vis_src1(TMP2, REF_S6);
-			vis_src1(TMP6, REF_S2);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_S6);
+                        vis_faligndata(TMP4, TMP6, REF_S2);
+                } else {
+                        vis_src1(TMP2, REF_S6);
+                        vis_src1(TMP6, REF_S2);
+                }
 
-		vis_mul8x16al(DST_0,   CONST_1024, TMP30);
-		vis_pmerge(ZERO, REF_S4, TMP22);
+                vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+                vis_pmerge(ZERO, REF_S4, TMP22);
 
-		vis_mul8x16al(DST_1,   CONST_1024, TMP32);
-		vis_pmerge(ZERO,      REF_S4_1,  TMP24);
+                vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+                vis_pmerge(ZERO,      REF_S4_1,  TMP24);
 
-		vis_mul8x16au(REF_S6, CONST_256, TMP26);
-		vis_pmerge(ZERO,      REF_S6_1,  TMP28);
+                vis_mul8x16au(REF_S6, CONST_256, TMP26);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP28);
 
-		vis_mul8x16au(REF_S0, CONST_256, REF_S4);
-		vis_padd16(TMP22, CONST_6, TMP22);
+                vis_mul8x16au(REF_S0, CONST_256, REF_S4);
+                vis_padd16(TMP22, CONST_6, TMP22);
 
-		vis_mul8x16au(REF_S0_1, CONST_256, REF_S6);
-		vis_padd16(TMP24, CONST_6, TMP24);
+                vis_mul8x16au(REF_S0_1, CONST_256, REF_S6);
+                vis_padd16(TMP24, CONST_6, TMP24);
 
-		vis_mul8x16al(DST_2,   CONST_1024, REF_0);
-		vis_padd16(TMP22, TMP26, TMP22);
+                vis_mul8x16al(DST_2,   CONST_1024, REF_0);
+                vis_padd16(TMP22, TMP26, TMP22);
 
-		vis_mul8x16al(DST_3,   CONST_1024, REF_2);
-		vis_padd16(TMP24, TMP28, TMP24);
+                vis_mul8x16al(DST_3,   CONST_1024, REF_2);
+                vis_padd16(TMP24, TMP28, TMP24);
 
-		vis_mul8x16au(REF_S2, CONST_256, TMP26);
-		vis_padd16(TMP8, TMP22, TMP8);
+                vis_mul8x16au(REF_S2, CONST_256, TMP26);
+                vis_padd16(TMP8, TMP22, TMP8);
 
-		vis_mul8x16au(REF_S2_1, CONST_256, TMP28);
-		vis_padd16(TMP10, TMP24, TMP10);
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP28);
+                vis_padd16(TMP10, TMP24, TMP10);
 
-		vis_padd16(TMP8, TMP12, TMP8);
+                vis_padd16(TMP8, TMP12, TMP8);
 
-		vis_padd16(TMP10, TMP14, TMP10);
+                vis_padd16(TMP10, TMP14, TMP10);
 
-		vis_padd16(TMP8, TMP30, TMP8);
+                vis_padd16(TMP8, TMP30, TMP8);
 
-		vis_padd16(TMP10, TMP32, TMP10);
-		vis_pack16(TMP8, DST_0);
+                vis_padd16(TMP10, TMP32, TMP10);
+                vis_pack16(TMP8, DST_0);
 
-		vis_pack16(TMP10, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
+                vis_pack16(TMP10, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
 
-		vis_padd16(REF_S4, TMP22, TMP12);
+                vis_padd16(REF_S4, TMP22, TMP12);
 
-		vis_padd16(REF_S6, TMP24, TMP14);
+                vis_padd16(REF_S6, TMP24, TMP14);
 
-		vis_padd16(TMP12, TMP26, TMP12);
+                vis_padd16(TMP12, TMP26, TMP12);
 
-		vis_padd16(TMP14, TMP28, TMP14);
+                vis_padd16(TMP14, TMP28, TMP14);
 
-		vis_padd16(TMP12, REF_0, TMP12);
+                vis_padd16(TMP12, REF_0, TMP12);
 
-		vis_padd16(TMP14, REF_2, TMP14);
-		vis_pack16(TMP12, DST_2);
+                vis_padd16(TMP14, REF_2, TMP14);
+                vis_pack16(TMP12, DST_2);
 
-		vis_pack16(TMP14, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_pack16(TMP14, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
 }
 
 /* End of rounding code */
@@ -2058,1937 +2058,1937 @@ static void MC_avg_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
  * of '0xfe' is in f4, a repeating mask of '0x7f' is in f6, and
  * the value 0x80808080 is in f8):
  *
- *	fxor		f0, f2, f10
- *	fand		f10, f4, f10
- *	fmul8x16	f8, f10, f10
- *	fand		f10, f6, f10
- *	fand		f0, f2, f12
- *	fpadd16		f12, f10, f10
+ *      fxor            f0,   f2, f10
+ *      fand            f10,  f4, f10
+ *      fmul8x16        f8,  f10, f10
+ *      fand            f10,  f6, f10
+ *      fand            f0,   f2, f12
+ *      fpadd16         f12, f10, f10
  */
 
 static void MC_put_no_round_o_16_vis (uint8_t * dest, const uint8_t * _ref,
-				      const int stride, int height)
+                                      const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
+        uint8_t *ref = (uint8_t *) _ref;
 
-	ref = vis_alignaddr(ref);
-	do {	/* 5 cycles */
-		vis_ld64(ref[0], TMP0);
+        ref = vis_alignaddr(ref);
+        do {    /* 5 cycles */
+                vis_ld64(ref[0], TMP0);
 
-		vis_ld64_2(ref, 8, TMP2);
+                vis_ld64_2(ref, 8, TMP2);
 
-		vis_ld64_2(ref, 16, TMP4);
-		ref += stride;
+                vis_ld64_2(ref, 16, TMP4);
+                ref += stride;
 
-		vis_faligndata(TMP0, TMP2, REF_0);
-		vis_st64(REF_0, dest[0]);
+                vis_faligndata(TMP0, TMP2, REF_0);
+                vis_st64(REF_0, dest[0]);
 
-		vis_faligndata(TMP2, TMP4, REF_2);
-		vis_st64_2(REF_2, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_faligndata(TMP2, TMP4, REF_2);
+                vis_st64_2(REF_2, dest, 8);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_put_no_round_o_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
+                            const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
+        uint8_t *ref = (uint8_t *) _ref;
 
-	ref = vis_alignaddr(ref);
-	do {	/* 4 cycles */
-		vis_ld64(ref[0], TMP0);
+        ref = vis_alignaddr(ref);
+        do {    /* 4 cycles */
+                vis_ld64(ref[0], TMP0);
 
-		vis_ld64(ref[8], TMP2);
-		ref += stride;
+                vis_ld64(ref[8], TMP2);
+                ref += stride;
 
-		/* stall */
+                /* stall */
 
-		vis_faligndata(TMP0, TMP2, REF_0);
-		vis_st64(REF_0, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_faligndata(TMP0, TMP2, REF_0);
+                vis_st64(REF_0, dest[0]);
+                dest += stride;
+        } while (--height);
 }
 
 
 static void MC_avg_no_round_o_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	int stride_8 = stride + 8;
+        uint8_t *ref = (uint8_t *) _ref;
+        int stride_8 = stride + 8;
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[0], TMP0);
+        vis_ld64(ref[0], TMP0);
 
-	vis_ld64(ref[8], TMP2);
+        vis_ld64(ref[8], TMP2);
 
-	vis_ld64(ref[16], TMP4);
+        vis_ld64(ref[16], TMP4);
 
-	vis_ld64(dest[0], DST_0);
+        vis_ld64(dest[0], DST_0);
 
-	vis_ld64(dest[8], DST_2);
+        vis_ld64(dest[8], DST_2);
 
-	vis_ld64(constants_fe[0], MASK_fe);
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_ld64(constants_fe[0], MASK_fe);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP2, TMP4, REF_2);
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP2, TMP4, REF_2);
 
-	vis_ld64(constants128[0], CONST_128);
+        vis_ld64(constants128[0], CONST_128);
 
-	ref += stride;
-	height = (height >> 1) - 1;
+        ref += stride;
+        height = (height >> 1) - 1;
 
-	do {	/* 24 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(DST_0, REF_0, TMP6);
+        do {    /* 24 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(DST_0, REF_0, TMP6);
 
-		vis_ld64_2(ref, 8, TMP2);
-		vis_and(TMP6, MASK_fe, TMP6);
+                vis_ld64_2(ref, 8, TMP2);
+                vis_and(TMP6, MASK_fe, TMP6);
 
-		vis_ld64_2(ref, 16, TMP4);
-		ref += stride;
-		vis_mul8x16(CONST_128, TMP6, TMP6);
-		vis_xor(DST_2, REF_2, TMP8);
+                vis_ld64_2(ref, 16, TMP4);
+                ref += stride;
+                vis_mul8x16(CONST_128, TMP6, TMP6);
+                vis_xor(DST_2, REF_2, TMP8);
 
-		vis_and(TMP8, MASK_fe, TMP8);
+                vis_and(TMP8, MASK_fe, TMP8);
 
-		vis_and(DST_0, REF_0, TMP10);
-		vis_ld64_2(dest, stride, DST_0);
-		vis_mul8x16(CONST_128, TMP8, TMP8);
+                vis_and(DST_0, REF_0, TMP10);
+                vis_ld64_2(dest, stride, DST_0);
+                vis_mul8x16(CONST_128, TMP8, TMP8);
 
-		vis_and(DST_2, REF_2, TMP12);
-		vis_ld64_2(dest, stride_8, DST_2);
+                vis_and(DST_2, REF_2, TMP12);
+                vis_ld64_2(dest, stride_8, DST_2);
 
-		vis_ld64(ref[0], TMP14);
-		vis_and(TMP6, MASK_7f, TMP6);
+                vis_ld64(ref[0], TMP14);
+                vis_and(TMP6, MASK_7f, TMP6);
 
-		vis_and(TMP8, MASK_7f, TMP8);
+                vis_and(TMP8, MASK_7f, TMP8);
 
-		vis_padd16(TMP10, TMP6, TMP6);
-		vis_st64(TMP6, dest[0]);
+                vis_padd16(TMP10, TMP6, TMP6);
+                vis_st64(TMP6, dest[0]);
 
-		vis_padd16(TMP12, TMP8, TMP8);
-		vis_st64_2(TMP8, dest, 8);
+                vis_padd16(TMP12, TMP8, TMP8);
+                vis_st64_2(TMP8, dest, 8);
 
-		dest += stride;
-		vis_ld64_2(ref, 8, TMP16);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                dest += stride;
+                vis_ld64_2(ref, 8, TMP16);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(ref, 16, TMP18);
-		vis_faligndata(TMP2, TMP4, REF_2);
-		ref += stride;
+                vis_ld64_2(ref, 16, TMP18);
+                vis_faligndata(TMP2, TMP4, REF_2);
+                ref += stride;
 
-		vis_xor(DST_0, REF_0, TMP20);
+                vis_xor(DST_0, REF_0, TMP20);
 
-		vis_and(TMP20, MASK_fe, TMP20);
+                vis_and(TMP20, MASK_fe, TMP20);
 
-		vis_xor(DST_2, REF_2, TMP22);
-		vis_mul8x16(CONST_128, TMP20, TMP20);
+                vis_xor(DST_2, REF_2, TMP22);
+                vis_mul8x16(CONST_128, TMP20, TMP20);
 
-		vis_and(TMP22, MASK_fe, TMP22);
+                vis_and(TMP22, MASK_fe, TMP22);
 
-		vis_and(DST_0, REF_0, TMP24);
-		vis_mul8x16(CONST_128, TMP22, TMP22);
+                vis_and(DST_0, REF_0, TMP24);
+                vis_mul8x16(CONST_128, TMP22, TMP22);
 
-		vis_and(DST_2, REF_2, TMP26);
+                vis_and(DST_2, REF_2, TMP26);
 
-		vis_ld64_2(dest, stride, DST_0);
-		vis_faligndata(TMP14, TMP16, REF_0);
+                vis_ld64_2(dest, stride, DST_0);
+                vis_faligndata(TMP14, TMP16, REF_0);
 
-		vis_ld64_2(dest, stride_8, DST_2);
-		vis_faligndata(TMP16, TMP18, REF_2);
+                vis_ld64_2(dest, stride_8, DST_2);
+                vis_faligndata(TMP16, TMP18, REF_2);
 
-		vis_and(TMP20, MASK_7f, TMP20);
+                vis_and(TMP20, MASK_7f, TMP20);
 
-		vis_and(TMP22, MASK_7f, TMP22);
+                vis_and(TMP22, MASK_7f, TMP22);
 
-		vis_padd16(TMP24, TMP20, TMP20);
-		vis_st64(TMP20, dest[0]);
+                vis_padd16(TMP24, TMP20, TMP20);
+                vis_st64(TMP20, dest[0]);
 
-		vis_padd16(TMP26, TMP22, TMP22);
-		vis_st64_2(TMP22, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_padd16(TMP26, TMP22, TMP22);
+                vis_st64_2(TMP22, dest, 8);
+                dest += stride;
+        } while (--height);
 
-	vis_ld64(ref[0], TMP0);
-	vis_xor(DST_0, REF_0, TMP6);
+        vis_ld64(ref[0], TMP0);
+        vis_xor(DST_0, REF_0, TMP6);
 
-	vis_ld64_2(ref, 8, TMP2);
-	vis_and(TMP6, MASK_fe, TMP6);
+        vis_ld64_2(ref, 8, TMP2);
+        vis_and(TMP6, MASK_fe, TMP6);
 
-	vis_ld64_2(ref, 16, TMP4);
-	vis_mul8x16(CONST_128, TMP6, TMP6);
-	vis_xor(DST_2, REF_2, TMP8);
+        vis_ld64_2(ref, 16, TMP4);
+        vis_mul8x16(CONST_128, TMP6, TMP6);
+        vis_xor(DST_2, REF_2, TMP8);
 
-	vis_and(TMP8, MASK_fe, TMP8);
+        vis_and(TMP8, MASK_fe, TMP8);
 
-	vis_and(DST_0, REF_0, TMP10);
-	vis_ld64_2(dest, stride, DST_0);
-	vis_mul8x16(CONST_128, TMP8, TMP8);
+        vis_and(DST_0, REF_0, TMP10);
+        vis_ld64_2(dest, stride, DST_0);
+        vis_mul8x16(CONST_128, TMP8, TMP8);
 
-	vis_and(DST_2, REF_2, TMP12);
-	vis_ld64_2(dest, stride_8, DST_2);
+        vis_and(DST_2, REF_2, TMP12);
+        vis_ld64_2(dest, stride_8, DST_2);
 
-	vis_ld64(ref[0], TMP14);
-	vis_and(TMP6, MASK_7f, TMP6);
+        vis_ld64(ref[0], TMP14);
+        vis_and(TMP6, MASK_7f, TMP6);
 
-	vis_and(TMP8, MASK_7f, TMP8);
+        vis_and(TMP8, MASK_7f, TMP8);
 
-	vis_padd16(TMP10, TMP6, TMP6);
-	vis_st64(TMP6, dest[0]);
+        vis_padd16(TMP10, TMP6, TMP6);
+        vis_st64(TMP6, dest[0]);
 
-	vis_padd16(TMP12, TMP8, TMP8);
-	vis_st64_2(TMP8, dest, 8);
+        vis_padd16(TMP12, TMP8, TMP8);
+        vis_st64_2(TMP8, dest, 8);
 
-	dest += stride;
-	vis_faligndata(TMP0, TMP2, REF_0);
+        dest += stride;
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_faligndata(TMP2, TMP4, REF_2);
+        vis_faligndata(TMP2, TMP4, REF_2);
 
-	vis_xor(DST_0, REF_0, TMP20);
+        vis_xor(DST_0, REF_0, TMP20);
 
-	vis_and(TMP20, MASK_fe, TMP20);
+        vis_and(TMP20, MASK_fe, TMP20);
 
-	vis_xor(DST_2, REF_2, TMP22);
-	vis_mul8x16(CONST_128, TMP20, TMP20);
+        vis_xor(DST_2, REF_2, TMP22);
+        vis_mul8x16(CONST_128, TMP20, TMP20);
 
-	vis_and(TMP22, MASK_fe, TMP22);
+        vis_and(TMP22, MASK_fe, TMP22);
 
-	vis_and(DST_0, REF_0, TMP24);
-	vis_mul8x16(CONST_128, TMP22, TMP22);
+        vis_and(DST_0, REF_0, TMP24);
+        vis_mul8x16(CONST_128, TMP22, TMP22);
 
-	vis_and(DST_2, REF_2, TMP26);
+        vis_and(DST_2, REF_2, TMP26);
 
-	vis_and(TMP20, MASK_7f, TMP20);
+        vis_and(TMP20, MASK_7f, TMP20);
 
-	vis_and(TMP22, MASK_7f, TMP22);
+        vis_and(TMP22, MASK_7f, TMP22);
 
-	vis_padd16(TMP24, TMP20, TMP20);
-	vis_st64(TMP20, dest[0]);
+        vis_padd16(TMP24, TMP20, TMP20);
+        vis_st64(TMP20, dest[0]);
 
-	vis_padd16(TMP26, TMP22, TMP22);
-	vis_st64_2(TMP22, dest, 8);
+        vis_padd16(TMP26, TMP22, TMP22);
+        vis_st64_2(TMP22, dest, 8);
 }
 
 static void MC_avg_no_round_o_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
+                            const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
+        uint8_t *ref = (uint8_t *) _ref;
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[0], TMP0);
+        vis_ld64(ref[0], TMP0);
 
-	vis_ld64(ref[8], TMP2);
+        vis_ld64(ref[8], TMP2);
 
-	vis_ld64(dest[0], DST_0);
+        vis_ld64(dest[0], DST_0);
 
-	vis_ld64(constants_fe[0], MASK_fe);
+        vis_ld64(constants_fe[0], MASK_fe);
 
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_ld64(constants128[0], CONST_128);
+        vis_ld64(constants128[0], CONST_128);
 
-	ref += stride;
-	height = (height >> 1) - 1;
+        ref += stride;
+        height = (height >> 1) - 1;
 
-	do {	/* 12 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(DST_0, REF_0, TMP4);
+        do {    /* 12 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(DST_0, REF_0, TMP4);
 
-		vis_ld64(ref[8], TMP2);
-		vis_and(TMP4, MASK_fe, TMP4);
+                vis_ld64(ref[8], TMP2);
+                vis_and(TMP4, MASK_fe, TMP4);
 
-		vis_and(DST_0, REF_0, TMP6);
-		vis_ld64_2(dest, stride, DST_0);
-		ref += stride;
-		vis_mul8x16(CONST_128, TMP4, TMP4);
+                vis_and(DST_0, REF_0, TMP6);
+                vis_ld64_2(dest, stride, DST_0);
+                ref += stride;
+                vis_mul8x16(CONST_128, TMP4, TMP4);
 
-		vis_ld64(ref[0], TMP12);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64(ref[0], TMP12);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64(ref[8], TMP2);
-		vis_xor(DST_0, REF_0, TMP0);
-		ref += stride;
+                vis_ld64(ref[8], TMP2);
+                vis_xor(DST_0, REF_0, TMP0);
+                ref += stride;
 
-		vis_and(TMP0, MASK_fe, TMP0);
+                vis_and(TMP0, MASK_fe, TMP0);
 
-		vis_and(TMP4, MASK_7f, TMP4);
+                vis_and(TMP4, MASK_7f, TMP4);
 
-		vis_padd16(TMP6, TMP4, TMP4);
-		vis_st64(TMP4, dest[0]);
-		dest += stride;
-		vis_mul8x16(CONST_128, TMP0, TMP0);
+                vis_padd16(TMP6, TMP4, TMP4);
+                vis_st64(TMP4, dest[0]);
+                dest += stride;
+                vis_mul8x16(CONST_128, TMP0, TMP0);
 
-		vis_and(DST_0, REF_0, TMP6);
-		vis_ld64_2(dest, stride, DST_0);
+                vis_and(DST_0, REF_0, TMP6);
+                vis_ld64_2(dest, stride, DST_0);
 
-		vis_faligndata(TMP12, TMP2, REF_0);
+                vis_faligndata(TMP12, TMP2, REF_0);
 
-		vis_and(TMP0, MASK_7f, TMP0);
+                vis_and(TMP0, MASK_7f, TMP0);
 
-		vis_padd16(TMP6, TMP0, TMP4);
-		vis_st64(TMP4, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_padd16(TMP6, TMP0, TMP4);
+                vis_st64(TMP4, dest[0]);
+                dest += stride;
+        } while (--height);
 
-	vis_ld64(ref[0], TMP0);
-	vis_xor(DST_0, REF_0, TMP4);
+        vis_ld64(ref[0], TMP0);
+        vis_xor(DST_0, REF_0, TMP4);
 
-	vis_ld64(ref[8], TMP2);
-	vis_and(TMP4, MASK_fe, TMP4);
+        vis_ld64(ref[8], TMP2);
+        vis_and(TMP4, MASK_fe, TMP4);
 
-	vis_and(DST_0, REF_0, TMP6);
-	vis_ld64_2(dest, stride, DST_0);
-	vis_mul8x16(CONST_128, TMP4, TMP4);
+        vis_and(DST_0, REF_0, TMP6);
+        vis_ld64_2(dest, stride, DST_0);
+        vis_mul8x16(CONST_128, TMP4, TMP4);
 
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_xor(DST_0, REF_0, TMP0);
+        vis_xor(DST_0, REF_0, TMP0);
 
-	vis_and(TMP0, MASK_fe, TMP0);
+        vis_and(TMP0, MASK_fe, TMP0);
 
-	vis_and(TMP4, MASK_7f, TMP4);
+        vis_and(TMP4, MASK_7f, TMP4);
 
-	vis_padd16(TMP6, TMP4, TMP4);
-	vis_st64(TMP4, dest[0]);
-	dest += stride;
-	vis_mul8x16(CONST_128, TMP0, TMP0);
+        vis_padd16(TMP6, TMP4, TMP4);
+        vis_st64(TMP4, dest[0]);
+        dest += stride;
+        vis_mul8x16(CONST_128, TMP0, TMP0);
 
-	vis_and(DST_0, REF_0, TMP6);
+        vis_and(DST_0, REF_0, TMP6);
 
-	vis_and(TMP0, MASK_7f, TMP0);
+        vis_and(TMP0, MASK_7f, TMP0);
 
-	vis_padd16(TMP6, TMP0, TMP4);
-	vis_st64(TMP4, dest[0]);
+        vis_padd16(TMP6, TMP0, TMP4);
+        vis_st64(TMP4, dest[0]);
 }
 
 static void MC_put_no_round_x_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[0],    TMP0);
+        vis_ld64(ref[0],    TMP0);
 
-	vis_ld64_2(ref, 8,  TMP2);
+        vis_ld64_2(ref, 8,  TMP2);
 
-	vis_ld64_2(ref, 16, TMP4);
+        vis_ld64_2(ref, 16, TMP4);
 
-	vis_ld64(constants_fe[0], MASK_fe);
+        vis_ld64(constants_fe[0], MASK_fe);
 
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_ld64(constants128[0], CONST_128);
-	vis_faligndata(TMP2, TMP4, REF_4);
+        vis_ld64(constants128[0], CONST_128);
+        vis_faligndata(TMP2, TMP4, REF_4);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_2);
-		vis_faligndata(TMP2, TMP4, REF_6);
-	} else {
-		vis_src1(TMP2, REF_2);
-		vis_src1(TMP4, REF_6);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+                vis_faligndata(TMP2, TMP4, REF_6);
+        } else {
+                vis_src1(TMP2, REF_2);
+                vis_src1(TMP4, REF_6);
+        }
 
-	ref += stride;
-	height = (height >> 1) - 1;
+        ref += stride;
+        height = (height >> 1) - 1;
 
-	do {	/* 34 cycles */
-		vis_ld64(ref[0],    TMP0);
-		vis_xor(REF_0, REF_2, TMP6);
+        do {    /* 34 cycles */
+                vis_ld64(ref[0],    TMP0);
+                vis_xor(REF_0, REF_2, TMP6);
 
-		vis_ld64_2(ref, 8,  TMP2);
-		vis_xor(REF_4, REF_6, TMP8);
+                vis_ld64_2(ref, 8,  TMP2);
+                vis_xor(REF_4, REF_6, TMP8);
 
-		vis_ld64_2(ref, 16, TMP4);
-		vis_and(TMP6, MASK_fe, TMP6);
-		ref += stride;
+                vis_ld64_2(ref, 16, TMP4);
+                vis_and(TMP6, MASK_fe, TMP6);
+                ref += stride;
 
-		vis_ld64(ref[0],    TMP14);
-		vis_mul8x16(CONST_128, TMP6, TMP6);
-		vis_and(TMP8, MASK_fe, TMP8);
+                vis_ld64(ref[0],    TMP14);
+                vis_mul8x16(CONST_128, TMP6, TMP6);
+                vis_and(TMP8, MASK_fe, TMP8);
 
-		vis_ld64_2(ref, 8,  TMP16);
-		vis_mul8x16(CONST_128, TMP8, TMP8);
-		vis_and(REF_0, REF_2, TMP10);
+                vis_ld64_2(ref, 8,  TMP16);
+                vis_mul8x16(CONST_128, TMP8, TMP8);
+                vis_and(REF_0, REF_2, TMP10);
 
-		vis_ld64_2(ref, 16, TMP18);
-		ref += stride;
-		vis_and(REF_4, REF_6, TMP12);
+                vis_ld64_2(ref, 16, TMP18);
+                ref += stride;
+                vis_and(REF_4, REF_6, TMP12);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_faligndata(TMP2, TMP4, REF_4);
+                vis_faligndata(TMP2, TMP4, REF_4);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-			vis_faligndata(TMP2, TMP4, REF_6);
-		} else {
-			vis_src1(TMP2, REF_2);
-			vis_src1(TMP4, REF_6);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                }
 
-		vis_and(TMP6, MASK_7f, TMP6);
+                vis_and(TMP6, MASK_7f, TMP6);
 
-		vis_and(TMP8, MASK_7f, TMP8);
+                vis_and(TMP8, MASK_7f, TMP8);
 
-		vis_padd16(TMP10, TMP6, TMP6);
-		vis_st64(TMP6, dest[0]);
+                vis_padd16(TMP10, TMP6, TMP6);
+                vis_st64(TMP6, dest[0]);
 
-		vis_padd16(TMP12, TMP8, TMP8);
-		vis_st64_2(TMP8, dest, 8);
-		dest += stride;
+                vis_padd16(TMP12, TMP8, TMP8);
+                vis_st64_2(TMP8, dest, 8);
+                dest += stride;
 
-		vis_xor(REF_0, REF_2, TMP6);
+                vis_xor(REF_0, REF_2, TMP6);
 
-		vis_xor(REF_4, REF_6, TMP8);
+                vis_xor(REF_4, REF_6, TMP8);
 
-		vis_and(TMP6, MASK_fe, TMP6);
+                vis_and(TMP6, MASK_fe, TMP6);
 
-		vis_mul8x16(CONST_128, TMP6, TMP6);
-		vis_and(TMP8, MASK_fe, TMP8);
+                vis_mul8x16(CONST_128, TMP6, TMP6);
+                vis_and(TMP8, MASK_fe, TMP8);
 
-		vis_mul8x16(CONST_128, TMP8, TMP8);
-		vis_and(REF_0, REF_2, TMP10);
+                vis_mul8x16(CONST_128, TMP8, TMP8);
+                vis_and(REF_0, REF_2, TMP10);
 
-		vis_and(REF_4, REF_6, TMP12);
+                vis_and(REF_4, REF_6, TMP12);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_faligndata(TMP14, TMP16, REF_0);
+                vis_faligndata(TMP14, TMP16, REF_0);
 
-		vis_faligndata(TMP16, TMP18, REF_4);
+                vis_faligndata(TMP16, TMP18, REF_4);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP14, TMP16, REF_2);
-			vis_faligndata(TMP16, TMP18, REF_6);
-		} else {
-			vis_src1(TMP16, REF_2);
-			vis_src1(TMP18, REF_6);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP14, TMP16, REF_2);
+                        vis_faligndata(TMP16, TMP18, REF_6);
+                } else {
+                        vis_src1(TMP16, REF_2);
+                        vis_src1(TMP18, REF_6);
+                }
 
-		vis_and(TMP6, MASK_7f, TMP6);
+                vis_and(TMP6, MASK_7f, TMP6);
 
-		vis_and(TMP8, MASK_7f, TMP8);
+                vis_and(TMP8, MASK_7f, TMP8);
 
-		vis_padd16(TMP10, TMP6, TMP6);
-		vis_st64(TMP6, dest[0]);
+                vis_padd16(TMP10, TMP6, TMP6);
+                vis_st64(TMP6, dest[0]);
 
-		vis_padd16(TMP12, TMP8, TMP8);
-		vis_st64_2(TMP8, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_padd16(TMP12, TMP8, TMP8);
+                vis_st64_2(TMP8, dest, 8);
+                dest += stride;
+        } while (--height);
 
-	vis_ld64(ref[0],    TMP0);
-	vis_xor(REF_0, REF_2, TMP6);
+        vis_ld64(ref[0],    TMP0);
+        vis_xor(REF_0, REF_2, TMP6);
 
-	vis_ld64_2(ref, 8,  TMP2);
-	vis_xor(REF_4, REF_6, TMP8);
+        vis_ld64_2(ref, 8,  TMP2);
+        vis_xor(REF_4, REF_6, TMP8);
 
-	vis_ld64_2(ref, 16, TMP4);
-	vis_and(TMP6, MASK_fe, TMP6);
+        vis_ld64_2(ref, 16, TMP4);
+        vis_and(TMP6, MASK_fe, TMP6);
 
-	vis_mul8x16(CONST_128, TMP6, TMP6);
-	vis_and(TMP8, MASK_fe, TMP8);
+        vis_mul8x16(CONST_128, TMP6, TMP6);
+        vis_and(TMP8, MASK_fe, TMP8);
 
-	vis_mul8x16(CONST_128, TMP8, TMP8);
-	vis_and(REF_0, REF_2, TMP10);
+        vis_mul8x16(CONST_128, TMP8, TMP8);
+        vis_and(REF_0, REF_2, TMP10);
 
-	vis_and(REF_4, REF_6, TMP12);
+        vis_and(REF_4, REF_6, TMP12);
 
-	vis_alignaddr_g0((void *)off);
+        vis_alignaddr_g0((void *)off);
 
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_faligndata(TMP2, TMP4, REF_4);
+        vis_faligndata(TMP2, TMP4, REF_4);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_2);
-		vis_faligndata(TMP2, TMP4, REF_6);
-	} else {
-		vis_src1(TMP2, REF_2);
-		vis_src1(TMP4, REF_6);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+                vis_faligndata(TMP2, TMP4, REF_6);
+        } else {
+                vis_src1(TMP2, REF_2);
+                vis_src1(TMP4, REF_6);
+        }
 
-	vis_and(TMP6, MASK_7f, TMP6);
+        vis_and(TMP6, MASK_7f, TMP6);
 
-	vis_and(TMP8, MASK_7f, TMP8);
+        vis_and(TMP8, MASK_7f, TMP8);
 
-	vis_padd16(TMP10, TMP6, TMP6);
-	vis_st64(TMP6, dest[0]);
+        vis_padd16(TMP10, TMP6, TMP6);
+        vis_st64(TMP6, dest[0]);
 
-	vis_padd16(TMP12, TMP8, TMP8);
-	vis_st64_2(TMP8, dest, 8);
-	dest += stride;
+        vis_padd16(TMP12, TMP8, TMP8);
+        vis_st64_2(TMP8, dest, 8);
+        dest += stride;
 
-	vis_xor(REF_0, REF_2, TMP6);
+        vis_xor(REF_0, REF_2, TMP6);
 
-	vis_xor(REF_4, REF_6, TMP8);
+        vis_xor(REF_4, REF_6, TMP8);
 
-	vis_and(TMP6, MASK_fe, TMP6);
+        vis_and(TMP6, MASK_fe, TMP6);
 
-	vis_mul8x16(CONST_128, TMP6, TMP6);
-	vis_and(TMP8, MASK_fe, TMP8);
+        vis_mul8x16(CONST_128, TMP6, TMP6);
+        vis_and(TMP8, MASK_fe, TMP8);
 
-	vis_mul8x16(CONST_128, TMP8, TMP8);
-	vis_and(REF_0, REF_2, TMP10);
+        vis_mul8x16(CONST_128, TMP8, TMP8);
+        vis_and(REF_0, REF_2, TMP10);
 
-	vis_and(REF_4, REF_6, TMP12);
+        vis_and(REF_4, REF_6, TMP12);
 
-	vis_and(TMP6, MASK_7f, TMP6);
+        vis_and(TMP6, MASK_7f, TMP6);
 
-	vis_and(TMP8, MASK_7f, TMP8);
+        vis_and(TMP8, MASK_7f, TMP8);
 
-	vis_padd16(TMP10, TMP6, TMP6);
-	vis_st64(TMP6, dest[0]);
+        vis_padd16(TMP10, TMP6, TMP6);
+        vis_st64(TMP6, dest[0]);
 
-	vis_padd16(TMP12, TMP8, TMP8);
-	vis_st64_2(TMP8, dest, 8);
+        vis_padd16(TMP12, TMP8, TMP8);
+        vis_st64_2(TMP8, dest, 8);
 }
 
 static void MC_put_no_round_x_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
+                            const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[0], TMP0);
+        vis_ld64(ref[0], TMP0);
 
-	vis_ld64(ref[8], TMP2);
+        vis_ld64(ref[8], TMP2);
 
-	vis_ld64(constants_fe[0], MASK_fe);
+        vis_ld64(constants_fe[0], MASK_fe);
 
-	vis_ld64(constants_7f[0], MASK_7f);
+        vis_ld64(constants_7f[0], MASK_7f);
 
-	vis_ld64(constants128[0], CONST_128);
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_ld64(constants128[0], CONST_128);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_2);
-	} else {
-		vis_src1(TMP2, REF_2);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+        } else {
+                vis_src1(TMP2, REF_2);
+        }
 
-	ref += stride;
-	height = (height >> 1) - 1;
+        ref += stride;
+        height = (height >> 1) - 1;
 
-	do {	/* 20 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(REF_0, REF_2, TMP4);
+        do {    /* 20 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(REF_0, REF_2, TMP4);
 
-		vis_ld64_2(ref, 8, TMP2);
-		vis_and(TMP4, MASK_fe, TMP4);
-		ref += stride;
+                vis_ld64_2(ref, 8, TMP2);
+                vis_and(TMP4, MASK_fe, TMP4);
+                ref += stride;
 
-		vis_ld64(ref[0], TMP8);
-		vis_and(REF_0, REF_2, TMP6);
-		vis_mul8x16(CONST_128, TMP4, TMP4);
+                vis_ld64(ref[0], TMP8);
+                vis_and(REF_0, REF_2, TMP6);
+                vis_mul8x16(CONST_128, TMP4, TMP4);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64_2(ref, 8, TMP10);
-		ref += stride;
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64_2(ref, 8, TMP10);
+                ref += stride;
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-		} else {
-			vis_src1(TMP2, REF_2);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                }
 
-		vis_and(TMP4, MASK_7f, TMP4);
+                vis_and(TMP4, MASK_7f, TMP4);
 
-		vis_padd16(TMP6, TMP4, DST_0);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
+                vis_padd16(TMP6, TMP4, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
 
-		vis_xor(REF_0, REF_2, TMP12);
+                vis_xor(REF_0, REF_2, TMP12);
 
-		vis_and(TMP12, MASK_fe, TMP12);
+                vis_and(TMP12, MASK_fe, TMP12);
 
-		vis_and(REF_0, REF_2, TMP14);
-		vis_mul8x16(CONST_128, TMP12, TMP12);
+                vis_and(REF_0, REF_2, TMP14);
+                vis_mul8x16(CONST_128, TMP12, TMP12);
 
-		vis_alignaddr_g0((void *)off);
-		vis_faligndata(TMP8, TMP10, REF_0);
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP8, TMP10, REF_2);
-		} else {
-			vis_src1(TMP10, REF_2);
-		}
+                vis_alignaddr_g0((void *)off);
+                vis_faligndata(TMP8, TMP10, REF_0);
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP8, TMP10, REF_2);
+                } else {
+                        vis_src1(TMP10, REF_2);
+                }
 
-		vis_and(TMP12, MASK_7f, TMP12);
+                vis_and(TMP12, MASK_7f, TMP12);
 
-		vis_padd16(TMP14, TMP12, DST_0);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_padd16(TMP14, TMP12, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+        } while (--height);
 
-	vis_ld64(ref[0], TMP0);
-	vis_xor(REF_0, REF_2, TMP4);
+        vis_ld64(ref[0], TMP0);
+        vis_xor(REF_0, REF_2, TMP4);
 
-	vis_ld64_2(ref, 8, TMP2);
-	vis_and(TMP4, MASK_fe, TMP4);
+        vis_ld64_2(ref, 8, TMP2);
+        vis_and(TMP4, MASK_fe, TMP4);
 
-	vis_and(REF_0, REF_2, TMP6);
-	vis_mul8x16(CONST_128, TMP4, TMP4);
+        vis_and(REF_0, REF_2, TMP6);
+        vis_mul8x16(CONST_128, TMP4, TMP4);
 
-	vis_alignaddr_g0((void *)off);
+        vis_alignaddr_g0((void *)off);
 
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_2);
-	} else {
-		vis_src1(TMP2, REF_2);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_2);
+        } else {
+                vis_src1(TMP2, REF_2);
+        }
 
-	vis_and(TMP4, MASK_7f, TMP4);
+        vis_and(TMP4, MASK_7f, TMP4);
 
-	vis_padd16(TMP6, TMP4, DST_0);
-	vis_st64(DST_0, dest[0]);
-	dest += stride;
+        vis_padd16(TMP6, TMP4, DST_0);
+        vis_st64(DST_0, dest[0]);
+        dest += stride;
 
-	vis_xor(REF_0, REF_2, TMP12);
+        vis_xor(REF_0, REF_2, TMP12);
 
-	vis_and(TMP12, MASK_fe, TMP12);
+        vis_and(TMP12, MASK_fe, TMP12);
 
-	vis_and(REF_0, REF_2, TMP14);
-	vis_mul8x16(CONST_128, TMP12, TMP12);
+        vis_and(REF_0, REF_2, TMP14);
+        vis_mul8x16(CONST_128, TMP12, TMP12);
 
-	vis_and(TMP12, MASK_7f, TMP12);
+        vis_and(TMP12, MASK_7f, TMP12);
 
-	vis_padd16(TMP14, TMP12, DST_0);
-	vis_st64(DST_0, dest[0]);
-	dest += stride;
+        vis_padd16(TMP14, TMP12, DST_0);
+        vis_st64(DST_0, dest[0]);
+        dest += stride;
 }
 
 static void MC_avg_no_round_x_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
 
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
 
-	vis_ld64(constants3[0], CONST_3);
-	vis_fzero(ZERO);
-	vis_ld64(constants256_512[0], CONST_256);
+        vis_ld64(constants3[0], CONST_3);
+        vis_fzero(ZERO);
+        vis_ld64(constants256_512[0], CONST_256);
 
-	ref = vis_alignaddr(ref);
-	do {	/* 26 cycles */
-		vis_ld64(ref[0], TMP0);
+        ref = vis_alignaddr(ref);
+        do {    /* 26 cycles */
+                vis_ld64(ref[0], TMP0);
 
-		vis_ld64(ref[8], TMP2);
+                vis_ld64(ref[8], TMP2);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64(ref[16], TMP4);
+                vis_ld64(ref[16], TMP4);
 
-		vis_ld64(dest[0], DST_0);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64(dest[8], DST_2);
-		vis_faligndata(TMP2, TMP4, REF_4);
+                vis_ld64(dest[8], DST_2);
+                vis_faligndata(TMP2, TMP4, REF_4);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-			vis_faligndata(TMP2, TMP4, REF_6);
-		} else {
-			vis_src1(TMP2, REF_2);
-			vis_src1(TMP4, REF_6);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                }
 
-		vis_mul8x16au(REF_0,   CONST_256, TMP0);
+                vis_mul8x16au(REF_0,   CONST_256, TMP0);
 
-		vis_pmerge(ZERO,     REF_2,     TMP4);
-		vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+                vis_pmerge(ZERO,     REF_2,     TMP4);
+                vis_mul8x16au(REF_0_1, CONST_256, TMP2);
 
-		vis_pmerge(ZERO, REF_2_1, TMP6);
+                vis_pmerge(ZERO, REF_2_1, TMP6);
 
-		vis_padd16(TMP0, TMP4, TMP0);
+                vis_padd16(TMP0, TMP4, TMP0);
 
-		vis_mul8x16al(DST_0,   CONST_512, TMP4);
-		vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16al(DST_0,   CONST_512, TMP4);
+                vis_padd16(TMP2, TMP6, TMP2);
 
-		vis_mul8x16al(DST_1,   CONST_512, TMP6);
+                vis_mul8x16al(DST_1,   CONST_512, TMP6);
 
-		vis_mul8x16au(REF_6,   CONST_256, TMP12);
+                vis_mul8x16au(REF_6,   CONST_256, TMP12);
 
-		vis_padd16(TMP0, TMP4, TMP0);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP14);
+                vis_padd16(TMP0, TMP4, TMP0);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP14);
 
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_mul8x16au(REF_4,   CONST_256, TMP16);
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_4,   CONST_256, TMP16);
 
-		vis_padd16(TMP0, CONST_3, TMP8);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP18);
+                vis_padd16(TMP0, CONST_3, TMP8);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP18);
 
-		vis_padd16(TMP2, CONST_3, TMP10);
-		vis_pack16(TMP8, DST_0);
+                vis_padd16(TMP2, CONST_3, TMP10);
+                vis_pack16(TMP8, DST_0);
 
-		vis_pack16(TMP10, DST_1);
-		vis_padd16(TMP16, TMP12, TMP0);
+                vis_pack16(TMP10, DST_1);
+                vis_padd16(TMP16, TMP12, TMP0);
 
-		vis_st64(DST_0, dest[0]);
-		vis_mul8x16al(DST_2,   CONST_512, TMP4);
-		vis_padd16(TMP18, TMP14, TMP2);
+                vis_st64(DST_0, dest[0]);
+                vis_mul8x16al(DST_2,   CONST_512, TMP4);
+                vis_padd16(TMP18, TMP14, TMP2);
 
-		vis_mul8x16al(DST_3,   CONST_512, TMP6);
-		vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16al(DST_3,   CONST_512, TMP6);
+                vis_padd16(TMP0, CONST_3, TMP0);
 
-		vis_padd16(TMP2, CONST_3, TMP2);
+                vis_padd16(TMP2, CONST_3, TMP2);
 
-		vis_padd16(TMP0, TMP4, TMP0);
+                vis_padd16(TMP0, TMP4, TMP0);
 
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_pack16(TMP0, DST_2);
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_pack16(TMP0, DST_2);
 
-		vis_pack16(TMP2, DST_3);
-		vis_st64(DST_2, dest[8]);
+                vis_pack16(TMP2, DST_3);
+                vis_st64(DST_2, dest[8]);
 
-		ref += stride;
-		dest += stride;
-	} while (--height);
+                ref += stride;
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_avg_no_round_x_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
+                            const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_times_2 = stride << 1;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_times_2 = stride << 1;
 
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
 
-	vis_ld64(constants3[0], CONST_3);
-	vis_fzero(ZERO);
-	vis_ld64(constants256_512[0], CONST_256);
+        vis_ld64(constants3[0], CONST_3);
+        vis_fzero(ZERO);
+        vis_ld64(constants256_512[0], CONST_256);
 
-	ref = vis_alignaddr(ref);
-	height >>= 2;
-	do {	/* 47 cycles */
-		vis_ld64(ref[0],   TMP0);
+        ref = vis_alignaddr(ref);
+        height >>= 2;
+        do {    /* 47 cycles */
+                vis_ld64(ref[0],   TMP0);
 
-		vis_ld64_2(ref, 8, TMP2);
-		ref += stride;
+                vis_ld64_2(ref, 8, TMP2);
+                ref += stride;
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64(ref[0],   TMP4);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64(ref[0],   TMP4);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(ref, 8, TMP6);
-		ref += stride;
+                vis_ld64_2(ref, 8, TMP6);
+                ref += stride;
 
-		vis_ld64(ref[0],   TMP8);
+                vis_ld64(ref[0],   TMP8);
 
-		vis_ld64_2(ref, 8, TMP10);
-		ref += stride;
-		vis_faligndata(TMP4, TMP6, REF_4);
+                vis_ld64_2(ref, 8, TMP10);
+                ref += stride;
+                vis_faligndata(TMP4, TMP6, REF_4);
 
-		vis_ld64(ref[0],   TMP12);
+                vis_ld64(ref[0],   TMP12);
 
-		vis_ld64_2(ref, 8, TMP14);
-		ref += stride;
-		vis_faligndata(TMP8, TMP10, REF_S0);
+                vis_ld64_2(ref, 8, TMP14);
+                ref += stride;
+                vis_faligndata(TMP8, TMP10, REF_S0);
 
-		vis_faligndata(TMP12, TMP14, REF_S4);
+                vis_faligndata(TMP12, TMP14, REF_S4);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
 
-			vis_ld64(dest[0], DST_0);
-			vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_ld64(dest[0], DST_0);
+                        vis_faligndata(TMP0, TMP2, REF_2);
 
-			vis_ld64_2(dest, stride, DST_2);
-			vis_faligndata(TMP4, TMP6, REF_6);
+                        vis_ld64_2(dest, stride, DST_2);
+                        vis_faligndata(TMP4, TMP6, REF_6);
 
-			vis_faligndata(TMP8, TMP10, REF_S2);
+                        vis_faligndata(TMP8, TMP10, REF_S2);
 
-			vis_faligndata(TMP12, TMP14, REF_S6);
-		} else {
-			vis_ld64(dest[0], DST_0);
-			vis_src1(TMP2, REF_2);
+                        vis_faligndata(TMP12, TMP14, REF_S6);
+                } else {
+                        vis_ld64(dest[0], DST_0);
+                        vis_src1(TMP2, REF_2);
 
-			vis_ld64_2(dest, stride, DST_2);
-			vis_src1(TMP6, REF_6);
+                        vis_ld64_2(dest, stride, DST_2);
+                        vis_src1(TMP6, REF_6);
 
-			vis_src1(TMP10, REF_S2);
+                        vis_src1(TMP10, REF_S2);
 
-			vis_src1(TMP14, REF_S6);
-		}
+                        vis_src1(TMP14, REF_S6);
+                }
 
-		vis_pmerge(ZERO,     REF_0,     TMP0);
-		vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+                vis_pmerge(ZERO,     REF_0,     TMP0);
+                vis_mul8x16au(REF_0_1, CONST_256, TMP2);
 
-		vis_pmerge(ZERO,     REF_2,     TMP4);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP6);
+                vis_pmerge(ZERO,     REF_2,     TMP4);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP6);
 
-		vis_padd16(TMP0, CONST_3, TMP0);
-		vis_mul8x16al(DST_0,   CONST_512, TMP16);
+                vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16al(DST_0,   CONST_512, TMP16);
 
-		vis_padd16(TMP2, CONST_3, TMP2);
-		vis_mul8x16al(DST_1,   CONST_512, TMP18);
+                vis_padd16(TMP2, CONST_3, TMP2);
+                vis_mul8x16al(DST_1,   CONST_512, TMP18);
 
-		vis_padd16(TMP0, TMP4, TMP0);
-		vis_mul8x16au(REF_4, CONST_256, TMP8);
+                vis_padd16(TMP0, TMP4, TMP0);
+                vis_mul8x16au(REF_4, CONST_256, TMP8);
 
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP10);
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP10);
 
-		vis_padd16(TMP0, TMP16, TMP0);
-		vis_mul8x16au(REF_6, CONST_256, TMP12);
+                vis_padd16(TMP0, TMP16, TMP0);
+                vis_mul8x16au(REF_6, CONST_256, TMP12);
 
-		vis_padd16(TMP2, TMP18, TMP2);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP14);
+                vis_padd16(TMP2, TMP18, TMP2);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP14);
 
-		vis_padd16(TMP8, CONST_3, TMP8);
-		vis_mul8x16al(DST_2, CONST_512, TMP16);
+                vis_padd16(TMP8, CONST_3, TMP8);
+                vis_mul8x16al(DST_2, CONST_512, TMP16);
 
-		vis_padd16(TMP8, TMP12, TMP8);
-		vis_mul8x16al(DST_3, CONST_512, TMP18);
+                vis_padd16(TMP8, TMP12, TMP8);
+                vis_mul8x16al(DST_3, CONST_512, TMP18);
 
-		vis_padd16(TMP10, TMP14, TMP10);
-		vis_pack16(TMP0, DST_0);
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_pack16(TMP0, DST_0);
 
-		vis_pack16(TMP2, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-		vis_padd16(TMP10, CONST_3, TMP10);
+                vis_pack16(TMP2, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+                vis_padd16(TMP10, CONST_3, TMP10);
 
-		vis_ld64_2(dest, stride, DST_0);
-		vis_padd16(TMP8, TMP16, TMP8);
+                vis_ld64_2(dest, stride, DST_0);
+                vis_padd16(TMP8, TMP16, TMP8);
 
-		vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/);
-		vis_padd16(TMP10, TMP18, TMP10);
-		vis_pack16(TMP8, DST_2);
+                vis_ld64_2(dest, stride_times_2, TMP4/*DST_2*/);
+                vis_padd16(TMP10, TMP18, TMP10);
+                vis_pack16(TMP8, DST_2);
 
-		vis_pack16(TMP10, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
+                vis_pack16(TMP10, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
 
-		vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
-		vis_pmerge(ZERO,     REF_S0,     TMP0);
+                vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
+                vis_pmerge(ZERO,     REF_S0,     TMP0);
 
-		vis_pmerge(ZERO,     REF_S2,     TMP24);
-		vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
+                vis_pmerge(ZERO,     REF_S2,     TMP24);
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
 
-		vis_padd16(TMP0, CONST_3, TMP0);
-		vis_mul8x16au(REF_S4, CONST_256, TMP8);
+                vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16au(REF_S4, CONST_256, TMP8);
 
-		vis_padd16(TMP2, CONST_3, TMP2);
-		vis_mul8x16au(REF_S4_1, CONST_256, TMP10);
+                vis_padd16(TMP2, CONST_3, TMP2);
+                vis_mul8x16au(REF_S4_1, CONST_256, TMP10);
 
-		vis_padd16(TMP0, TMP24, TMP0);
-		vis_mul8x16au(REF_S6, CONST_256, TMP12);
+                vis_padd16(TMP0, TMP24, TMP0);
+                vis_mul8x16au(REF_S6, CONST_256, TMP12);
 
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_mul8x16au(REF_S6_1, CONST_256, TMP14);
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_S6_1, CONST_256, TMP14);
 
-		vis_padd16(TMP8, CONST_3, TMP8);
-		vis_mul8x16al(DST_0,   CONST_512, TMP16);
+                vis_padd16(TMP8, CONST_3, TMP8);
+                vis_mul8x16al(DST_0,   CONST_512, TMP16);
 
-		vis_padd16(TMP10, CONST_3, TMP10);
-		vis_mul8x16al(DST_1,   CONST_512, TMP18);
+                vis_padd16(TMP10, CONST_3, TMP10);
+                vis_mul8x16al(DST_1,   CONST_512, TMP18);
 
-		vis_padd16(TMP8, TMP12, TMP8);
-		vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20);
+                vis_padd16(TMP8, TMP12, TMP8);
+                vis_mul8x16al(TMP4/*DST_2*/, CONST_512, TMP20);
 
-		vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22);
-		vis_padd16(TMP0, TMP16, TMP0);
+                vis_mul8x16al(TMP5/*DST_3*/, CONST_512, TMP22);
+                vis_padd16(TMP0, TMP16, TMP0);
 
-		vis_padd16(TMP2, TMP18, TMP2);
-		vis_pack16(TMP0, DST_0);
+                vis_padd16(TMP2, TMP18, TMP2);
+                vis_pack16(TMP0, DST_0);
 
-		vis_padd16(TMP10, TMP14, TMP10);
-		vis_pack16(TMP2, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_pack16(TMP2, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
 
-		vis_padd16(TMP8, TMP20, TMP8);
+                vis_padd16(TMP8, TMP20, TMP8);
 
-		vis_padd16(TMP10, TMP22, TMP10);
-		vis_pack16(TMP8, DST_2);
+                vis_padd16(TMP10, TMP22, TMP10);
+                vis_pack16(TMP8, DST_2);
 
-		vis_pack16(TMP10, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_pack16(TMP10, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_put_no_round_y_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
+        uint8_t *ref = (uint8_t *) _ref;
 
-	ref = vis_alignaddr(ref);
-	vis_ld64(ref[0], TMP0);
+        ref = vis_alignaddr(ref);
+        vis_ld64(ref[0], TMP0);
 
-	vis_ld64_2(ref, 8, TMP2);
+        vis_ld64_2(ref, 8, TMP2);
 
-	vis_ld64_2(ref, 16, TMP4);
-	ref += stride;
+        vis_ld64_2(ref, 16, TMP4);
+        ref += stride;
 
-	vis_ld64(ref[0], TMP6);
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_ld64(ref[0], TMP6);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_ld64_2(ref, 8, TMP8);
-	vis_faligndata(TMP2, TMP4, REF_4);
+        vis_ld64_2(ref, 8, TMP8);
+        vis_faligndata(TMP2, TMP4, REF_4);
 
-	vis_ld64_2(ref, 16, TMP10);
-	ref += stride;
+        vis_ld64_2(ref, 16, TMP10);
+        ref += stride;
 
-	vis_ld64(constants_fe[0], MASK_fe);
-	vis_faligndata(TMP6, TMP8, REF_2);
+        vis_ld64(constants_fe[0], MASK_fe);
+        vis_faligndata(TMP6, TMP8, REF_2);
 
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP8, TMP10, REF_6);
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP8, TMP10, REF_6);
 
-	vis_ld64(constants128[0], CONST_128);
-	height = (height >> 1) - 1;
-	do {	/* 24 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(REF_0, REF_2, TMP12);
+        vis_ld64(constants128[0], CONST_128);
+        height = (height >> 1) - 1;
+        do {    /* 24 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(REF_0, REF_2, TMP12);
 
-		vis_ld64_2(ref, 8, TMP2);
-		vis_xor(REF_4, REF_6, TMP16);
+                vis_ld64_2(ref, 8, TMP2);
+                vis_xor(REF_4, REF_6, TMP16);
 
-		vis_ld64_2(ref, 16, TMP4);
-		ref += stride;
-		vis_and(REF_0, REF_2, TMP14);
+                vis_ld64_2(ref, 16, TMP4);
+                ref += stride;
+                vis_and(REF_0, REF_2, TMP14);
 
-		vis_ld64(ref[0], TMP6);
-		vis_and(REF_4, REF_6, TMP18);
+                vis_ld64(ref[0], TMP6);
+                vis_and(REF_4, REF_6, TMP18);
 
-		vis_ld64_2(ref, 8, TMP8);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64_2(ref, 8, TMP8);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(ref, 16, TMP10);
-		ref += stride;
-		vis_faligndata(TMP2, TMP4, REF_4);
+                vis_ld64_2(ref, 16, TMP10);
+                ref += stride;
+                vis_faligndata(TMP2, TMP4, REF_4);
 
-		vis_and(TMP12, MASK_fe, TMP12);
+                vis_and(TMP12, MASK_fe, TMP12);
 
-		vis_and(TMP16, MASK_fe, TMP16);
-		vis_mul8x16(CONST_128, TMP12, TMP12);
+                vis_and(TMP16, MASK_fe, TMP16);
+                vis_mul8x16(CONST_128, TMP12, TMP12);
 
-		vis_mul8x16(CONST_128, TMP16, TMP16);
-		vis_xor(REF_0, REF_2, TMP0);
+                vis_mul8x16(CONST_128, TMP16, TMP16);
+                vis_xor(REF_0, REF_2, TMP0);
 
-		vis_xor(REF_4, REF_6, TMP2);
+                vis_xor(REF_4, REF_6, TMP2);
 
-		vis_and(REF_0, REF_2, TMP20);
+                vis_and(REF_0, REF_2, TMP20);
 
-		vis_and(TMP12, MASK_7f, TMP12);
+                vis_and(TMP12, MASK_7f, TMP12);
 
-		vis_and(TMP16, MASK_7f, TMP16);
+                vis_and(TMP16, MASK_7f, TMP16);
 
-		vis_padd16(TMP14, TMP12, TMP12);
-		vis_st64(TMP12, dest[0]);
+                vis_padd16(TMP14, TMP12, TMP12);
+                vis_st64(TMP12, dest[0]);
 
-		vis_padd16(TMP18, TMP16, TMP16);
-		vis_st64_2(TMP16, dest, 8);
-		dest += stride;
+                vis_padd16(TMP18, TMP16, TMP16);
+                vis_st64_2(TMP16, dest, 8);
+                dest += stride;
 
-		vis_and(REF_4, REF_6, TMP18);
+                vis_and(REF_4, REF_6, TMP18);
 
-		vis_and(TMP0, MASK_fe, TMP0);
+                vis_and(TMP0, MASK_fe, TMP0);
 
-		vis_and(TMP2, MASK_fe, TMP2);
-		vis_mul8x16(CONST_128, TMP0, TMP0);
+                vis_and(TMP2, MASK_fe, TMP2);
+                vis_mul8x16(CONST_128, TMP0, TMP0);
 
-		vis_faligndata(TMP6, TMP8, REF_2);
-		vis_mul8x16(CONST_128, TMP2, TMP2);
+                vis_faligndata(TMP6, TMP8, REF_2);
+                vis_mul8x16(CONST_128, TMP2, TMP2);
 
-		vis_faligndata(TMP8, TMP10, REF_6);
+                vis_faligndata(TMP8, TMP10, REF_6);
 
-		vis_and(TMP0, MASK_7f, TMP0);
+                vis_and(TMP0, MASK_7f, TMP0);
 
-		vis_and(TMP2, MASK_7f, TMP2);
+                vis_and(TMP2, MASK_7f, TMP2);
 
-		vis_padd16(TMP20, TMP0, TMP0);
-		vis_st64(TMP0, dest[0]);
+                vis_padd16(TMP20, TMP0, TMP0);
+                vis_st64(TMP0, dest[0]);
 
-		vis_padd16(TMP18, TMP2, TMP2);
-		vis_st64_2(TMP2, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_padd16(TMP18, TMP2, TMP2);
+                vis_st64_2(TMP2, dest, 8);
+                dest += stride;
+        } while (--height);
 
-	vis_ld64(ref[0], TMP0);
-	vis_xor(REF_0, REF_2, TMP12);
+        vis_ld64(ref[0], TMP0);
+        vis_xor(REF_0, REF_2, TMP12);
 
-	vis_ld64_2(ref, 8, TMP2);
-	vis_xor(REF_4, REF_6, TMP16);
+        vis_ld64_2(ref, 8, TMP2);
+        vis_xor(REF_4, REF_6, TMP16);
 
-	vis_ld64_2(ref, 16, TMP4);
-	vis_and(REF_0, REF_2, TMP14);
+        vis_ld64_2(ref, 16, TMP4);
+        vis_and(REF_0, REF_2, TMP14);
 
-	vis_and(REF_4, REF_6, TMP18);
+        vis_and(REF_4, REF_6, TMP18);
 
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_faligndata(TMP2, TMP4, REF_4);
+        vis_faligndata(TMP2, TMP4, REF_4);
 
-	vis_and(TMP12, MASK_fe, TMP12);
+        vis_and(TMP12, MASK_fe, TMP12);
 
-	vis_and(TMP16, MASK_fe, TMP16);
-	vis_mul8x16(CONST_128, TMP12, TMP12);
+        vis_and(TMP16, MASK_fe, TMP16);
+        vis_mul8x16(CONST_128, TMP12, TMP12);
 
-	vis_mul8x16(CONST_128, TMP16, TMP16);
-	vis_xor(REF_0, REF_2, TMP0);
+        vis_mul8x16(CONST_128, TMP16, TMP16);
+        vis_xor(REF_0, REF_2, TMP0);
 
-	vis_xor(REF_4, REF_6, TMP2);
+        vis_xor(REF_4, REF_6, TMP2);
 
-	vis_and(REF_0, REF_2, TMP20);
+        vis_and(REF_0, REF_2, TMP20);
 
-	vis_and(TMP12, MASK_7f, TMP12);
+        vis_and(TMP12, MASK_7f, TMP12);
 
-	vis_and(TMP16, MASK_7f, TMP16);
+        vis_and(TMP16, MASK_7f, TMP16);
 
-	vis_padd16(TMP14, TMP12, TMP12);
-	vis_st64(TMP12, dest[0]);
+        vis_padd16(TMP14, TMP12, TMP12);
+        vis_st64(TMP12, dest[0]);
 
-	vis_padd16(TMP18, TMP16, TMP16);
-	vis_st64_2(TMP16, dest, 8);
-	dest += stride;
+        vis_padd16(TMP18, TMP16, TMP16);
+        vis_st64_2(TMP16, dest, 8);
+        dest += stride;
 
-	vis_and(REF_4, REF_6, TMP18);
+        vis_and(REF_4, REF_6, TMP18);
 
-	vis_and(TMP0, MASK_fe, TMP0);
+        vis_and(TMP0, MASK_fe, TMP0);
 
-	vis_and(TMP2, MASK_fe, TMP2);
-	vis_mul8x16(CONST_128, TMP0, TMP0);
+        vis_and(TMP2, MASK_fe, TMP2);
+        vis_mul8x16(CONST_128, TMP0, TMP0);
 
-	vis_mul8x16(CONST_128, TMP2, TMP2);
+        vis_mul8x16(CONST_128, TMP2, TMP2);
 
-	vis_and(TMP0, MASK_7f, TMP0);
+        vis_and(TMP0, MASK_7f, TMP0);
 
-	vis_and(TMP2, MASK_7f, TMP2);
+        vis_and(TMP2, MASK_7f, TMP2);
 
-	vis_padd16(TMP20, TMP0, TMP0);
-	vis_st64(TMP0, dest[0]);
+        vis_padd16(TMP20, TMP0, TMP0);
+        vis_st64(TMP0, dest[0]);
 
-	vis_padd16(TMP18, TMP2, TMP2);
-	vis_st64_2(TMP2, dest, 8);
+        vis_padd16(TMP18, TMP2, TMP2);
+        vis_st64_2(TMP2, dest, 8);
 }
 
 static void MC_put_no_round_y_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
+                            const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
+        uint8_t *ref = (uint8_t *) _ref;
 
-	ref = vis_alignaddr(ref);
-	vis_ld64(ref[0], TMP0);
+        ref = vis_alignaddr(ref);
+        vis_ld64(ref[0], TMP0);
 
-	vis_ld64_2(ref, 8, TMP2);
-	ref += stride;
+        vis_ld64_2(ref, 8, TMP2);
+        ref += stride;
 
-	vis_ld64(ref[0], TMP4);
+        vis_ld64(ref[0], TMP4);
 
-	vis_ld64_2(ref, 8, TMP6);
-	ref += stride;
+        vis_ld64_2(ref, 8, TMP6);
+        ref += stride;
 
-	vis_ld64(constants_fe[0], MASK_fe);
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_ld64(constants_fe[0], MASK_fe);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_ld64(constants_7f[0], MASK_7f);
-	vis_faligndata(TMP4, TMP6, REF_2);
+        vis_ld64(constants_7f[0], MASK_7f);
+        vis_faligndata(TMP4, TMP6, REF_2);
 
-	vis_ld64(constants128[0], CONST_128);
-	height = (height >> 1) - 1;
-	do {	/* 12 cycles */
-		vis_ld64(ref[0], TMP0);
-		vis_xor(REF_0, REF_2, TMP4);
+        vis_ld64(constants128[0], CONST_128);
+        height = (height >> 1) - 1;
+        do {    /* 12 cycles */
+                vis_ld64(ref[0], TMP0);
+                vis_xor(REF_0, REF_2, TMP4);
 
-		vis_ld64_2(ref, 8, TMP2);
-		ref += stride;
-		vis_and(TMP4, MASK_fe, TMP4);
+                vis_ld64_2(ref, 8, TMP2);
+                ref += stride;
+                vis_and(TMP4, MASK_fe, TMP4);
 
-		vis_and(REF_0, REF_2, TMP6);
-		vis_mul8x16(CONST_128, TMP4, TMP4);
+                vis_and(REF_0, REF_2, TMP6);
+                vis_mul8x16(CONST_128, TMP4, TMP4);
 
-		vis_faligndata(TMP0, TMP2, REF_0);
-		vis_ld64(ref[0], TMP0);
+                vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64(ref[0], TMP0);
 
-		vis_ld64_2(ref, 8, TMP2);
-		ref += stride;
-		vis_xor(REF_0, REF_2, TMP12);
+                vis_ld64_2(ref, 8, TMP2);
+                ref += stride;
+                vis_xor(REF_0, REF_2, TMP12);
 
-		vis_and(TMP4, MASK_7f, TMP4);
+                vis_and(TMP4, MASK_7f, TMP4);
 
-		vis_and(TMP12, MASK_fe, TMP12);
+                vis_and(TMP12, MASK_fe, TMP12);
 
-		vis_mul8x16(CONST_128, TMP12, TMP12);
-		vis_and(REF_0, REF_2, TMP14);
+                vis_mul8x16(CONST_128, TMP12, TMP12);
+                vis_and(REF_0, REF_2, TMP14);
 
-		vis_padd16(TMP6, TMP4, DST_0);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
+                vis_padd16(TMP6, TMP4, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
 
-		vis_faligndata(TMP0, TMP2, REF_2);
+                vis_faligndata(TMP0, TMP2, REF_2);
 
-		vis_and(TMP12, MASK_7f, TMP12);
+                vis_and(TMP12, MASK_7f, TMP12);
 
-		vis_padd16(TMP14, TMP12, DST_0);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_padd16(TMP14, TMP12, DST_0);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+        } while (--height);
 
-	vis_ld64(ref[0], TMP0);
-	vis_xor(REF_0, REF_2, TMP4);
+        vis_ld64(ref[0], TMP0);
+        vis_xor(REF_0, REF_2, TMP4);
 
-	vis_ld64_2(ref, 8, TMP2);
-	vis_and(TMP4, MASK_fe, TMP4);
+        vis_ld64_2(ref, 8, TMP2);
+        vis_and(TMP4, MASK_fe, TMP4);
 
-	vis_and(REF_0, REF_2, TMP6);
-	vis_mul8x16(CONST_128, TMP4, TMP4);
+        vis_and(REF_0, REF_2, TMP6);
+        vis_mul8x16(CONST_128, TMP4, TMP4);
 
-	vis_faligndata(TMP0, TMP2, REF_0);
+        vis_faligndata(TMP0, TMP2, REF_0);
 
-	vis_xor(REF_0, REF_2, TMP12);
+        vis_xor(REF_0, REF_2, TMP12);
 
-	vis_and(TMP4, MASK_7f, TMP4);
+        vis_and(TMP4, MASK_7f, TMP4);
 
-	vis_and(TMP12, MASK_fe, TMP12);
+        vis_and(TMP12, MASK_fe, TMP12);
 
-	vis_mul8x16(CONST_128, TMP12, TMP12);
-	vis_and(REF_0, REF_2, TMP14);
+        vis_mul8x16(CONST_128, TMP12, TMP12);
+        vis_and(REF_0, REF_2, TMP14);
 
-	vis_padd16(TMP6, TMP4, DST_0);
-	vis_st64(DST_0, dest[0]);
-	dest += stride;
+        vis_padd16(TMP6, TMP4, DST_0);
+        vis_st64(DST_0, dest[0]);
+        dest += stride;
 
-	vis_and(TMP12, MASK_7f, TMP12);
+        vis_and(TMP12, MASK_7f, TMP12);
 
-	vis_padd16(TMP14, TMP12, DST_0);
-	vis_st64(DST_0, dest[0]);
+        vis_padd16(TMP14, TMP12, DST_0);
+        vis_st64(DST_0, dest[0]);
 }
 
 static void MC_avg_no_round_y_16_vis (uint8_t * dest, const uint8_t * _ref,
-			     const int stride, int height)
+                             const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	int stride_8 = stride + 8;
-	int stride_16 = stride + 16;
+        uint8_t *ref = (uint8_t *) _ref;
+        int stride_8 = stride + 8;
+        int stride_16 = stride + 16;
 
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
 
-	vis_ld64(ref[ 8], TMP2);
+        vis_ld64(ref[ 8], TMP2);
 
-	vis_ld64(ref[16], TMP4);
+        vis_ld64(ref[16], TMP4);
 
-	vis_ld64(constants3[0], CONST_3);
-	vis_faligndata(TMP0, TMP2, REF_2);
+        vis_ld64(constants3[0], CONST_3);
+        vis_faligndata(TMP0, TMP2, REF_2);
 
-	vis_ld64(constants256_512[0], CONST_256);
-	vis_faligndata(TMP2, TMP4, REF_6);
-	height >>= 1;
+        vis_ld64(constants256_512[0], CONST_256);
+        vis_faligndata(TMP2, TMP4, REF_6);
+        height >>= 1;
 
-	do {	/* 31 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_pmerge(ZERO,       REF_2,     TMP12);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP14);
+        do {    /* 31 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_pmerge(ZERO,       REF_2,     TMP12);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP14);
 
-		vis_ld64_2(ref, stride_8, TMP2);
-		vis_pmerge(ZERO,       REF_6,     TMP16);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP18);
+                vis_ld64_2(ref, stride_8, TMP2);
+                vis_pmerge(ZERO,       REF_6,     TMP16);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP18);
 
-		vis_ld64_2(ref, stride_16, TMP4);
-		ref += stride;
+                vis_ld64_2(ref, stride_16, TMP4);
+                ref += stride;
 
-		vis_ld64(dest[0], DST_0);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(dest, 8, DST_2);
-		vis_faligndata(TMP2, TMP4, REF_4);
+                vis_ld64_2(dest, 8, DST_2);
+                vis_faligndata(TMP2, TMP4, REF_4);
 
-		vis_ld64_2(ref, stride, TMP6);
-		vis_pmerge(ZERO,     REF_0,     TMP0);
-		vis_mul8x16au(REF_0_1, CONST_256, TMP2);
+                vis_ld64_2(ref, stride, TMP6);
+                vis_pmerge(ZERO,     REF_0,     TMP0);
+                vis_mul8x16au(REF_0_1, CONST_256, TMP2);
 
-		vis_ld64_2(ref, stride_8, TMP8);
-		vis_pmerge(ZERO,     REF_4,     TMP4);
+                vis_ld64_2(ref, stride_8, TMP8);
+                vis_pmerge(ZERO,     REF_4,     TMP4);
 
-		vis_ld64_2(ref, stride_16, TMP10);
-		ref += stride;
+                vis_ld64_2(ref, stride_16, TMP10);
+                ref += stride;
 
-		vis_ld64_2(dest, stride, REF_S0/*DST_4*/);
-		vis_faligndata(TMP6, TMP8, REF_2);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP6);
+                vis_ld64_2(dest, stride, REF_S0/*DST_4*/);
+                vis_faligndata(TMP6, TMP8, REF_2);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP6);
 
-		vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/);
-		vis_faligndata(TMP8, TMP10, REF_6);
-		vis_mul8x16al(DST_0,   CONST_512, TMP20);
+                vis_ld64_2(dest, stride_8, REF_S2/*DST_6*/);
+                vis_faligndata(TMP8, TMP10, REF_6);
+                vis_mul8x16al(DST_0,   CONST_512, TMP20);
 
-		vis_padd16(TMP0, CONST_3, TMP0);
-		vis_mul8x16al(DST_1,   CONST_512, TMP22);
+                vis_padd16(TMP0, CONST_3, TMP0);
+                vis_mul8x16al(DST_1,   CONST_512, TMP22);
 
-		vis_padd16(TMP2, CONST_3, TMP2);
-		vis_mul8x16al(DST_2,   CONST_512, TMP24);
+                vis_padd16(TMP2, CONST_3, TMP2);
+                vis_mul8x16al(DST_2,   CONST_512, TMP24);
 
-		vis_padd16(TMP4, CONST_3, TMP4);
-		vis_mul8x16al(DST_3,   CONST_512, TMP26);
+                vis_padd16(TMP4, CONST_3, TMP4);
+                vis_mul8x16al(DST_3,   CONST_512, TMP26);
 
-		vis_padd16(TMP6, CONST_3, TMP6);
+                vis_padd16(TMP6, CONST_3, TMP6);
 
-		vis_padd16(TMP12, TMP20, TMP12);
-		vis_mul8x16al(REF_S0,   CONST_512, TMP20);
+                vis_padd16(TMP12, TMP20, TMP12);
+                vis_mul8x16al(REF_S0,   CONST_512, TMP20);
 
-		vis_padd16(TMP14, TMP22, TMP14);
-		vis_mul8x16al(REF_S0_1, CONST_512, TMP22);
+                vis_padd16(TMP14, TMP22, TMP14);
+                vis_mul8x16al(REF_S0_1, CONST_512, TMP22);
 
-		vis_padd16(TMP16, TMP24, TMP16);
-		vis_mul8x16al(REF_S2,   CONST_512, TMP24);
+                vis_padd16(TMP16, TMP24, TMP16);
+                vis_mul8x16al(REF_S2,   CONST_512, TMP24);
 
-		vis_padd16(TMP18, TMP26, TMP18);
-		vis_mul8x16al(REF_S2_1, CONST_512, TMP26);
+                vis_padd16(TMP18, TMP26, TMP18);
+                vis_mul8x16al(REF_S2_1, CONST_512, TMP26);
 
-		vis_padd16(TMP12, TMP0, TMP12);
-		vis_mul8x16au(REF_2,   CONST_256, TMP28);
+                vis_padd16(TMP12, TMP0, TMP12);
+                vis_mul8x16au(REF_2,   CONST_256, TMP28);
 
-		vis_padd16(TMP14, TMP2, TMP14);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP30);
+                vis_padd16(TMP14, TMP2, TMP14);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP30);
 
-		vis_padd16(TMP16, TMP4, TMP16);
-		vis_mul8x16au(REF_6,   CONST_256, REF_S4);
+                vis_padd16(TMP16, TMP4, TMP16);
+                vis_mul8x16au(REF_6,   CONST_256, REF_S4);
 
-		vis_padd16(TMP18, TMP6, TMP18);
-		vis_mul8x16au(REF_6_1, CONST_256, REF_S6);
+                vis_padd16(TMP18, TMP6, TMP18);
+                vis_mul8x16au(REF_6_1, CONST_256, REF_S6);
 
-		vis_pack16(TMP12, DST_0);
-		vis_padd16(TMP28, TMP0, TMP12);
+                vis_pack16(TMP12, DST_0);
+                vis_padd16(TMP28, TMP0, TMP12);
 
-		vis_pack16(TMP14, DST_1);
-		vis_st64(DST_0, dest[0]);
-		vis_padd16(TMP30, TMP2, TMP14);
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP30, TMP2, TMP14);
 
-		vis_pack16(TMP16, DST_2);
-		vis_padd16(REF_S4, TMP4, TMP16);
+                vis_pack16(TMP16, DST_2);
+                vis_padd16(REF_S4, TMP4, TMP16);
 
-		vis_pack16(TMP18, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-		vis_padd16(REF_S6, TMP6, TMP18);
+                vis_pack16(TMP18, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+                vis_padd16(REF_S6, TMP6, TMP18);
 
-		vis_padd16(TMP12, TMP20, TMP12);
+                vis_padd16(TMP12, TMP20, TMP12);
 
-		vis_padd16(TMP14, TMP22, TMP14);
-		vis_pack16(TMP12, DST_0);
+                vis_padd16(TMP14, TMP22, TMP14);
+                vis_pack16(TMP12, DST_0);
 
-		vis_padd16(TMP16, TMP24, TMP16);
-		vis_pack16(TMP14, DST_1);
-		vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP16, TMP24, TMP16);
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
 
-		vis_padd16(TMP18, TMP26, TMP18);
-		vis_pack16(TMP16, DST_2);
+                vis_padd16(TMP18, TMP26, TMP18);
+                vis_pack16(TMP16, DST_2);
 
-		vis_pack16(TMP18, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_pack16(TMP18, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_avg_no_round_y_8_vis (uint8_t * dest, const uint8_t * _ref,
-			    const int stride, int height)
+                            const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	int stride_8 = stride + 8;
+        uint8_t *ref = (uint8_t *) _ref;
+        int stride_8 = stride + 8;
 
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
 
-	vis_ld64(ref[ 8], TMP2);
+        vis_ld64(ref[ 8], TMP2);
 
-	vis_ld64(constants3[0], CONST_3);
-	vis_faligndata(TMP0, TMP2, REF_2);
+        vis_ld64(constants3[0], CONST_3);
+        vis_faligndata(TMP0, TMP2, REF_2);
 
-	vis_ld64(constants256_512[0], CONST_256);
+        vis_ld64(constants256_512[0], CONST_256);
 
-	height >>= 1;
-	do {	/* 20 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_pmerge(ZERO,       REF_2,     TMP8);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP10);
+        height >>= 1;
+        do {    /* 20 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_pmerge(ZERO,       REF_2,     TMP8);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP10);
 
-		vis_ld64_2(ref, stride_8, TMP2);
-		ref += stride;
+                vis_ld64_2(ref, stride_8, TMP2);
+                ref += stride;
 
-		vis_ld64(dest[0], DST_0);
+                vis_ld64(dest[0], DST_0);
 
-		vis_ld64_2(dest, stride, DST_2);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64_2(dest, stride, DST_2);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(ref, stride, TMP4);
-		vis_mul8x16al(DST_0,   CONST_512, TMP16);
-		vis_pmerge(ZERO,       REF_0,     TMP12);
+                vis_ld64_2(ref, stride, TMP4);
+                vis_mul8x16al(DST_0,   CONST_512, TMP16);
+                vis_pmerge(ZERO,       REF_0,     TMP12);
 
-		vis_ld64_2(ref, stride_8, TMP6);
-		ref += stride;
-		vis_mul8x16al(DST_1,   CONST_512, TMP18);
-		vis_pmerge(ZERO,       REF_0_1,   TMP14);
+                vis_ld64_2(ref, stride_8, TMP6);
+                ref += stride;
+                vis_mul8x16al(DST_1,   CONST_512, TMP18);
+                vis_pmerge(ZERO,       REF_0_1,   TMP14);
 
-		vis_padd16(TMP12, CONST_3, TMP12);
-		vis_mul8x16al(DST_2,   CONST_512, TMP24);
+                vis_padd16(TMP12, CONST_3, TMP12);
+                vis_mul8x16al(DST_2,   CONST_512, TMP24);
 
-		vis_padd16(TMP14, CONST_3, TMP14);
-		vis_mul8x16al(DST_3,   CONST_512, TMP26);
+                vis_padd16(TMP14, CONST_3, TMP14);
+                vis_mul8x16al(DST_3,   CONST_512, TMP26);
 
-		vis_faligndata(TMP4, TMP6, REF_2);
+                vis_faligndata(TMP4, TMP6, REF_2);
 
-		vis_padd16(TMP8, TMP12, TMP8);
+                vis_padd16(TMP8, TMP12, TMP8);
 
-		vis_padd16(TMP10, TMP14, TMP10);
-		vis_mul8x16au(REF_2,   CONST_256, TMP20);
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_mul8x16au(REF_2,   CONST_256, TMP20);
 
-		vis_padd16(TMP8, TMP16, TMP0);
-		vis_mul8x16au(REF_2_1, CONST_256, TMP22);
+                vis_padd16(TMP8, TMP16, TMP0);
+                vis_mul8x16au(REF_2_1, CONST_256, TMP22);
 
-		vis_padd16(TMP10, TMP18, TMP2);
-		vis_pack16(TMP0, DST_0);
+                vis_padd16(TMP10, TMP18, TMP2);
+                vis_pack16(TMP0, DST_0);
 
-		vis_pack16(TMP2, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-		vis_padd16(TMP12, TMP20, TMP12);
+                vis_pack16(TMP2, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+                vis_padd16(TMP12, TMP20, TMP12);
 
-		vis_padd16(TMP14, TMP22, TMP14);
+                vis_padd16(TMP14, TMP22, TMP14);
 
-		vis_padd16(TMP12, TMP24, TMP0);
+                vis_padd16(TMP12, TMP24, TMP0);
 
-		vis_padd16(TMP14, TMP26, TMP2);
-		vis_pack16(TMP0, DST_2);
+                vis_padd16(TMP14, TMP26, TMP2);
+                vis_pack16(TMP0, DST_2);
 
-		vis_pack16(TMP2, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_pack16(TMP2, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_put_no_round_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
-				       const int stride, int height)
+                                       const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_8 = stride + 8;
-	int stride_16 = stride + 16;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
+        int stride_16 = stride + 16;
 
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
 
-	vis_ld64(ref[ 8], TMP2);
+        vis_ld64(ref[ 8], TMP2);
 
-	vis_ld64(ref[16], TMP4);
+        vis_ld64(ref[16], TMP4);
 
-	vis_ld64(constants1[0], CONST_1);
-	vis_faligndata(TMP0, TMP2, REF_S0);
+        vis_ld64(constants1[0], CONST_1);
+        vis_faligndata(TMP0, TMP2, REF_S0);
 
-	vis_ld64(constants256_512[0], CONST_256);
-	vis_faligndata(TMP2, TMP4, REF_S4);
+        vis_ld64(constants256_512[0], CONST_256);
+        vis_faligndata(TMP2, TMP4, REF_S4);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_S2);
-		vis_faligndata(TMP2, TMP4, REF_S6);
-	} else {
-		vis_src1(TMP2, REF_S2);
-		vis_src1(TMP4, REF_S6);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+                vis_faligndata(TMP2, TMP4, REF_S6);
+        } else {
+                vis_src1(TMP2, REF_S2);
+                vis_src1(TMP4, REF_S6);
+        }
 
-	height >>= 1;
-	do {
-		vis_ld64_2(ref, stride, TMP0);
-		vis_mul8x16au(REF_S0, CONST_256, TMP12);
-		vis_pmerge(ZERO,      REF_S0_1,  TMP14);
+        height >>= 1;
+        do {
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0, CONST_256, TMP12);
+                vis_pmerge(ZERO,      REF_S0_1,  TMP14);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64_2(ref, stride_8, TMP2);
-		vis_mul8x16au(REF_S2, CONST_256, TMP16);
-		vis_pmerge(ZERO,      REF_S2_1,  TMP18);
+                vis_ld64_2(ref, stride_8, TMP2);
+                vis_mul8x16au(REF_S2, CONST_256, TMP16);
+                vis_pmerge(ZERO,      REF_S2_1,  TMP18);
 
-		vis_ld64_2(ref, stride_16, TMP4);
-		ref += stride;
-		vis_mul8x16au(REF_S4, CONST_256, TMP20);
-		vis_pmerge(ZERO,      REF_S4_1,  TMP22);
+                vis_ld64_2(ref, stride_16, TMP4);
+                ref += stride;
+                vis_mul8x16au(REF_S4, CONST_256, TMP20);
+                vis_pmerge(ZERO,      REF_S4_1,  TMP22);
 
-		vis_ld64_2(ref, stride, TMP6);
-		vis_mul8x16au(REF_S6, CONST_256, TMP24);
-		vis_pmerge(ZERO,      REF_S6_1,  TMP26);
+                vis_ld64_2(ref, stride, TMP6);
+                vis_mul8x16au(REF_S6, CONST_256, TMP24);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP26);
 
-		vis_ld64_2(ref, stride_8, TMP8);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64_2(ref, stride_8, TMP8);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(ref, stride_16, TMP10);
-		ref += stride;
-		vis_faligndata(TMP2, TMP4, REF_4);
+                vis_ld64_2(ref, stride_16, TMP10);
+                ref += stride;
+                vis_faligndata(TMP2, TMP4, REF_4);
 
-		vis_faligndata(TMP6, TMP8, REF_S0);
+                vis_faligndata(TMP6, TMP8, REF_S0);
 
-		vis_faligndata(TMP8, TMP10, REF_S4);
+                vis_faligndata(TMP8, TMP10, REF_S4);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-			vis_faligndata(TMP2, TMP4, REF_6);
-			vis_faligndata(TMP6, TMP8, REF_S2);
-			vis_faligndata(TMP8, TMP10, REF_S6);
-		} else {
-			vis_src1(TMP2, REF_2);
-			vis_src1(TMP4, REF_6);
-			vis_src1(TMP8, REF_S2);
-			vis_src1(TMP10, REF_S6);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                        vis_faligndata(TMP6, TMP8, REF_S2);
+                        vis_faligndata(TMP8, TMP10, REF_S6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                        vis_src1(TMP8, REF_S2);
+                        vis_src1(TMP10, REF_S6);
+                }
 
-		vis_mul8x16au(REF_0, CONST_256, TMP0);
-		vis_pmerge(ZERO,      REF_0_1,  TMP2);
+                vis_mul8x16au(REF_0, CONST_256, TMP0);
+                vis_pmerge(ZERO,      REF_0_1,  TMP2);
 
-		vis_mul8x16au(REF_2, CONST_256, TMP4);
-		vis_pmerge(ZERO,      REF_2_1,  TMP6);
+                vis_mul8x16au(REF_2, CONST_256, TMP4);
+                vis_pmerge(ZERO,      REF_2_1,  TMP6);
 
-		vis_padd16(TMP0, CONST_2, TMP8);
-		vis_mul8x16au(REF_4, CONST_256, TMP0);
+                vis_padd16(TMP0, CONST_2, TMP8);
+                vis_mul8x16au(REF_4, CONST_256, TMP0);
 
-		vis_padd16(TMP2, CONST_1, TMP10);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP2);
+                vis_padd16(TMP2, CONST_1, TMP10);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP2);
 
-		vis_padd16(TMP8, TMP4, TMP8);
-		vis_mul8x16au(REF_6, CONST_256, TMP4);
+                vis_padd16(TMP8, TMP4, TMP8);
+                vis_mul8x16au(REF_6, CONST_256, TMP4);
 
-		vis_padd16(TMP10, TMP6, TMP10);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP6);
+                vis_padd16(TMP10, TMP6, TMP10);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP6);
 
-		vis_padd16(TMP12, TMP8, TMP12);
+                vis_padd16(TMP12, TMP8, TMP12);
 
-		vis_padd16(TMP14, TMP10, TMP14);
+                vis_padd16(TMP14, TMP10, TMP14);
 
-		vis_padd16(TMP12, TMP16, TMP12);
+                vis_padd16(TMP12, TMP16, TMP12);
 
-		vis_padd16(TMP14, TMP18, TMP14);
-		vis_pack16(TMP12, DST_0);
+                vis_padd16(TMP14, TMP18, TMP14);
+                vis_pack16(TMP12, DST_0);
 
-		vis_pack16(TMP14, DST_1);
-		vis_st64(DST_0, dest[0]);
-		vis_padd16(TMP0, CONST_1, TMP12);
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP0, CONST_1, TMP12);
 
-		vis_mul8x16au(REF_S0, CONST_256, TMP0);
-		vis_padd16(TMP2, CONST_1, TMP14);
+                vis_mul8x16au(REF_S0, CONST_256, TMP0);
+                vis_padd16(TMP2, CONST_1, TMP14);
 
-		vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
-		vis_padd16(TMP12, TMP4, TMP12);
+                vis_mul8x16au(REF_S0_1, CONST_256, TMP2);
+                vis_padd16(TMP12, TMP4, TMP12);
 
-		vis_mul8x16au(REF_S2, CONST_256, TMP4);
-		vis_padd16(TMP14, TMP6, TMP14);
+                vis_mul8x16au(REF_S2, CONST_256, TMP4);
+                vis_padd16(TMP14, TMP6, TMP14);
 
-		vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
-		vis_padd16(TMP20, TMP12, TMP20);
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP6);
+                vis_padd16(TMP20, TMP12, TMP20);
 
-		vis_padd16(TMP22, TMP14, TMP22);
+                vis_padd16(TMP22, TMP14, TMP22);
 
-		vis_padd16(TMP20, TMP24, TMP20);
+                vis_padd16(TMP20, TMP24, TMP20);
 
-		vis_padd16(TMP22, TMP26, TMP22);
-		vis_pack16(TMP20, DST_2);
+                vis_padd16(TMP22, TMP26, TMP22);
+                vis_pack16(TMP20, DST_2);
 
-		vis_pack16(TMP22, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-		vis_padd16(TMP0, TMP4, TMP24);
+                vis_pack16(TMP22, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+                vis_padd16(TMP0, TMP4, TMP24);
 
-		vis_mul8x16au(REF_S4, CONST_256, TMP0);
-		vis_padd16(TMP2, TMP6, TMP26);
+                vis_mul8x16au(REF_S4, CONST_256, TMP0);
+                vis_padd16(TMP2, TMP6, TMP26);
 
-		vis_mul8x16au(REF_S4_1, CONST_256, TMP2);
-		vis_padd16(TMP24, TMP8, TMP24);
+                vis_mul8x16au(REF_S4_1, CONST_256, TMP2);
+                vis_padd16(TMP24, TMP8, TMP24);
 
-		vis_padd16(TMP26, TMP10, TMP26);
-		vis_pack16(TMP24, DST_0);
+                vis_padd16(TMP26, TMP10, TMP26);
+                vis_pack16(TMP24, DST_0);
 
-		vis_pack16(TMP26, DST_1);
-		vis_st64(DST_0, dest[0]);
-		vis_pmerge(ZERO, REF_S6, TMP4);
+                vis_pack16(TMP26, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_pmerge(ZERO, REF_S6, TMP4);
 
-		vis_pmerge(ZERO,      REF_S6_1,  TMP6);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP6);
 
-		vis_padd16(TMP0, TMP4, TMP0);
+                vis_padd16(TMP0, TMP4, TMP0);
 
-		vis_padd16(TMP2, TMP6, TMP2);
+                vis_padd16(TMP2, TMP6, TMP2);
 
-		vis_padd16(TMP0, TMP12, TMP0);
+                vis_padd16(TMP0, TMP12, TMP0);
 
-		vis_padd16(TMP2, TMP14, TMP2);
-		vis_pack16(TMP0, DST_2);
+                vis_padd16(TMP2, TMP14, TMP2);
+                vis_pack16(TMP0, DST_2);
 
-		vis_pack16(TMP2, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_pack16(TMP2, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_put_no_round_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
-				      const int stride, int height)
+                                      const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_8 = stride + 8;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
 
-	vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(5 << VIS_GSR_SCALEFACT_SHIFT);
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
 
-	vis_ld64(ref[ 8], TMP2);
+        vis_ld64(ref[ 8], TMP2);
 
-	vis_ld64(constants1[0], CONST_1);
+        vis_ld64(constants1[0], CONST_1);
 
-	vis_ld64(constants256_512[0], CONST_256);
-	vis_faligndata(TMP0, TMP2, REF_S0);
+        vis_ld64(constants256_512[0], CONST_256);
+        vis_faligndata(TMP0, TMP2, REF_S0);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_S2);
-	} else {
-		vis_src1(TMP2, REF_S2);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+        } else {
+                vis_src1(TMP2, REF_S2);
+        }
 
-	height >>= 1;
-	do {	/* 26 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_mul8x16au(REF_S0,   CONST_256, TMP8);
-		vis_pmerge(ZERO,        REF_S2,    TMP12);
+        height >>= 1;
+        do {    /* 26 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0,   CONST_256, TMP8);
+                vis_pmerge(ZERO,        REF_S2,    TMP12);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64_2(ref, stride_8, TMP2);
-		ref += stride;
-		vis_mul8x16au(REF_S0_1, CONST_256, TMP10);
-		vis_pmerge(ZERO,        REF_S2_1,  TMP14);
+                vis_ld64_2(ref, stride_8, TMP2);
+                ref += stride;
+                vis_mul8x16au(REF_S0_1, CONST_256, TMP10);
+                vis_pmerge(ZERO,        REF_S2_1,  TMP14);
 
-		vis_ld64_2(ref, stride, TMP4);
+                vis_ld64_2(ref, stride, TMP4);
 
-		vis_ld64_2(ref, stride_8, TMP6);
-		ref += stride;
-		vis_faligndata(TMP0, TMP2, REF_S4);
+                vis_ld64_2(ref, stride_8, TMP6);
+                ref += stride;
+                vis_faligndata(TMP0, TMP2, REF_S4);
 
-		vis_pmerge(ZERO, REF_S4, TMP18);
+                vis_pmerge(ZERO, REF_S4, TMP18);
 
-		vis_pmerge(ZERO, REF_S4_1, TMP20);
+                vis_pmerge(ZERO, REF_S4_1, TMP20);
 
-		vis_faligndata(TMP4, TMP6, REF_S0);
+                vis_faligndata(TMP4, TMP6, REF_S0);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_S6);
-			vis_faligndata(TMP4, TMP6, REF_S2);
-		} else {
-			vis_src1(TMP2, REF_S6);
-			vis_src1(TMP6, REF_S2);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_S6);
+                        vis_faligndata(TMP4, TMP6, REF_S2);
+                } else {
+                        vis_src1(TMP2, REF_S6);
+                        vis_src1(TMP6, REF_S2);
+                }
 
-		vis_padd16(TMP18, CONST_1, TMP18);
-		vis_mul8x16au(REF_S6,   CONST_256, TMP22);
+                vis_padd16(TMP18, CONST_1, TMP18);
+                vis_mul8x16au(REF_S6,   CONST_256, TMP22);
 
-		vis_padd16(TMP20, CONST_1, TMP20);
-		vis_mul8x16au(REF_S6_1, CONST_256, TMP24);
+                vis_padd16(TMP20, CONST_1, TMP20);
+                vis_mul8x16au(REF_S6_1, CONST_256, TMP24);
 
-		vis_mul8x16au(REF_S0,   CONST_256, TMP26);
-		vis_pmerge(ZERO, REF_S0_1, TMP28);
+                vis_mul8x16au(REF_S0,   CONST_256, TMP26);
+                vis_pmerge(ZERO, REF_S0_1, TMP28);
 
-		vis_mul8x16au(REF_S2,   CONST_256, TMP30);
-		vis_padd16(TMP18, TMP22, TMP18);
+                vis_mul8x16au(REF_S2,   CONST_256, TMP30);
+                vis_padd16(TMP18, TMP22, TMP18);
 
-		vis_mul8x16au(REF_S2_1, CONST_256, TMP32);
-		vis_padd16(TMP20, TMP24, TMP20);
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP32);
+                vis_padd16(TMP20, TMP24, TMP20);
 
-		vis_padd16(TMP8,  TMP18, TMP8);
+                vis_padd16(TMP8,  TMP18, TMP8);
 
-		vis_padd16(TMP10, TMP20, TMP10);
+                vis_padd16(TMP10, TMP20, TMP10);
 
-		vis_padd16(TMP8,  TMP12, TMP8);
+                vis_padd16(TMP8,  TMP12, TMP8);
 
-		vis_padd16(TMP10, TMP14, TMP10);
-		vis_pack16(TMP8,  DST_0);
+                vis_padd16(TMP10, TMP14, TMP10);
+                vis_pack16(TMP8,  DST_0);
 
-		vis_pack16(TMP10, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
-		vis_padd16(TMP18, TMP26, TMP18);
+                vis_pack16(TMP10, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
+                vis_padd16(TMP18, TMP26, TMP18);
 
-		vis_padd16(TMP20, TMP28, TMP20);
+                vis_padd16(TMP20, TMP28, TMP20);
 
-		vis_padd16(TMP18, TMP30, TMP18);
+                vis_padd16(TMP18, TMP30, TMP18);
 
-		vis_padd16(TMP20, TMP32, TMP20);
-		vis_pack16(TMP18, DST_2);
+                vis_padd16(TMP20, TMP32, TMP20);
+                vis_pack16(TMP18, DST_2);
 
-		vis_pack16(TMP20, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_pack16(TMP20, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_avg_no_round_xy_16_vis (uint8_t * dest, const uint8_t * _ref,
-				       const int stride, int height)
+                                       const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_8 = stride + 8;
-	int stride_16 = stride + 16;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
+        int stride_16 = stride + 16;
 
-	vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[ 0], TMP0);
-	vis_fzero(ZERO);
+        vis_ld64(ref[ 0], TMP0);
+        vis_fzero(ZERO);
 
-	vis_ld64(ref[ 8], TMP2);
+        vis_ld64(ref[ 8], TMP2);
 
-	vis_ld64(ref[16], TMP4);
+        vis_ld64(ref[16], TMP4);
 
-	vis_ld64(constants6[0], CONST_6);
-	vis_faligndata(TMP0, TMP2, REF_S0);
+        vis_ld64(constants6[0], CONST_6);
+        vis_faligndata(TMP0, TMP2, REF_S0);
 
-	vis_ld64(constants256_1024[0], CONST_256);
-	vis_faligndata(TMP2, TMP4, REF_S4);
+        vis_ld64(constants256_1024[0], CONST_256);
+        vis_faligndata(TMP2, TMP4, REF_S4);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_S2);
-		vis_faligndata(TMP2, TMP4, REF_S6);
-	} else {
-		vis_src1(TMP2, REF_S2);
-		vis_src1(TMP4, REF_S6);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+                vis_faligndata(TMP2, TMP4, REF_S6);
+        } else {
+                vis_src1(TMP2, REF_S2);
+                vis_src1(TMP4, REF_S6);
+        }
 
-	height >>= 1;
-	do {	/* 55 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_mul8x16au(REF_S0, CONST_256, TMP12);
-		vis_pmerge(ZERO,      REF_S0_1,  TMP14);
+        height >>= 1;
+        do {    /* 55 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0, CONST_256, TMP12);
+                vis_pmerge(ZERO,      REF_S0_1,  TMP14);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64_2(ref, stride_8, TMP2);
-		vis_mul8x16au(REF_S2, CONST_256, TMP16);
-		vis_pmerge(ZERO,      REF_S2_1,  TMP18);
+                vis_ld64_2(ref, stride_8, TMP2);
+                vis_mul8x16au(REF_S2, CONST_256, TMP16);
+                vis_pmerge(ZERO,      REF_S2_1,  TMP18);
 
-		vis_ld64_2(ref, stride_16, TMP4);
-		ref += stride;
-		vis_mul8x16au(REF_S4, CONST_256, TMP20);
-		vis_pmerge(ZERO,      REF_S4_1,  TMP22);
+                vis_ld64_2(ref, stride_16, TMP4);
+                ref += stride;
+                vis_mul8x16au(REF_S4, CONST_256, TMP20);
+                vis_pmerge(ZERO,      REF_S4_1,  TMP22);
 
-		vis_ld64_2(ref, stride, TMP6);
-		vis_mul8x16au(REF_S6, CONST_256, TMP24);
-		vis_pmerge(ZERO,      REF_S6_1,  TMP26);
+                vis_ld64_2(ref, stride, TMP6);
+                vis_mul8x16au(REF_S6, CONST_256, TMP24);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP26);
 
-		vis_ld64_2(ref, stride_8, TMP8);
-		vis_faligndata(TMP0, TMP2, REF_0);
+                vis_ld64_2(ref, stride_8, TMP8);
+                vis_faligndata(TMP0, TMP2, REF_0);
 
-		vis_ld64_2(ref, stride_16, TMP10);
-		ref += stride;
-		vis_faligndata(TMP2, TMP4, REF_4);
+                vis_ld64_2(ref, stride_16, TMP10);
+                ref += stride;
+                vis_faligndata(TMP2, TMP4, REF_4);
 
-		vis_ld64(dest[0], DST_0);
-		vis_faligndata(TMP6, TMP8, REF_S0);
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP6, TMP8, REF_S0);
 
-		vis_ld64_2(dest, 8, DST_2);
-		vis_faligndata(TMP8, TMP10, REF_S4);
+                vis_ld64_2(dest, 8, DST_2);
+                vis_faligndata(TMP8, TMP10, REF_S4);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_2);
-			vis_faligndata(TMP2, TMP4, REF_6);
-			vis_faligndata(TMP6, TMP8, REF_S2);
-			vis_faligndata(TMP8, TMP10, REF_S6);
-		} else {
-			vis_src1(TMP2, REF_2);
-			vis_src1(TMP4, REF_6);
-			vis_src1(TMP8, REF_S2);
-			vis_src1(TMP10, REF_S6);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_2);
+                        vis_faligndata(TMP2, TMP4, REF_6);
+                        vis_faligndata(TMP6, TMP8, REF_S2);
+                        vis_faligndata(TMP8, TMP10, REF_S6);
+                } else {
+                        vis_src1(TMP2, REF_2);
+                        vis_src1(TMP4, REF_6);
+                        vis_src1(TMP8, REF_S2);
+                        vis_src1(TMP10, REF_S6);
+                }
 
-		vis_mul8x16al(DST_0,   CONST_1024, TMP30);
-		vis_pmerge(ZERO, REF_0, TMP0);
+                vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+                vis_pmerge(ZERO, REF_0, TMP0);
 
-		vis_mul8x16al(DST_1,   CONST_1024, TMP32);
-		vis_pmerge(ZERO,      REF_0_1,  TMP2);
+                vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+                vis_pmerge(ZERO,      REF_0_1,  TMP2);
 
-		vis_mul8x16au(REF_2, CONST_256, TMP4);
-		vis_pmerge(ZERO,      REF_2_1,  TMP6);
+                vis_mul8x16au(REF_2, CONST_256, TMP4);
+                vis_pmerge(ZERO,      REF_2_1,  TMP6);
 
-		vis_mul8x16al(DST_2,   CONST_1024, REF_0);
-		vis_padd16(TMP0, CONST_6, TMP0);
+                vis_mul8x16al(DST_2,   CONST_1024, REF_0);
+                vis_padd16(TMP0, CONST_6, TMP0);
 
-		vis_mul8x16al(DST_3,   CONST_1024, REF_2);
-		vis_padd16(TMP2, CONST_6, TMP2);
+                vis_mul8x16al(DST_3,   CONST_1024, REF_2);
+                vis_padd16(TMP2, CONST_6, TMP2);
 
-		vis_padd16(TMP0, TMP4, TMP0);
-		vis_mul8x16au(REF_4, CONST_256, TMP4);
+                vis_padd16(TMP0, TMP4, TMP0);
+                vis_mul8x16au(REF_4, CONST_256, TMP4);
 
-		vis_padd16(TMP2, TMP6, TMP2);
-		vis_mul8x16au(REF_4_1, CONST_256, TMP6);
+                vis_padd16(TMP2, TMP6, TMP2);
+                vis_mul8x16au(REF_4_1, CONST_256, TMP6);
 
-		vis_padd16(TMP12, TMP0, TMP12);
-		vis_mul8x16au(REF_6, CONST_256, TMP8);
+                vis_padd16(TMP12, TMP0, TMP12);
+                vis_mul8x16au(REF_6, CONST_256, TMP8);
 
-		vis_padd16(TMP14, TMP2, TMP14);
-		vis_mul8x16au(REF_6_1, CONST_256, TMP10);
+                vis_padd16(TMP14, TMP2, TMP14);
+                vis_mul8x16au(REF_6_1, CONST_256, TMP10);
 
-		vis_padd16(TMP12, TMP16, TMP12);
-		vis_mul8x16au(REF_S0, CONST_256, REF_4);
+                vis_padd16(TMP12, TMP16, TMP12);
+                vis_mul8x16au(REF_S0, CONST_256, REF_4);
 
-		vis_padd16(TMP14, TMP18, TMP14);
-		vis_mul8x16au(REF_S0_1, CONST_256, REF_6);
+                vis_padd16(TMP14, TMP18, TMP14);
+                vis_mul8x16au(REF_S0_1, CONST_256, REF_6);
 
-		vis_padd16(TMP12, TMP30, TMP12);
+                vis_padd16(TMP12, TMP30, TMP12);
 
-		vis_padd16(TMP14, TMP32, TMP14);
-		vis_pack16(TMP12, DST_0);
+                vis_padd16(TMP14, TMP32, TMP14);
+                vis_pack16(TMP12, DST_0);
 
-		vis_pack16(TMP14, DST_1);
-		vis_st64(DST_0, dest[0]);
-		vis_padd16(TMP4, CONST_6, TMP4);
+                vis_pack16(TMP14, DST_1);
+                vis_st64(DST_0, dest[0]);
+                vis_padd16(TMP4, CONST_6, TMP4);
 
-		vis_ld64_2(dest, stride, DST_0);
-		vis_padd16(TMP6, CONST_6, TMP6);
-		vis_mul8x16au(REF_S2, CONST_256, TMP12);
+                vis_ld64_2(dest, stride, DST_0);
+                vis_padd16(TMP6, CONST_6, TMP6);
+                vis_mul8x16au(REF_S2, CONST_256, TMP12);
 
-		vis_padd16(TMP4, TMP8, TMP4);
-		vis_mul8x16au(REF_S2_1, CONST_256,  TMP14);
+                vis_padd16(TMP4, TMP8, TMP4);
+                vis_mul8x16au(REF_S2_1, CONST_256,  TMP14);
 
-		vis_padd16(TMP6, TMP10, TMP6);
+                vis_padd16(TMP6, TMP10, TMP6);
 
-		vis_padd16(TMP20, TMP4, TMP20);
+                vis_padd16(TMP20, TMP4, TMP20);
 
-		vis_padd16(TMP22, TMP6, TMP22);
+                vis_padd16(TMP22, TMP6, TMP22);
 
-		vis_padd16(TMP20, TMP24, TMP20);
+                vis_padd16(TMP20, TMP24, TMP20);
 
-		vis_padd16(TMP22, TMP26, TMP22);
+                vis_padd16(TMP22, TMP26, TMP22);
 
-		vis_padd16(TMP20, REF_0, TMP20);
-		vis_mul8x16au(REF_S4, CONST_256, REF_0);
+                vis_padd16(TMP20, REF_0, TMP20);
+                vis_mul8x16au(REF_S4, CONST_256, REF_0);
 
-		vis_padd16(TMP22, REF_2, TMP22);
-		vis_pack16(TMP20, DST_2);
+                vis_padd16(TMP22, REF_2, TMP22);
+                vis_pack16(TMP20, DST_2);
 
-		vis_pack16(TMP22, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
+                vis_pack16(TMP22, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
 
-		vis_ld64_2(dest, 8, DST_2);
-		vis_mul8x16al(DST_0,   CONST_1024, TMP30);
-		vis_pmerge(ZERO,      REF_S4_1,  REF_2);
+                vis_ld64_2(dest, 8, DST_2);
+                vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+                vis_pmerge(ZERO,      REF_S4_1,  REF_2);
 
-		vis_mul8x16al(DST_1,   CONST_1024, TMP32);
-		vis_padd16(REF_4, TMP0, TMP8);
+                vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+                vis_padd16(REF_4, TMP0, TMP8);
 
-		vis_mul8x16au(REF_S6, CONST_256, REF_4);
-		vis_padd16(REF_6, TMP2, TMP10);
+                vis_mul8x16au(REF_S6, CONST_256, REF_4);
+                vis_padd16(REF_6, TMP2, TMP10);
 
-		vis_mul8x16au(REF_S6_1, CONST_256, REF_6);
-		vis_padd16(TMP8, TMP12, TMP8);
+                vis_mul8x16au(REF_S6_1, CONST_256, REF_6);
+                vis_padd16(TMP8, TMP12, TMP8);
 
-		vis_padd16(TMP10, TMP14, TMP10);
+                vis_padd16(TMP10, TMP14, TMP10);
 
-		vis_padd16(TMP8, TMP30, TMP8);
+                vis_padd16(TMP8, TMP30, TMP8);
 
-		vis_padd16(TMP10, TMP32, TMP10);
-		vis_pack16(TMP8, DST_0);
+                vis_padd16(TMP10, TMP32, TMP10);
+                vis_pack16(TMP8, DST_0);
 
-		vis_pack16(TMP10, DST_1);
-		vis_st64(DST_0, dest[0]);
+                vis_pack16(TMP10, DST_1);
+                vis_st64(DST_0, dest[0]);
 
-		vis_padd16(REF_0, TMP4, REF_0);
+                vis_padd16(REF_0, TMP4, REF_0);
 
-		vis_mul8x16al(DST_2,   CONST_1024, TMP30);
-		vis_padd16(REF_2, TMP6, REF_2);
+                vis_mul8x16al(DST_2,   CONST_1024, TMP30);
+                vis_padd16(REF_2, TMP6, REF_2);
 
-		vis_mul8x16al(DST_3,   CONST_1024, TMP32);
-		vis_padd16(REF_0, REF_4, REF_0);
+                vis_mul8x16al(DST_3,   CONST_1024, TMP32);
+                vis_padd16(REF_0, REF_4, REF_0);
 
-		vis_padd16(REF_2, REF_6, REF_2);
+                vis_padd16(REF_2, REF_6, REF_2);
 
-		vis_padd16(REF_0, TMP30, REF_0);
+                vis_padd16(REF_0, TMP30, REF_0);
 
-		/* stall */
+                /* stall */
 
-		vis_padd16(REF_2, TMP32, REF_2);
-		vis_pack16(REF_0, DST_2);
+                vis_padd16(REF_2, TMP32, REF_2);
+                vis_pack16(REF_0, DST_2);
 
-		vis_pack16(REF_2, DST_3);
-		vis_st64_2(DST_2, dest, 8);
-		dest += stride;
-	} while (--height);
+                vis_pack16(REF_2, DST_3);
+                vis_st64_2(DST_2, dest, 8);
+                dest += stride;
+        } while (--height);
 }
 
 static void MC_avg_no_round_xy_8_vis (uint8_t * dest, const uint8_t * _ref,
-				      const int stride, int height)
+                                      const int stride, int height)
 {
-	uint8_t *ref = (uint8_t *) _ref;
-	unsigned long off = (unsigned long) ref & 0x7;
-	unsigned long off_plus_1 = off + 1;
-	int stride_8 = stride + 8;
+        uint8_t *ref = (uint8_t *) _ref;
+        unsigned long off = (unsigned long) ref & 0x7;
+        unsigned long off_plus_1 = off + 1;
+        int stride_8 = stride + 8;
 
-	vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
+        vis_set_gsr(4 << VIS_GSR_SCALEFACT_SHIFT);
 
-	ref = vis_alignaddr(ref);
+        ref = vis_alignaddr(ref);
 
-	vis_ld64(ref[0], TMP0);
-	vis_fzero(ZERO);
+        vis_ld64(ref[0], TMP0);
+        vis_fzero(ZERO);
 
-	vis_ld64_2(ref, 8, TMP2);
+        vis_ld64_2(ref, 8, TMP2);
 
-	vis_ld64(constants6[0], CONST_6);
+        vis_ld64(constants6[0], CONST_6);
 
-	vis_ld64(constants256_1024[0], CONST_256);
-	vis_faligndata(TMP0, TMP2, REF_S0);
+        vis_ld64(constants256_1024[0], CONST_256);
+        vis_faligndata(TMP0, TMP2, REF_S0);
 
-	if (off != 0x7) {
-		vis_alignaddr_g0((void *)off_plus_1);
-		vis_faligndata(TMP0, TMP2, REF_S2);
-	} else {
-		vis_src1(TMP2, REF_S2);
-	}
+        if (off != 0x7) {
+                vis_alignaddr_g0((void *)off_plus_1);
+                vis_faligndata(TMP0, TMP2, REF_S2);
+        } else {
+                vis_src1(TMP2, REF_S2);
+        }
 
-	height >>= 1;
-	do {	/* 31 cycles */
-		vis_ld64_2(ref, stride, TMP0);
-		vis_mul8x16au(REF_S0, CONST_256, TMP8);
-		vis_pmerge(ZERO,      REF_S0_1,  TMP10);
+        height >>= 1;
+        do {    /* 31 cycles */
+                vis_ld64_2(ref, stride, TMP0);
+                vis_mul8x16au(REF_S0, CONST_256, TMP8);
+                vis_pmerge(ZERO,      REF_S0_1,  TMP10);
 
-		vis_ld64_2(ref, stride_8, TMP2);
-		ref += stride;
-		vis_mul8x16au(REF_S2, CONST_256, TMP12);
-		vis_pmerge(ZERO,      REF_S2_1,  TMP14);
+                vis_ld64_2(ref, stride_8, TMP2);
+                ref += stride;
+                vis_mul8x16au(REF_S2, CONST_256, TMP12);
+                vis_pmerge(ZERO,      REF_S2_1,  TMP14);
 
-		vis_alignaddr_g0((void *)off);
+                vis_alignaddr_g0((void *)off);
 
-		vis_ld64_2(ref, stride, TMP4);
-		vis_faligndata(TMP0, TMP2, REF_S4);
+                vis_ld64_2(ref, stride, TMP4);
+                vis_faligndata(TMP0, TMP2, REF_S4);
 
-		vis_ld64_2(ref, stride_8, TMP6);
-		ref += stride;
+                vis_ld64_2(ref, stride_8, TMP6);
+                ref += stride;
 
-		vis_ld64(dest[0], DST_0);
-		vis_faligndata(TMP4, TMP6, REF_S0);
+                vis_ld64(dest[0], DST_0);
+                vis_faligndata(TMP4, TMP6, REF_S0);
 
-		vis_ld64_2(dest, stride, DST_2);
+                vis_ld64_2(dest, stride, DST_2);
 
-		if (off != 0x7) {
-			vis_alignaddr_g0((void *)off_plus_1);
-			vis_faligndata(TMP0, TMP2, REF_S6);
-			vis_faligndata(TMP4, TMP6, REF_S2);
-		} else {
-			vis_src1(TMP2, REF_S6);
-			vis_src1(TMP6, REF_S2);
-		}
+                if (off != 0x7) {
+                        vis_alignaddr_g0((void *)off_plus_1);
+                        vis_faligndata(TMP0, TMP2, REF_S6);
+                        vis_faligndata(TMP4, TMP6, REF_S2);
+                } else {
+                        vis_src1(TMP2, REF_S6);
+                        vis_src1(TMP6, REF_S2);
+                }
 
-		vis_mul8x16al(DST_0,   CONST_1024, TMP30);
-		vis_pmerge(ZERO, REF_S4, TMP22);
+                vis_mul8x16al(DST_0,   CONST_1024, TMP30);
+                vis_pmerge(ZERO, REF_S4, TMP22);
 
-		vis_mul8x16al(DST_1,   CONST_1024, TMP32);
-		vis_pmerge(ZERO,      REF_S4_1,  TMP24);
+                vis_mul8x16al(DST_1,   CONST_1024, TMP32);
+                vis_pmerge(ZERO,      REF_S4_1,  TMP24);
 
-		vis_mul8x16au(REF_S6, CONST_256, TMP26);
-		vis_pmerge(ZERO,      REF_S6_1,  TMP28);
+                vis_mul8x16au(REF_S6, CONST_256, TMP26);
+                vis_pmerge(ZERO,      REF_S6_1,  TMP28);
 
-		vis_mul8x16au(REF_S0, CONST_256, REF_S4);
-		vis_padd16(TMP22, CONST_6, TMP22);
+                vis_mul8x16au(REF_S0, CONST_256, REF_S4);
+                vis_padd16(TMP22, CONST_6, TMP22);
 
-		vis_mul8x16au(REF_S0_1, CONST_256, REF_S6);
-		vis_padd16(TMP24, CONST_6, TMP24);
+                vis_mul8x16au(REF_S0_1, CONST_256, REF_S6);
+                vis_padd16(TMP24, CONST_6, TMP24);
 
-		vis_mul8x16al(DST_2,   CONST_1024, REF_0);
-		vis_padd16(TMP22, TMP26, TMP22);
+                vis_mul8x16al(DST_2,   CONST_1024, REF_0);
+                vis_padd16(TMP22, TMP26, TMP22);
 
-		vis_mul8x16al(DST_3,   CONST_1024, REF_2);
-		vis_padd16(TMP24, TMP28, TMP24);
+                vis_mul8x16al(DST_3,   CONST_1024, REF_2);
+                vis_padd16(TMP24, TMP28, TMP24);
 
-		vis_mul8x16au(REF_S2, CONST_256, TMP26);
-		vis_padd16(TMP8, TMP22, TMP8);
+                vis_mul8x16au(REF_S2, CONST_256, TMP26);
+                vis_padd16(TMP8, TMP22, TMP8);
 
-		vis_mul8x16au(REF_S2_1, CONST_256, TMP28);
-		vis_padd16(TMP10, TMP24, TMP10);
+                vis_mul8x16au(REF_S2_1, CONST_256, TMP28);
+                vis_padd16(TMP10, TMP24, TMP10);
 
-		vis_padd16(TMP8, TMP12, TMP8);
+                vis_padd16(TMP8, TMP12, TMP8);
 
-		vis_padd16(TMP10, TMP14, TMP10);
+                vis_padd16(TMP10, TMP14, TMP10);
 
-		vis_padd16(TMP8, TMP30, TMP8);
+                vis_padd16(TMP8, TMP30, TMP8);
 
-		vis_padd16(TMP10, TMP32, TMP10);
-		vis_pack16(TMP8, DST_0);
+                vis_padd16(TMP10, TMP32, TMP10);
+                vis_pack16(TMP8, DST_0);
 
-		vis_pack16(TMP10, DST_1);
-		vis_st64(DST_0, dest[0]);
-		dest += stride;
+                vis_pack16(TMP10, DST_1);
+                vis_st64(DST_0, dest[0]);
+                dest += stride;
 
-		vis_padd16(REF_S4, TMP22, TMP12);
+                vis_padd16(REF_S4, TMP22, TMP12);
 
-		vis_padd16(REF_S6, TMP24, TMP14);
+                vis_padd16(REF_S6, TMP24, TMP14);
 
-		vis_padd16(TMP12, TMP26, TMP12);
+                vis_padd16(TMP12, TMP26, TMP12);
 
-		vis_padd16(TMP14, TMP28, TMP14);
+                vis_padd16(TMP14, TMP28, TMP14);
 
-		vis_padd16(TMP12, REF_0, TMP12);
+                vis_padd16(TMP12, REF_0, TMP12);
 
-		vis_padd16(TMP14, REF_2, TMP14);
-		vis_pack16(TMP12, DST_2);
+                vis_padd16(TMP14, REF_2, TMP14);
+                vis_pack16(TMP12, DST_2);
 
-		vis_pack16(TMP14, DST_3);
-		vis_st64(DST_2, dest[0]);
-		dest += stride;
-	} while (--height);
+                vis_pack16(TMP14, DST_3);
+                vis_st64(DST_2, dest[0]);
+                dest += stride;
+        } while (--height);
 }
 
 /* End of no rounding code */
 
 static sigjmp_buf jmpbuf;
 static volatile sig_atomic_t canjump = 0;
- 
+
 static void sigill_handler (int sig)
 {
     if (!canjump) {
@@ -4012,9 +4012,9 @@ static int vis_level ()
         signal (SIGILL, SIG_DFL);
         return accel;
     }
- 
+
     canjump = 1;
- 
+
     /* pdist %f0, %f0, %f0 */
     __asm__ __volatile__(".word\t0x81b007c0");
 
@@ -4050,37 +4050,37 @@ void dsputil_init_vis(DSPContext* c, AVCodecContext *avctx)
       c->put_pixels_tab[0][1] = MC_put_x_16_vis;
       c->put_pixels_tab[0][2] = MC_put_y_16_vis;
       c->put_pixels_tab[0][3] = MC_put_xy_16_vis;
-      
+
       c->put_pixels_tab[1][0] = MC_put_o_8_vis;
       c->put_pixels_tab[1][1] = MC_put_x_8_vis;
       c->put_pixels_tab[1][2] = MC_put_y_8_vis;
       c->put_pixels_tab[1][3] = MC_put_xy_8_vis;
-      
+
       c->avg_pixels_tab[0][0] = MC_avg_o_16_vis;
       c->avg_pixels_tab[0][1] = MC_avg_x_16_vis;
       c->avg_pixels_tab[0][2] = MC_avg_y_16_vis;
       c->avg_pixels_tab[0][3] = MC_avg_xy_16_vis;
-  
+
       c->avg_pixels_tab[1][0] = MC_avg_o_8_vis;
       c->avg_pixels_tab[1][1] = MC_avg_x_8_vis;
       c->avg_pixels_tab[1][2] = MC_avg_y_8_vis;
       c->avg_pixels_tab[1][3] = MC_avg_xy_8_vis;
-  
+
       c->put_no_rnd_pixels_tab[0][0] = MC_put_no_round_o_16_vis;
       c->put_no_rnd_pixels_tab[0][1] = MC_put_no_round_x_16_vis;
       c->put_no_rnd_pixels_tab[0][2] = MC_put_no_round_y_16_vis;
       c->put_no_rnd_pixels_tab[0][3] = MC_put_no_round_xy_16_vis;
-      
+
       c->put_no_rnd_pixels_tab[1][0] = MC_put_no_round_o_8_vis;
       c->put_no_rnd_pixels_tab[1][1] = MC_put_no_round_x_8_vis;
       c->put_no_rnd_pixels_tab[1][2] = MC_put_no_round_y_8_vis;
       c->put_no_rnd_pixels_tab[1][3] = MC_put_no_round_xy_8_vis;
-  
+
       c->avg_no_rnd_pixels_tab[0][0] = MC_avg_no_round_o_16_vis;
       c->avg_no_rnd_pixels_tab[0][1] = MC_avg_no_round_x_16_vis;
       c->avg_no_rnd_pixels_tab[0][2] = MC_avg_no_round_y_16_vis;
       c->avg_no_rnd_pixels_tab[0][3] = MC_avg_no_round_xy_16_vis;
-  
+
       c->avg_no_rnd_pixels_tab[1][0] = MC_avg_no_round_o_8_vis;
       c->avg_no_rnd_pixels_tab[1][1] = MC_avg_no_round_x_8_vis;
       c->avg_no_rnd_pixels_tab[1][2] = MC_avg_no_round_y_8_vis;
diff --git a/src/libffmpeg/libavcodec/sparc/vis.h b/src/libffmpeg/libavcodec/sparc/vis.h
index 07dda2949..dfdf2f619 100644
--- a/src/libffmpeg/libavcodec/sparc/vis.h
+++ b/src/libffmpeg/libavcodec/sparc/vis.h
@@ -17,7 +17,7 @@
  *
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /* You may be asking why I hard-code the instruction opcodes and don't
@@ -41,151 +41,151 @@
  * the assembler to keep the binary from becoming tainted.
  */
 
-#define vis_opc_base	((0x1 << 31) | (0x36 << 19))
-#define vis_opf(X)	((X) << 5)
-#define vis_sreg(X)	(X)
-#define vis_dreg(X)	(((X)&0x1f)|((X)>>5))
-#define vis_rs1_s(X)	(vis_sreg(X) << 14)
-#define vis_rs1_d(X)	(vis_dreg(X) << 14)
-#define vis_rs2_s(X)	(vis_sreg(X) << 0)
-#define vis_rs2_d(X)	(vis_dreg(X) << 0)
-#define vis_rd_s(X)	(vis_sreg(X) << 25)
-#define vis_rd_d(X)	(vis_dreg(X) << 25)
+#define vis_opc_base    ((0x1 << 31) | (0x36 << 19))
+#define vis_opf(X)      ((X) << 5)
+#define vis_sreg(X)     (X)
+#define vis_dreg(X)     (((X)&0x1f)|((X)>>5))
+#define vis_rs1_s(X)    (vis_sreg(X) << 14)
+#define vis_rs1_d(X)    (vis_dreg(X) << 14)
+#define vis_rs2_s(X)    (vis_sreg(X) << 0)
+#define vis_rs2_d(X)    (vis_dreg(X) << 0)
+#define vis_rd_s(X)     (vis_sreg(X) << 25)
+#define vis_rd_d(X)     (vis_dreg(X) << 25)
 
 #define vis_ss2s(opf,rs1,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                        vis_rs1_s(rs1) | \
                                        vis_rs2_s(rs2) | \
                                        vis_rd_s(rd)))
 
 #define vis_dd2d(opf,rs1,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                        vis_rs1_d(rs1) | \
                                        vis_rs2_d(rs2) | \
                                        vis_rd_d(rd)))
 
 #define vis_ss2d(opf,rs1,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                        vis_rs1_s(rs1) | \
                                        vis_rs2_s(rs2) | \
                                        vis_rd_d(rd)))
 
 #define vis_sd2d(opf,rs1,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                        vis_rs1_s(rs1) | \
                                        vis_rs2_d(rs2) | \
                                        vis_rd_d(rd)))
 
 #define vis_d2s(opf,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                        vis_rs2_d(rs2) | \
                                        vis_rd_s(rd)))
 
 #define vis_s2d(opf,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                        vis_rs2_s(rs2) | \
                                        vis_rd_d(rd)))
 
 #define vis_d12d(opf,rs1,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                        vis_rs1_d(rs1) | \
                                        vis_rd_d(rd)))
 
 #define vis_d22d(opf,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                        vis_rs2_d(rs2) | \
                                        vis_rd_d(rd)))
 
 #define vis_s12s(opf,rs1,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                        vis_rs1_s(rs1) | \
                                        vis_rd_s(rd)))
 
 #define vis_s22s(opf,rs2,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                        vis_rs2_s(rs2) | \
                                        vis_rd_s(rd)))
 
 #define vis_s(opf,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                        vis_rd_s(rd)))
 
 #define vis_d(opf,rd) \
-	__asm__ __volatile__ (".word %0" \
-			      : : "i" (vis_opc_base | vis_opf(opf) | \
+        __asm__ __volatile__ (".word %0" \
+                              : : "i" (vis_opc_base | vis_opf(opf) | \
                                        vis_rd_d(rd)))
 
 #define vis_r2m(op,rd,mem) \
-	__asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) )
+        __asm__ __volatile__ (#op "\t%%f" #rd ", [%0]" : : "r" (&(mem)) )
 
 #define vis_r2m_2(op,rd,mem1,mem2) \
-	__asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) )
+        __asm__ __volatile__ (#op "\t%%f" #rd ", [%0 + %1]" : : "r" (mem1), "r" (mem2) )
 
 #define vis_m2r(op,mem,rd) \
-	__asm__ __volatile__ (#op "\t[%0], %%f" #rd : : "r" (&(mem)) )
+        __asm__ __volatile__ (#op "\t[%0], %%f" #rd : : "r" (&(mem)) )
 
 #define vis_m2r_2(op,mem1,mem2,rd) \
-	__asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) )
+        __asm__ __volatile__ (#op "\t[%0 + %1], %%f" #rd : : "r" (mem1), "r" (mem2) )
 
 static inline void vis_set_gsr(unsigned int _val)
 {
-	register unsigned int val asm("g1");
+        register unsigned int val asm("g1");
 
-	val = _val;
-	__asm__ __volatile__(".word 0xa7804000"
-			     : : "r" (val));
+        val = _val;
+        __asm__ __volatile__(".word 0xa7804000"
+                             : : "r" (val));
 }
 
-#define VIS_GSR_ALIGNADDR_MASK	0x0000007
-#define VIS_GSR_ALIGNADDR_SHIFT	0
-#define VIS_GSR_SCALEFACT_MASK	0x0000078
-#define VIS_GSR_SCALEFACT_SHIFT	3
+#define VIS_GSR_ALIGNADDR_MASK          0x0000007
+#define VIS_GSR_ALIGNADDR_SHIFT         0
+#define VIS_GSR_SCALEFACT_MASK          0x0000078
+#define VIS_GSR_SCALEFACT_SHIFT         3
 
-#define vis_ld32(mem,rs1)		vis_m2r(ld, mem, rs1)
-#define vis_ld32_2(mem1,mem2,rs1)	vis_m2r_2(ld, mem1, mem2, rs1)
-#define vis_st32(rs1,mem)		vis_r2m(st, rs1, mem)
-#define vis_st32_2(rs1,mem1,mem2)	vis_r2m_2(st, rs1, mem1, mem2)
-#define vis_ld64(mem,rs1)		vis_m2r(ldd, mem, rs1)
-#define vis_ld64_2(mem1,mem2,rs1)	vis_m2r_2(ldd, mem1, mem2, rs1)
-#define vis_st64(rs1,mem)		vis_r2m(std, rs1, mem)
-#define vis_st64_2(rs1,mem1,mem2)	vis_r2m_2(std, rs1, mem1, mem2)
+#define vis_ld32(mem,rs1)               vis_m2r(ld, mem, rs1)
+#define vis_ld32_2(mem1,mem2,rs1)       vis_m2r_2(ld, mem1, mem2, rs1)
+#define vis_st32(rs1,mem)               vis_r2m(st, rs1, mem)
+#define vis_st32_2(rs1,mem1,mem2)       vis_r2m_2(st, rs1, mem1, mem2)
+#define vis_ld64(mem,rs1)               vis_m2r(ldd, mem, rs1)
+#define vis_ld64_2(mem1,mem2,rs1)       vis_m2r_2(ldd, mem1, mem2, rs1)
+#define vis_st64(rs1,mem)               vis_r2m(std, rs1, mem)
+#define vis_st64_2(rs1,mem1,mem2)       vis_r2m_2(std, rs1, mem1, mem2)
 
 #define vis_ldblk(mem, rd) \
-do {	register void *__mem asm("g1"); \
-	__mem = &(mem); \
-	__asm__ __volatile__(".word 0xc1985e00 | %1" \
-			     : \
-			     : "r" (__mem), \
-			       "i" (vis_rd_d(rd)) \
-			     : "memory"); \
+do {        register void *__mem asm("g1"); \
+        __mem = &(mem); \
+        __asm__ __volatile__(".word 0xc1985e00 | %1" \
+                             : \
+                             : "r" (__mem), \
+                               "i" (vis_rd_d(rd)) \
+                             : "memory"); \
 } while (0)
 
 #define vis_stblk(rd, mem) \
-do {	register void *__mem asm("g1"); \
-	__mem = &(mem); \
-	__asm__ __volatile__(".word 0xc1b85e00 | %1" \
-			     : \
-			     : "r" (__mem), \
-			       "i" (vis_rd_d(rd)) \
-			     : "memory"); \
+do {        register void *__mem asm("g1"); \
+        __mem = &(mem); \
+        __asm__ __volatile__(".word 0xc1b85e00 | %1" \
+                             : \
+                             : "r" (__mem), \
+                               "i" (vis_rd_d(rd)) \
+                             : "memory"); \
 } while (0)
 
-#define vis_membar_storestore()	\
-	__asm__ __volatile__(".word 0x8143e008" : : : "memory")
+#define vis_membar_storestore()        \
+        __asm__ __volatile__(".word 0x8143e008" : : : "memory")
 
-#define vis_membar_sync()	\
-	__asm__ __volatile__(".word 0x8143e040" : : : "memory")
+#define vis_membar_sync()        \
+        __asm__ __volatile__(".word 0x8143e040" : : : "memory")
 
 /* 16 and 32 bit partitioned addition and subtraction.  The normal
  * versions perform 4 16-bit or 2 32-bit additions or subtractions.
@@ -193,136 +193,136 @@ do {	register void *__mem asm("g1"); \
  * subtractions.
  */
 
-#define vis_padd16(rs1,rs2,rd)		vis_dd2d(0x50, rs1, rs2, rd)
-#define vis_padd16s(rs1,rs2,rd)		vis_ss2s(0x51, rs1, rs2, rd)
-#define vis_padd32(rs1,rs2,rd)		vis_dd2d(0x52, rs1, rs2, rd)
-#define vis_padd32s(rs1,rs2,rd)		vis_ss2s(0x53, rs1, rs2, rd)
-#define vis_psub16(rs1,rs2,rd)		vis_dd2d(0x54, rs1, rs2, rd)
-#define vis_psub16s(rs1,rs2,rd)		vis_ss2s(0x55, rs1, rs2, rd)
-#define vis_psub32(rs1,rs2,rd)		vis_dd2d(0x56, rs1, rs2, rd)
-#define vis_psub32s(rs1,rs2,rd)		vis_ss2s(0x57, rs1, rs2, rd)
+#define vis_padd16(rs1,rs2,rd)          vis_dd2d(0x50, rs1, rs2, rd)
+#define vis_padd16s(rs1,rs2,rd)         vis_ss2s(0x51, rs1, rs2, rd)
+#define vis_padd32(rs1,rs2,rd)          vis_dd2d(0x52, rs1, rs2, rd)
+#define vis_padd32s(rs1,rs2,rd)         vis_ss2s(0x53, rs1, rs2, rd)
+#define vis_psub16(rs1,rs2,rd)          vis_dd2d(0x54, rs1, rs2, rd)
+#define vis_psub16s(rs1,rs2,rd)         vis_ss2s(0x55, rs1, rs2, rd)
+#define vis_psub32(rs1,rs2,rd)          vis_dd2d(0x56, rs1, rs2, rd)
+#define vis_psub32s(rs1,rs2,rd)         vis_ss2s(0x57, rs1, rs2, rd)
 
 /* Pixel formatting instructions.  */
 
-#define vis_pack16(rs2,rd)		vis_d2s( 0x3b,      rs2, rd)
-#define vis_pack32(rs1,rs2,rd)		vis_dd2d(0x3a, rs1, rs2, rd)
-#define vis_packfix(rs2,rd)		vis_d2s( 0x3d,      rs2, rd)
-#define vis_expand(rs2,rd)		vis_s2d( 0x4d,      rs2, rd)
-#define vis_pmerge(rs1,rs2,rd)		vis_ss2d(0x4b, rs1, rs2, rd)
+#define vis_pack16(rs2,rd)              vis_d2s( 0x3b,      rs2, rd)
+#define vis_pack32(rs1,rs2,rd)          vis_dd2d(0x3a, rs1, rs2, rd)
+#define vis_packfix(rs2,rd)             vis_d2s( 0x3d,      rs2, rd)
+#define vis_expand(rs2,rd)              vis_s2d( 0x4d,      rs2, rd)
+#define vis_pmerge(rs1,rs2,rd)          vis_ss2d(0x4b, rs1, rs2, rd)
 
 /* Partitioned multiply instructions.  */
 
-#define vis_mul8x16(rs1,rs2,rd)		vis_sd2d(0x31, rs1, rs2, rd)
-#define vis_mul8x16au(rs1,rs2,rd)	vis_ss2d(0x33, rs1, rs2, rd)
-#define vis_mul8x16al(rs1,rs2,rd)	vis_ss2d(0x35, rs1, rs2, rd)
-#define vis_mul8sux16(rs1,rs2,rd)	vis_dd2d(0x36, rs1, rs2, rd)
-#define vis_mul8ulx16(rs1,rs2,rd)	vis_dd2d(0x37, rs1, rs2, rd)
-#define vis_muld8sux16(rs1,rs2,rd)	vis_ss2d(0x38, rs1, rs2, rd)
-#define vis_muld8ulx16(rs1,rs2,rd)	vis_ss2d(0x39, rs1, rs2, rd)
+#define vis_mul8x16(rs1,rs2,rd)         vis_sd2d(0x31, rs1, rs2, rd)
+#define vis_mul8x16au(rs1,rs2,rd)       vis_ss2d(0x33, rs1, rs2, rd)
+#define vis_mul8x16al(rs1,rs2,rd)       vis_ss2d(0x35, rs1, rs2, rd)
+#define vis_mul8sux16(rs1,rs2,rd)       vis_dd2d(0x36, rs1, rs2, rd)
+#define vis_mul8ulx16(rs1,rs2,rd)       vis_dd2d(0x37, rs1, rs2, rd)
+#define vis_muld8sux16(rs1,rs2,rd)      vis_ss2d(0x38, rs1, rs2, rd)
+#define vis_muld8ulx16(rs1,rs2,rd)      vis_ss2d(0x39, rs1, rs2, rd)
 
 /* Alignment instructions.  */
 
 static inline void *vis_alignaddr(void *_ptr)
 {
-	register void *ptr asm("g1");
+        register void *ptr asm("g1");
 
-	ptr = _ptr;
+        ptr = _ptr;
 
-	__asm__ __volatile__(".word %2"
-			     : "=&r" (ptr)
-			     : "0" (ptr),
-			       "i" (vis_opc_base | vis_opf(0x18) |
-				    vis_rs1_s(1) |
-				    vis_rs2_s(0) |
-				    vis_rd_s(1)));
+        __asm__ __volatile__(".word %2"
+                             : "=&r" (ptr)
+                             : "0" (ptr),
+                               "i" (vis_opc_base | vis_opf(0x18) |
+                                    vis_rs1_s(1) |
+                                    vis_rs2_s(0) |
+                                    vis_rd_s(1)));
 
-	return ptr;
+        return ptr;
 }
 
 static inline void vis_alignaddr_g0(void *_ptr)
 {
-	register void *ptr asm("g1");
+        register void *ptr asm("g1");
 
-	ptr = _ptr;
+        ptr = _ptr;
 
-	__asm__ __volatile__(".word %2"
-			     : "=&r" (ptr)
-			     : "0" (ptr),
-			       "i" (vis_opc_base | vis_opf(0x18) |
-				    vis_rs1_s(1) |
-				    vis_rs2_s(0) |
-				    vis_rd_s(0)));
+        __asm__ __volatile__(".word %2"
+                             : "=&r" (ptr)
+                             : "0" (ptr),
+                               "i" (vis_opc_base | vis_opf(0x18) |
+                                    vis_rs1_s(1) |
+                                    vis_rs2_s(0) |
+                                    vis_rd_s(0)));
 }
 
 static inline void *vis_alignaddrl(void *_ptr)
 {
-	register void *ptr asm("g1");
+        register void *ptr asm("g1");
 
-	ptr = _ptr;
+        ptr = _ptr;
 
-	__asm__ __volatile__(".word %2"
-			     : "=&r" (ptr)
-			     : "0" (ptr),
-			       "i" (vis_opc_base | vis_opf(0x19) |
-				    vis_rs1_s(1) |
-				    vis_rs2_s(0) |
-				    vis_rd_s(1)));
+        __asm__ __volatile__(".word %2"
+                             : "=&r" (ptr)
+                             : "0" (ptr),
+                               "i" (vis_opc_base | vis_opf(0x19) |
+                                    vis_rs1_s(1) |
+                                    vis_rs2_s(0) |
+                                    vis_rd_s(1)));
 
-	return ptr;
+        return ptr;
 }
 
 static inline void vis_alignaddrl_g0(void *_ptr)
 {
-	register void *ptr asm("g1");
+        register void *ptr asm("g1");
 
-	ptr = _ptr;
+        ptr = _ptr;
 
-	__asm__ __volatile__(".word %2"
-			     : "=&r" (ptr)
-			     : "0" (ptr),
-			       "i" (vis_opc_base | vis_opf(0x19) |
-				    vis_rs1_s(1) |
-				    vis_rs2_s(0) |
-				    vis_rd_s(0)));
+        __asm__ __volatile__(".word %2"
+                             : "=&r" (ptr)
+                             : "0" (ptr),
+                               "i" (vis_opc_base | vis_opf(0x19) |
+                                    vis_rs1_s(1) |
+                                    vis_rs2_s(0) |
+                                    vis_rd_s(0)));
 }
 
-#define vis_faligndata(rs1,rs2,rd)	vis_dd2d(0x48, rs1, rs2, rd)
+#define vis_faligndata(rs1,rs2,rd)        vis_dd2d(0x48, rs1, rs2, rd)
 
 /* Logical operate instructions.  */
 
-#define vis_fzero(rd)			vis_d(   0x60,           rd)
-#define vis_fzeros(rd)			vis_s(   0x61,           rd)
-#define vis_fone(rd)			vis_d(   0x7e,           rd)
-#define vis_fones(rd)			vis_s(   0x7f,           rd)
-#define vis_src1(rs1,rd)		vis_d12d(0x74, rs1,      rd)
-#define vis_src1s(rs1,rd)		vis_s12s(0x75, rs1,      rd)
-#define vis_src2(rs2,rd)		vis_d22d(0x78,      rs2, rd)
-#define vis_src2s(rs2,rd)		vis_s22s(0x79,      rs2, rd)
-#define vis_not1(rs1,rd)		vis_d12d(0x6a, rs1,      rd)
-#define vis_not1s(rs1,rd)		vis_s12s(0x6b, rs1,      rd)
-#define vis_not2(rs2,rd)		vis_d22d(0x66,      rs2, rd)
-#define vis_not2s(rs2,rd)		vis_s22s(0x67,      rs2, rd)
-#define vis_or(rs1,rs2,rd)		vis_dd2d(0x7c, rs1, rs2, rd)
-#define vis_ors(rs1,rs2,rd)		vis_ss2s(0x7d, rs1, rs2, rd)
-#define vis_nor(rs1,rs2,rd)		vis_dd2d(0x62, rs1, rs2, rd)
-#define vis_nors(rs1,rs2,rd)		vis_ss2s(0x63, rs1, rs2, rd)
-#define vis_and(rs1,rs2,rd)		vis_dd2d(0x70, rs1, rs2, rd)
-#define vis_ands(rs1,rs2,rd)		vis_ss2s(0x71, rs1, rs2, rd)
-#define vis_nand(rs1,rs2,rd)		vis_dd2d(0x6e, rs1, rs2, rd)
-#define vis_nands(rs1,rs2,rd)		vis_ss2s(0x6f, rs1, rs2, rd)
-#define vis_xor(rs1,rs2,rd)		vis_dd2d(0x6c, rs1, rs2, rd)
-#define vis_xors(rs1,rs2,rd)		vis_ss2s(0x6d, rs1, rs2, rd)
-#define vis_xnor(rs1,rs2,rd)		vis_dd2d(0x72, rs1, rs2, rd)
-#define vis_xnors(rs1,rs2,rd)		vis_ss2s(0x73, rs1, rs2, rd)
-#define vis_ornot1(rs1,rs2,rd)		vis_dd2d(0x7a, rs1, rs2, rd)
-#define vis_ornot1s(rs1,rs2,rd)		vis_ss2s(0x7b, rs1, rs2, rd)
-#define vis_ornot2(rs1,rs2,rd)		vis_dd2d(0x76, rs1, rs2, rd)
-#define vis_ornot2s(rs1,rs2,rd)		vis_ss2s(0x77, rs1, rs2, rd)
-#define vis_andnot1(rs1,rs2,rd)		vis_dd2d(0x68, rs1, rs2, rd)
-#define vis_andnot1s(rs1,rs2,rd)	vis_ss2s(0x69, rs1, rs2, rd)
-#define vis_andnot2(rs1,rs2,rd)		vis_dd2d(0x64, rs1, rs2, rd)
-#define vis_andnot2s(rs1,rs2,rd)	vis_ss2s(0x65, rs1, rs2, rd)
+#define vis_fzero(rd)                   vis_d(   0x60,           rd)
+#define vis_fzeros(rd)                  vis_s(   0x61,           rd)
+#define vis_fone(rd)                    vis_d(   0x7e,           rd)
+#define vis_fones(rd)                   vis_s(   0x7f,           rd)
+#define vis_src1(rs1,rd)                vis_d12d(0x74, rs1,      rd)
+#define vis_src1s(rs1,rd)               vis_s12s(0x75, rs1,      rd)
+#define vis_src2(rs2,rd)                vis_d22d(0x78,      rs2, rd)
+#define vis_src2s(rs2,rd)               vis_s22s(0x79,      rs2, rd)
+#define vis_not1(rs1,rd)                vis_d12d(0x6a, rs1,      rd)
+#define vis_not1s(rs1,rd)               vis_s12s(0x6b, rs1,      rd)
+#define vis_not2(rs2,rd)                vis_d22d(0x66,      rs2, rd)
+#define vis_not2s(rs2,rd)               vis_s22s(0x67,      rs2, rd)
+#define vis_or(rs1,rs2,rd)              vis_dd2d(0x7c, rs1, rs2, rd)
+#define vis_ors(rs1,rs2,rd)             vis_ss2s(0x7d, rs1, rs2, rd)
+#define vis_nor(rs1,rs2,rd)             vis_dd2d(0x62, rs1, rs2, rd)
+#define vis_nors(rs1,rs2,rd)            vis_ss2s(0x63, rs1, rs2, rd)
+#define vis_and(rs1,rs2,rd)             vis_dd2d(0x70, rs1, rs2, rd)
+#define vis_ands(rs1,rs2,rd)            vis_ss2s(0x71, rs1, rs2, rd)
+#define vis_nand(rs1,rs2,rd)            vis_dd2d(0x6e, rs1, rs2, rd)
+#define vis_nands(rs1,rs2,rd)           vis_ss2s(0x6f, rs1, rs2, rd)
+#define vis_xor(rs1,rs2,rd)             vis_dd2d(0x6c, rs1, rs2, rd)
+#define vis_xors(rs1,rs2,rd)            vis_ss2s(0x6d, rs1, rs2, rd)
+#define vis_xnor(rs1,rs2,rd)            vis_dd2d(0x72, rs1, rs2, rd)
+#define vis_xnors(rs1,rs2,rd)           vis_ss2s(0x73, rs1, rs2, rd)
+#define vis_ornot1(rs1,rs2,rd)          vis_dd2d(0x7a, rs1, rs2, rd)
+#define vis_ornot1s(rs1,rs2,rd)         vis_ss2s(0x7b, rs1, rs2, rd)
+#define vis_ornot2(rs1,rs2,rd)          vis_dd2d(0x76, rs1, rs2, rd)
+#define vis_ornot2s(rs1,rs2,rd)         vis_ss2s(0x77, rs1, rs2, rd)
+#define vis_andnot1(rs1,rs2,rd)         vis_dd2d(0x68, rs1, rs2, rd)
+#define vis_andnot1s(rs1,rs2,rd)        vis_ss2s(0x69, rs1, rs2, rd)
+#define vis_andnot2(rs1,rs2,rd)         vis_dd2d(0x64, rs1, rs2, rd)
+#define vis_andnot2s(rs1,rs2,rd)        vis_ss2s(0x65, rs1, rs2, rd)
 
 /* Pixel component distance.  */
 
-#define vis_pdist(rs1,rs2,rd)		vis_dd2d(0x3e, rs1, rs2, rd)
+#define vis_pdist(rs1,rs2,rd)           vis_dd2d(0x3e, rs1, rs2, rd)
diff --git a/src/libffmpeg/libavcodec/svq1.c b/src/libffmpeg/libavcodec/svq1.c
index b94472e34..98a7a3cd8 100644
--- a/src/libffmpeg/libavcodec/svq1.c
+++ b/src/libffmpeg/libavcodec/svq1.c
@@ -1,8 +1,8 @@
 /*
- * 
+ *
  * Copyright (C) 2002 the xine project
  * Copyright (C) 2002 the ffmpeg project
- * 
+ *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * (SVQ1 Decoder)
  * Ported to mplayer by Arpi <arpi@thot.banki.hu>
@@ -57,10 +57,10 @@ static VLC svq1_inter_multistage[6];
 static VLC svq1_intra_mean;
 static VLC svq1_inter_mean;
 
-#define SVQ1_BLOCK_SKIP		0
-#define SVQ1_BLOCK_INTER	1
-#define SVQ1_BLOCK_INTER_4V	2
-#define SVQ1_BLOCK_INTRA	3
+#define SVQ1_BLOCK_SKIP         0
+#define SVQ1_BLOCK_INTER        1
+#define SVQ1_BLOCK_INTER_4V     2
+#define SVQ1_BLOCK_INTRA        3
 
 typedef struct SVQ1Context {
     MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independant of MpegEncContext, so this will be removed then (FIXME/XXX)
@@ -71,7 +71,7 @@ typedef struct SVQ1Context {
     AVFrame last_picture;
     PutBitContext pb;
     GetBitContext gb;
-    
+
     PutBitContext reorder_pb[6]; //why ooh why this sick breadth first order, everything is slower and more complex
 
     int frame_width;
@@ -84,7 +84,7 @@ typedef struct SVQ1Context {
     /* U & V plane (C planes) block dimensions */
     int c_block_width;
     int c_block_height;
-    
+
     uint16_t *mb_type;
     uint32_t *dummy;
     int16_t (*motion_val8[3])[2];
@@ -95,8 +95,8 @@ typedef struct SVQ1Context {
 
 /* motion vector (prediction) */
 typedef struct svq1_pmv_s {
-  int		 x;
-  int		 y;
+  int           x;
+  int           y;
 } svq1_pmv_t;
 
 #include "svq1_cb.h"
@@ -176,65 +176,65 @@ static const uint8_t string_table[256] = {
     for (; level > 0; i++) {\
       /* process next depth */\
       if (i == m) {\
-	m = n;\
-	if (--level == 0)\
-	  break;\
+        m = n;\
+        if (--level == 0)\
+          break;\
       }\
       /* divide block if next bit set */\
       if (get_bits (bitbuf, 1) == 0)\
-	break;\
+        break;\
       /* add child nodes */\
       list[n++] = list[i];\
       list[n++] = list[i] + (((level & 1) ? pitch : 1) << ((level / 2) + 1));\
     }
 
 #define SVQ1_ADD_CODEBOOK()\
-	  /* add codebook entries to vector */\
-	  for (j=0; j < stages; j++) {\
-	    n3  = codebook[entries[j]] ^ 0x80808080;\
-	    n1 += ((n3 & 0xFF00FF00) >> 8);\
-	    n2 +=  (n3 & 0x00FF00FF);\
-	  }\
+          /* add codebook entries to vector */\
+          for (j=0; j < stages; j++) {\
+            n3  = codebook[entries[j]] ^ 0x80808080;\
+            n1 += ((n3 & 0xFF00FF00) >> 8);\
+            n2 +=  (n3 & 0x00FF00FF);\
+          }\
 \
-	  /* clip to [0..255] */\
-	  if (n1 & 0xFF00FF00) {\
-	    n3  = ((( n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
-	    n1 += 0x7F007F00;\
-	    n1 |= (((~n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
-	    n1 &= (n3 & 0x00FF00FF);\
-	  }\
+          /* clip to [0..255] */\
+          if (n1 & 0xFF00FF00) {\
+            n3  = ((( n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
+            n1 += 0x7F007F00;\
+            n1 |= (((~n1 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
+            n1 &= (n3 & 0x00FF00FF);\
+          }\
 \
-	  if (n2 & 0xFF00FF00) {\
-	    n3  = ((( n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
-	    n2 += 0x7F007F00;\
-	    n2 |= (((~n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
-	    n2 &= (n3 & 0x00FF00FF);\
-	  }
+          if (n2 & 0xFF00FF00) {\
+            n3  = ((( n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
+            n2 += 0x7F007F00;\
+            n2 |= (((~n2 >> 15) & 0x00010001) | 0x01000100) - 0x00010001;\
+            n2 &= (n3 & 0x00FF00FF);\
+          }
 
 #define SVQ1_DO_CODEBOOK_INTRA()\
       for (y=0; y < height; y++) {\
-	for (x=0; x < (width / 4); x++, codebook++) {\
-	n1 = n4;\
-	n2 = n4;\
-	SVQ1_ADD_CODEBOOK()\
-	/* store result */\
-	dst[x] = (n1 << 8) | n2;\
-	}\
-	dst += (pitch / 4);\
+        for (x=0; x < (width / 4); x++, codebook++) {\
+        n1 = n4;\
+        n2 = n4;\
+        SVQ1_ADD_CODEBOOK()\
+        /* store result */\
+        dst[x] = (n1 << 8) | n2;\
+        }\
+        dst += (pitch / 4);\
       }
 
 #define SVQ1_DO_CODEBOOK_NONINTRA()\
       for (y=0; y < height; y++) {\
-	for (x=0; x < (width / 4); x++, codebook++) {\
-	n3 = dst[x];\
-	/* add mean value to vector */\
-	n1 = ((n3 & 0xFF00FF00) >> 8) + n4;\
-	n2 =  (n3 & 0x00FF00FF)	  + n4;\
-	SVQ1_ADD_CODEBOOK()\
-	/* store result */\
-	dst[x] = (n1 << 8) | n2;\
-	}\
-	dst += (pitch / 4);\
+        for (x=0; x < (width / 4); x++, codebook++) {\
+        n3 = dst[x];\
+        /* add mean value to vector */\
+        n1 = ((n3 & 0xFF00FF00) >> 8) + n4;\
+        n2 =  (n3 & 0x00FF00FF)          + n4;\
+        SVQ1_ADD_CODEBOOK()\
+        /* store result */\
+        dst[x] = (n1 << 8) | n2;\
+        }\
+        dst += (pitch / 4);\
       }
 
 #define SVQ1_CALC_CODEBOOK_ENTRIES(cbook)\
@@ -242,7 +242,7 @@ static const uint8_t string_table[256] = {
       bit_cache = get_bits (bitbuf, 4*stages);\
       /* calculate codebook entries for this vector */\
       for (j=0; j < stages; j++) {\
-	entries[j] = (((bit_cache >> (4*(stages - j - 1))) & 0xF) + 16*j) << (level + 1);\
+        entries[j] = (((bit_cache >> (4*(stages - j - 1))) & 0xF) + 16*j) << (level + 1);\
       }\
       mean -= (stages * 128);\
       n4    = ((mean + (mean >> 31)) << 16) | (mean & 0xFFFF);
@@ -252,9 +252,9 @@ static int svq1_decode_block_intra (GetBitContext *bitbuf, uint8_t *pixels, int
   uint8_t    *list[63];
   uint32_t   *dst;
   const uint32_t *codebook;
-  int	      entries[6];
-  int	      i, j, m, n;
-  int	      mean, stages;
+  int         entries[6];
+  int         i, j, m, n;
+  int         mean, stages;
   unsigned    x, y, width, height, level;
   uint32_t    n1, n2, n3, n4;
 
@@ -274,24 +274,24 @@ static int svq1_decode_block_intra (GetBitContext *bitbuf, uint8_t *pixels, int
     stages = get_vlc2(bitbuf, svq1_intra_multistage[level].table, 3, 3) - 1;
 
     if (stages == -1) {
-	for (y=0; y < height; y++) {
-	  memset (&dst[y*(pitch / 4)], 0, width);
-	}
-      continue;		/* skip vector */
+        for (y=0; y < height; y++) {
+          memset (&dst[y*(pitch / 4)], 0, width);
+        }
+      continue;                 /* skip vector */
     }
 
     if ((stages > 0) && (level >= 4)) {
 #ifdef DEBUG_SVQ1
     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_intra): invalid vector: stages=%i level=%i\n",stages,level);
 #endif
-      return -1;	/* invalid vector */
+      return -1;        /* invalid vector */
     }
 
     mean = get_vlc2(bitbuf, svq1_intra_mean.table, 8, 3);
 
     if (stages == 0) {
       for (y=0; y < height; y++) {
-	memset (&dst[y*(pitch / 4)], mean, width);
+        memset (&dst[y*(pitch / 4)], mean, width);
       }
     } else {
       SVQ1_CALC_CODEBOOK_ENTRIES(svq1_intra_codebooks);
@@ -307,10 +307,10 @@ static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels,
   uint8_t    *list[63];
   uint32_t   *dst;
   const uint32_t *codebook;
-  int	      entries[6];
-  int	      i, j, m, n;
-  int	      mean, stages;
-  int	      x, y, width, height, level;
+  int         entries[6];
+  int         i, j, m, n;
+  int         mean, stages;
+  int         x, y, width, height, level;
   uint32_t    n1, n2, n3, n4;
 
   /* initialize list for breadth first processing of vectors */
@@ -328,13 +328,13 @@ static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels,
     /* get number of stages (-1 skips vector, 0 for mean only) */
     stages = get_vlc2(bitbuf, svq1_inter_multistage[level].table, 3, 2) - 1;
 
-    if (stages == -1) continue;	/* skip vector */
+    if (stages == -1) continue; /* skip vector */
 
     if ((stages > 0) && (level >= 4)) {
 #ifdef DEBUG_SVQ1
     av_log(s->avctx, AV_LOG_INFO, "Error (svq1_decode_block_non_intra): invalid vector: stages=%i level=%i\n",stages,level);
 #endif
-      return -1;	/* invalid vector */
+      return -1;        /* invalid vector */
     }
 
     mean = get_vlc2(bitbuf, svq1_inter_mean.table, 9, 3) - 256;
@@ -346,14 +346,14 @@ static int svq1_decode_block_non_intra (GetBitContext *bitbuf, uint8_t *pixels,
 }
 
 static int svq1_decode_motion_vector (GetBitContext *bitbuf, svq1_pmv_t *mv, svq1_pmv_t **pmv) {
-  int	      diff;
-  int	      i;
+  int        diff;
+  int        i;
 
   for (i=0; i < 2; i++) {
 
     /* get motion code */
     diff = get_vlc2(bitbuf, svq1_motion_component.table, 7, 2);
-    if(diff<0) 
+    if(diff<0)
         return -1;
     else if(diff){
         if(get_bits1(bitbuf)) diff= -diff;
@@ -372,7 +372,7 @@ static int svq1_decode_motion_vector (GetBitContext *bitbuf, svq1_pmv_t *mv, svq
 static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int x, int y) {
   uint8_t *src;
   uint8_t *dst;
-  int	   i;
+  int      i;
 
   src = &previous[x + y*pitch];
   dst = current;
@@ -385,13 +385,13 @@ static void svq1_skip_block (uint8_t *current, uint8_t *previous, int pitch, int
 }
 
 static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
-			       uint8_t *current, uint8_t *previous, int pitch,
-			       svq1_pmv_t *motion, int x, int y) {
+                               uint8_t *current, uint8_t *previous, int pitch,
+                               svq1_pmv_t *motion, int x, int y) {
   uint8_t    *src;
   uint8_t    *dst;
   svq1_pmv_t  mv;
   svq1_pmv_t *pmv[3];
-  int	      result;
+  int         result;
 
   /* predict and decode motion vector */
   pmv[0] = &motion[0];
@@ -409,13 +409,13 @@ static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
   if (result != 0)
     return result;
 
-  motion[0].x		=
-  motion[(x / 8) + 2].x	=
-  motion[(x / 8) + 3].x	= mv.x;
-  motion[0].y		=
-  motion[(x / 8) + 2].y	=
-  motion[(x / 8) + 3].y	= mv.y;
-  
+  motion[0].x                =
+  motion[(x / 8) + 2].x      =
+  motion[(x / 8) + 3].x      = mv.x;
+  motion[0].y                =
+  motion[(x / 8) + 2].y      =
+  motion[(x / 8) + 3].y      = mv.y;
+
   if(y + (mv.y >> 1)<0)
      mv.y= 0;
   if(x + (mv.x >> 1)<0)
@@ -427,7 +427,7 @@ static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
   if(x + (mv.x >> 1)<0 || y + (mv.y >> 1)<0 || x + (mv.x >> 1) + 16 > w || y + (mv.y >> 1) + 16> h)
       av_log(s->avctx, AV_LOG_INFO, "%d %d %d %d\n", x, y, x + (mv.x >> 1), y + (mv.y >> 1));
 #endif
- 
+
   src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
   dst = current;
 
@@ -437,13 +437,13 @@ static int svq1_motion_inter_block (MpegEncContext *s, GetBitContext *bitbuf,
 }
 
 static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
-				  uint8_t *current, uint8_t *previous, int pitch,
-				  svq1_pmv_t *motion,int x, int y) {
+                                  uint8_t *current, uint8_t *previous, int pitch,
+                                  svq1_pmv_t *motion,int x, int y) {
   uint8_t    *src;
   uint8_t    *dst;
   svq1_pmv_t  mv;
   svq1_pmv_t *pmv[4];
-  int	      i, result;
+  int         i, result;
 
   /* predict and decode motion vector (0) */
   pmv[0] = &motion[0];
@@ -497,7 +497,7 @@ static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
   for (i=0; i < 4; i++) {
     int mvx= pmv[i]->x + (i&1)*16;
     int mvy= pmv[i]->y + (i>>1)*16;
-  
+
     ///XXX /FIXME cliping or padding?
     if(y + (mvy >> 1)<0)
        mvy= 0;
@@ -512,7 +512,7 @@ static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
 #endif
     src = &previous[(x + (mvx >> 1)) + (y + (mvy >> 1))*pitch];
     dst = current;
-    
+
     s->dsp.put_pixels_tab[1][((mvy & 1) << 1) | (mvx & 1)](dst,src,pitch,8);
 
     /* select next block */
@@ -527,18 +527,18 @@ static int svq1_motion_inter_4v_block (MpegEncContext *s, GetBitContext *bitbuf,
 }
 
 static int svq1_decode_delta_block (MpegEncContext *s, GetBitContext *bitbuf,
-			uint8_t *current, uint8_t *previous, int pitch,
-			svq1_pmv_t *motion, int x, int y) {
+                        uint8_t *current, uint8_t *previous, int pitch,
+                        svq1_pmv_t *motion, int x, int y) {
   uint32_t block_type;
-  int	   result = 0;
+  int      result = 0;
 
   /* get block type */
   block_type = get_vlc2(bitbuf, svq1_block_type.table, 2, 2);
 
   /* reset motion vectors */
   if (block_type == SVQ1_BLOCK_SKIP || block_type == SVQ1_BLOCK_INTRA) {
-    motion[0].x		  =
-    motion[0].y		  =
+    motion[0].x                 =
+    motion[0].y                 =
     motion[(x / 8) + 2].x =
     motion[(x / 8) + 2].y =
     motion[(x / 8) + 3].x =
@@ -639,9 +639,9 @@ static int svq1_decode_frame_header (GetBitContext *bitbuf,MpegEncContext *s) {
 
   /* frame type */
   s->pict_type= get_bits (bitbuf, 2)+1;
-  if(s->pict_type==4) 
+  if(s->pict_type==4)
       return -1;
-      
+
   if (s->pict_type == I_TYPE) {
 
     /* unknown fields */
@@ -702,18 +702,18 @@ static int svq1_decode_frame_header (GetBitContext *bitbuf,MpegEncContext *s) {
       skip_bits (bitbuf, 8);
     }
   }
-  
+
   return 0;
 }
 
-static int svq1_decode_frame(AVCodecContext *avctx, 
+static int svq1_decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
                              uint8_t *buf, int buf_size)
 {
   MpegEncContext *s=avctx->priv_data;
-  uint8_t      *current, *previous;
-  int		result, i, x, y, width, height;
-  AVFrame *pict = data; 
+  uint8_t        *current, *previous;
+  int             result, i, x, y, width, height;
+  AVFrame *pict = data;
 
   /* initialize bit buffer */
   init_get_bits(&s->gb,buf,buf_size*8);
@@ -742,16 +742,16 @@ static int svq1_decode_frame(AVCodecContext *avctx,
 #endif
     return result;
   }
-  
+
   //FIXME this avoids some confusion for "B frames" without 2 references
   //this should be removed after libavcodec can handle more flexible picture types & ordering
   if(s->pict_type==B_TYPE && s->last_picture_ptr==NULL) return buf_size;
-  
+
   if(avctx->hurry_up && s->pict_type==B_TYPE) return buf_size;
   if(  (avctx->skip_frame >= AVDISCARD_NONREF && s->pict_type==B_TYPE)
      ||(avctx->skip_frame >= AVDISCARD_NONKEY && s->pict_type!=I_TYPE)
      || avctx->skip_frame >= AVDISCARD_ALL)
-      return buf_size;                            
+      return buf_size;
 
   if(MPV_frame_start(s, avctx) < 0)
       return -1;
@@ -781,17 +781,17 @@ static int svq1_decode_frame(AVCodecContext *avctx,
     if (s->pict_type == I_TYPE) {
       /* keyframe */
       for (y=0; y < height; y+=16) {
-	for (x=0; x < width; x+=16) {
-	  result = svq1_decode_block_intra (&s->gb, &current[x], linesize);
-	  if (result != 0)
-	  {
+        for (x=0; x < width; x+=16) {
+          result = svq1_decode_block_intra (&s->gb, &current[x], linesize);
+          if (result != 0)
+          {
 //#ifdef DEBUG_SVQ1
-	    av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_block %i (keyframe)\n",result);
+            av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_block %i (keyframe)\n",result);
 //#endif
-	    return result;
-	  }
-	}
-	current += 16*linesize;
+            return result;
+          }
+        }
+        current += 16*linesize;
       }
     } else {
       svq1_pmv_t pmv[width/8+3];
@@ -799,31 +799,31 @@ static int svq1_decode_frame(AVCodecContext *avctx,
       memset (pmv, 0, ((width / 8) + 3) * sizeof(svq1_pmv_t));
 
       for (y=0; y < height; y+=16) {
-	for (x=0; x < width; x+=16) {
-	  result = svq1_decode_delta_block (s, &s->gb, &current[x], previous,
-					    linesize, pmv, x, y);
-	  if (result != 0)
-	  {
+        for (x=0; x < width; x+=16) {
+          result = svq1_decode_delta_block (s, &s->gb, &current[x], previous,
+                                            linesize, pmv, x, y);
+          if (result != 0)
+          {
 #ifdef DEBUG_SVQ1
     av_log(s->avctx, AV_LOG_INFO, "Error in svq1_decode_delta_block %i\n",result);
 #endif
-	    return result;
-	  }
-	}
+            return result;
+          }
+        }
 
-	pmv[0].x =
-	pmv[0].y = 0;
+        pmv[0].x =
+        pmv[0].y = 0;
 
-	current += 16*linesize;
+        current += 16*linesize;
       }
     }
   }
-  
+
   *pict = *(AVFrame*)&s->current_picture;
 
 
   MPV_frame_end(s);
-  
+
   *data_size=sizeof(AVFrame);
   return buf_size;
 }
@@ -902,22 +902,22 @@ static void svq1_write_header(SVQ1Context *s, int frame_type)
         /* output 5 unknown bits (2 + 2 + 1) */
         put_bits(&s->pb, 5, 0);
 
-	for (i = 0; i < 7; i++)
-	{
-	    if ((svq1_frame_size_table[i].width == s->frame_width) &&
-		(svq1_frame_size_table[i].height == s->frame_height))
-	    {
-		put_bits(&s->pb, 3, i);
-		break;
-	    }
-	}
-	
-	if (i == 7)
-	{
-	    put_bits(&s->pb, 3, 7);
-    	    put_bits(&s->pb, 12, s->frame_width);
-    	    put_bits(&s->pb, 12, s->frame_height);
-	}
+        for (i = 0; i < 7; i++)
+        {
+            if ((svq1_frame_size_table[i].width == s->frame_width) &&
+                (svq1_frame_size_table[i].height == s->frame_height))
+            {
+                put_bits(&s->pb, 3, i);
+                break;
+            }
+        }
+
+        if (i == 7)
+        {
+            put_bits(&s->pb, 3, 7);
+                put_bits(&s->pb, 12, s->frame_width);
+                put_bits(&s->pb, 12, s->frame_height);
+        }
     }
 
     /* no checksum or extra data (next 2 bits get 0) */
@@ -984,14 +984,14 @@ static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *dec
             int best_vector_sum=-999, best_vector_mean=-999;
             const int stage= count-1;
             const int8_t *vector;
-    
+
             for(i=0; i<16; i++){
                 int sum= codebook_sum[stage*16 + i];
                 int sqr=0;
                 int diff, mean, score;
-    
+
                 vector = codebook + stage*size*16 + i*size;
-    
+
                 for(j=0; j<size; j++){
                     int v= vector[j];
                     sqr += (v - block[stage][j])*(v - block[stage][j]);
@@ -1015,11 +1015,11 @@ static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *dec
                 block[stage+1][j] = block[stage][j] - vector[j];
             }
             block_sum[stage+1]= block_sum[stage] - best_vector_sum;
-            best_vector_score += 
+            best_vector_score +=
                 lambda*(+ 1 + 4*count
                         + multistage_vlc[1+count][1]
                         + mean_vlc[best_vector_mean][1]);
-    
+
             if(best_vector_score < best_score){
                 best_score= best_vector_score;
                 best_count= count;
@@ -1027,7 +1027,7 @@ static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *dec
             }
         }
     }
-    
+
     split=0;
     if(best_score > threshold && level){
         int score=0;
@@ -1040,7 +1040,7 @@ static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *dec
         score += encode_block(s, src         , ref         , decoded         , stride, level-1, threshold>>1, lambda, intra);
         score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra);
         score += lambda;
-        
+
         if(score < best_score){
             best_score= score;
             split=1;
@@ -1058,9 +1058,9 @@ static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *dec
         assert(best_mean >= -256 && best_mean<256);
         assert(best_count >=0 && best_count<7);
         assert(level<4 || best_count==0);
-            
+
         /* output the encoding */
-        put_bits(&s->reorder_pb[level], 
+        put_bits(&s->reorder_pb[level],
             multistage_vlc[1 + best_count][1],
             multistage_vlc[1 + best_count][0]);
         put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1],
@@ -1070,7 +1070,7 @@ static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *dec
             assert(best_vector[i]>=0 && best_vector[i]<16);
             put_bits(&s->reorder_pb[level], 4, best_vector[i]);
         }
-        
+
         for(y=0; y<h; y++){
             for(x=0; x<w; x++){
                 decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean;
@@ -1107,8 +1107,8 @@ static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane
         s->m.last_picture_ptr   = &s->m.last_picture;
         s->m.last_picture.data[0]= ref_plane;
         s->m.linesize=
-        s->m.last_picture.linesize[0]= 
-        s->m.new_picture.linesize[0]= 
+        s->m.last_picture.linesize[0]=
+        s->m.new_picture.linesize[0]=
         s->m.current_picture.linesize[0]= stride;
         s->m.width= width;
         s->m.height= height;
@@ -1123,37 +1123,37 @@ static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane
         s->m.flags= s->avctx->flags;
 //        s->m.out_format = FMT_H263;
 //        s->m.unrestricted_mv= 1;
-        
+
         s->m.lambda= s->picture.quality;
         s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
         s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
-        
+
         if(!s->motion_val8[plane]){
             s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t));
             s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t));
         }
 
         s->m.mb_type= s->mb_type;
-        
+
         //dummies, to avoid segfaults
         s->m.current_picture.mb_mean=   (uint8_t *)s->dummy;
         s->m.current_picture.mb_var=    (uint16_t*)s->dummy;
         s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy;
         s->m.current_picture.mb_type= s->dummy;
-        
+
         s->m.current_picture.motion_val[0]= s->motion_val8[plane] + 2;
         s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1;
         s->m.dsp= s->dsp; //move
         ff_init_me(&s->m);
-    
+
         s->m.me.dia_size= s->avctx->dia_size;
         s->m.first_slice_line=1;
         for (y = 0; y < block_height; y++) {
             uint8_t src[stride*16];
-            
+
             s->m.new_picture.data[0]= src - y*16*stride; //ugly
             s->m.mb_y= y;
-    
+
             for(i=0; i<16 && i + 16*y<height; i++){
                 memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
                 for(x=width; x<16*block_width; x++)
@@ -1161,25 +1161,25 @@ static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane
             }
             for(; i<16 && i + 16*y<16*block_height; i++)
                 memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width);
-    
+
             for (x = 0; x < block_width; x++) {
                 s->m.mb_x= x;
                 ff_init_block_index(&s->m);
                 ff_update_block_index(&s->m);
-                
+
                 ff_estimate_p_frame_motion(&s->m, x, y);
             }
             s->m.first_slice_line=0;
         }
-    
+
         ff_fix_long_p_mvs(&s->m);
         ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0);
     }
-        
+
     s->m.first_slice_line=1;
     for (y = 0; y < block_height; y++) {
         uint8_t src[stride*16];
-        
+
         for(i=0; i<16 && i + 16*y<height; i++){
             memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width);
             for(x=width; x<16*block_width; x++)
@@ -1197,7 +1197,7 @@ static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane
             uint8_t *ref= ref_plane + offset;
             int score[4]={0,0,0,0}, best;
             uint8_t temp[16*stride];
-            
+
             if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ //FIXME check size
                 av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
                 return -1;
@@ -1206,7 +1206,7 @@ static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane
             s->m.mb_x= x;
             ff_init_block_index(&s->m);
             ff_update_block_index(&s->m);
-            
+
             if(s->picture.pict_type == I_TYPE || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){
                 for(i=0; i<6; i++){
                     init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32);
@@ -1223,9 +1223,9 @@ static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane
                 }
             }else
                 score[0]= INT_MAX;
-            
+
             best=0;
-            
+
             if(s->picture.pict_type == P_TYPE){
                 const uint8_t *vlc= svq1_block_type_vlc[SVQ1_BLOCK_INTER];
                 int mx, my, pred_x, pred_y, dxy;
@@ -1237,8 +1237,8 @@ static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane
                         init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32);
 
                     put_bits(&s->reorder_pb[5], vlc[1], vlc[0]);
-    
-                    s->m.pb= s->reorder_pb[5];                
+
+                    s->m.pb= s->reorder_pb[5];
                     mx= motion_ptr[0];
                     my= motion_ptr[1];
                     assert(mx>=-32 && mx<=31);
@@ -1249,11 +1249,11 @@ static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane
                     ff_h263_encode_motion(&s->m, my - pred_y, 1);
                     s->reorder_pb[5]= s->m.pb;
                     score[1] += lambda*put_bits_count(&s->reorder_pb[5]);
-    
+
                     dxy= (mx&1) + 2*(my&1);
-                    
+
                     s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16);
-                    
+
                     score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0);
                     best= score[1] <= score[0];
 
@@ -1282,7 +1282,7 @@ static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane
                     motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0;
                 }
             }
-                
+
             s->rd_total += score[best];
 
             for(i=5; i>=0; i--){
@@ -1315,17 +1315,17 @@ static int svq1_encode_init(AVCodecContext *avctx)
 
     s->avctx= avctx;
     s->m.avctx= avctx;
-    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); 
+    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
     s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
     s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
     s->mb_type        = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t));
     s->dummy          = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t));
     h263_encode_init(&s->m); //mv_penalty
-    
+
     return 0;
 }
 
-static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf, 
+static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
     int buf_size, void *data)
 {
     SVQ1Context * const s = avctx->priv_data;
@@ -1338,16 +1338,16 @@ static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
         av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n");
         return -1;
     }
-    
+
     if(!s->current_picture.data[0]){
         avctx->get_buffer(avctx, &s->current_picture);
         avctx->get_buffer(avctx, &s->last_picture);
     }
-    
+
     temp= s->current_picture;
     s->current_picture= s->last_picture;
     s->last_picture= temp;
-    
+
     init_put_bits(&s->pb, buf, buf_size);
 
     *p = *pict;
@@ -1358,7 +1358,7 @@ static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
     for(i=0; i<3; i++){
         if(svq1_encode_plane(s, i,
             s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i],
-            s->frame_width / (i?4:1), s->frame_height / (i?4:1), 
+            s->frame_width / (i?4:1), s->frame_height / (i?4:1),
             s->picture.linesize[i], s->current_picture.linesize[i]) < 0)
                 return -1;
     }
@@ -1366,7 +1366,7 @@ static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf,
 //    align_put_bits(&s->pb);
     while(put_bits_count(&s->pb) & 31)
         put_bits(&s->pb, 1, 0);
-        
+
     flush_put_bits(&s->pb);
 
     return (put_bits_count(&s->pb) / 8);
@@ -1378,8 +1378,8 @@ static int svq1_encode_end(AVCodecContext *avctx)
     int i;
 
     av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number));
-    
-    av_freep(&s->m.me.scratchpad);     
+
+    av_freep(&s->m.me.scratchpad);
     av_freep(&s->m.me.map);
     av_freep(&s->m.me.score_map);
     av_freep(&s->mb_type);
diff --git a/src/libffmpeg/libavcodec/svq1_cb.h b/src/libffmpeg/libavcodec/svq1_cb.h
index c6735fe8e..5c98c8047 100644
--- a/src/libffmpeg/libavcodec/svq1_cb.h
+++ b/src/libffmpeg/libavcodec/svq1_cb.h
@@ -1,8 +1,8 @@
 /*
- * 
+ *
  * Copyright (C) 2002 the xine project
  * Copyright (C) 2002 the ffmpeg project
- * 
+ *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Lesser General Public
  * License as published by the Free Software Foundation; either
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  * Ported to mplayer by Arpi <arpi@thot.banki.hu>
  * Ported to libavcodec by Nick Kurshev <nickols_k@mail.ru>
@@ -26,7 +26,7 @@
  * @file svq1_cb.h
  * svq1 code books.
  */
- 
+
 /* 6x16-entry codebook for inter-coded 4x2 vectors */
 static const int8_t svq1_inter_codebook_4x2[768] = {
     7,  2, -6, -7,  7,  3, -3, -4, -7, -2,  7,  8, -8, -4,  3,  4,
diff --git a/src/libffmpeg/libavcodec/svq1_vlc.h b/src/libffmpeg/libavcodec/svq1_vlc.h
index 8a30acb26..4d405334d 100644
--- a/src/libffmpeg/libavcodec/svq1_vlc.h
+++ b/src/libffmpeg/libavcodec/svq1_vlc.h
@@ -5,7 +5,7 @@
 static const uint8_t svq1_block_type_vlc[4][2] = {
  /* { code, length } */
     { 0x1, 1 },  { 0x1, 2 },  { 0x1, 3 },  { 0x0, 3 }
-    
+
 };
 
 /* values in this table range from -1..6; adjust retrieved value by -1 */
diff --git a/src/libffmpeg/libavcodec/svq3.c b/src/libffmpeg/libavcodec/svq3.c
index f0f995acf..cfe7f7d22 100644
--- a/src/libffmpeg/libavcodec/svq3.c
+++ b/src/libffmpeg/libavcodec/svq3.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  *
  * How to use this decoder:
@@ -26,10 +26,10 @@
  * the calling app must make the SVQ3 ImageDescription atom available
  * via the AVCodecContext's extradata[_size] field:
  *
- * AVCodecContext.extradata = pointer to ImageDescription, first characters 
+ * AVCodecContext.extradata = pointer to ImageDescription, first characters
  * are expected to be 'S', 'V', 'Q', and '3', NOT the 4-byte atom length
- * AVCodecContext.extradata_size = size of ImageDescription atom memory 
- * buffer (which will be the same as the ImageDescription atom size field 
+ * AVCodecContext.extradata_size = size of ImageDescription atom memory
+ * buffer (which will be the same as the ImageDescription atom size field
  * from the QT file, minus 4 bytes since the length is missing)
  *
  * You will know you have these parameters passed correctly when the decoder
@@ -37,24 +37,24 @@
  *  ftp://ftp.mplayerhq.hu/MPlayer/samples/V-codecs/SVQ3/Vertical400kbit.sorenson3.mov
  *
  */
- 
+
 /**
  * @file svq3.c
  * svq3 decoder.
  */
 
-#define FULLPEL_MODE  1 
-#define HALFPEL_MODE  2 
+#define FULLPEL_MODE  1
+#define HALFPEL_MODE  2
 #define THIRDPEL_MODE 3
 #define PREDICT_MODE  4
- 
+
 /* dual scan (from some older h264 draft)
  o-->o-->o   o
          |  /|
  o   o   o / o
  | / |   |/  |
  o   o   o   o
-   / 
+   /
  o-->o-->o-->o
 */
 static const uint8_t svq3_scan[16]={
@@ -179,8 +179,8 @@ static void svq3_add_idct_c (uint8_t *dst, DCTELEM *block, int stride, int qp, i
 }
 
 static void pred4x4_down_left_svq3_c(uint8_t *src, uint8_t *topright, int stride){
-    LOAD_TOP_EDGE    
-    LOAD_LEFT_EDGE    
+    LOAD_TOP_EDGE
+    LOAD_LEFT_EDGE
     const __attribute__((unused)) int unu0= t0;
     const __attribute__((unused)) int unu1= l0;
 
@@ -207,7 +207,7 @@ static void pred16x16_plane_svq3_c(uint8_t *src, int stride){
 }
 
 static inline int svq3_decode_block (GetBitContext *gb, DCTELEM *block,
-				     int index, const int type) {
+                                     int index, const int type) {
 
   static const uint8_t *const scan_patterns[4] =
   { luma_dc_zigzag_scan, zigzag_scan, svq3_scan, chroma_dc_scan };
@@ -220,37 +220,37 @@ static inline int svq3_decode_block (GetBitContext *gb, DCTELEM *block,
     for (; (vlc = svq3_get_ue_golomb (gb)) != 0; index++) {
 
       if (vlc == INVALID_VLC)
-	return -1;
+        return -1;
 
       sign = (vlc & 0x1) - 1;
       vlc  = (vlc + 1) >> 1;
 
       if (type == 3) {
-	if (vlc < 3) {
-	  run   = 0;
-	  level = vlc;
-	} else if (vlc < 4) {
-	  run   = 1;
-	  level = 1;
-	} else {
-	  run   = (vlc & 0x3);
-	  level = ((vlc + 9) >> 2) - run;
-	}
+        if (vlc < 3) {
+          run   = 0;
+          level = vlc;
+        } else if (vlc < 4) {
+          run   = 1;
+          level = 1;
+        } else {
+          run   = (vlc & 0x3);
+          level = ((vlc + 9) >> 2) - run;
+        }
       } else {
-	if (vlc < 16) {
-	  run   = svq3_dct_tables[intra][vlc].run;
-	  level = svq3_dct_tables[intra][vlc].level;
-	} else if (intra) {
-	  run   = (vlc & 0x7);
-	  level = (vlc >> 3) + ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
-	} else {
-	  run   = (vlc & 0xF);
-	  level = (vlc >> 4) + ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
-	}
+        if (vlc < 16) {
+          run   = svq3_dct_tables[intra][vlc].run;
+          level = svq3_dct_tables[intra][vlc].level;
+        } else if (intra) {
+          run   = (vlc & 0x7);
+          level = (vlc >> 3) + ((run == 0) ? 8 : ((run < 2) ? 2 : ((run < 5) ? 0 : -1)));
+        } else {
+          run   = (vlc & 0xF);
+          level = (vlc >> 4) + ((run == 0) ? 4 : ((run < 3) ? 2 : ((run < 10) ? 1 : 0)));
+        }
       }
 
       if ((index += run) >= limit)
-	return -1;
+        return -1;
 
       block[scan[index]] = (level ^ sign) - sign;
     }
@@ -264,9 +264,9 @@ static inline int svq3_decode_block (GetBitContext *gb, DCTELEM *block,
 }
 
 static inline void svq3_mc_dir_part (MpegEncContext *s,
-				     int x, int y, int width, int height,
-				     int mx, int my, int dxy,
-				     int thirdpel, int dir, int avg) {
+                                     int x, int y, int width, int height,
+                                     int mx, int my, int dxy,
+                                     int thirdpel, int dir, int avg) {
 
   const Picture *pic = (dir == 0) ? &s->last_picture : &s->next_picture;
   uint8_t *src, *dest;
@@ -275,7 +275,7 @@ static inline void svq3_mc_dir_part (MpegEncContext *s,
 
   mx += x;
   my += y;
-  
+
   if (mx < 0 || mx >= (s->h_edge_pos - width  - 1) ||
       my < 0 || my >= (s->v_edge_pos - height - 1)) {
 
@@ -293,7 +293,7 @@ static inline void svq3_mc_dir_part (MpegEncContext *s,
 
   if (emu) {
     ff_emulated_edge_mc (s->edge_emu_buffer, src, s->linesize, (width + 1), (height + 1),
-			 mx, my, s->h_edge_pos, s->v_edge_pos);
+                         mx, my, s->h_edge_pos, s->v_edge_pos);
     src = s->edge_emu_buffer;
   }
   if(thirdpel)
@@ -302,8 +302,8 @@ static inline void svq3_mc_dir_part (MpegEncContext *s,
     (avg ? s->dsp.avg_pixels_tab : s->dsp.put_pixels_tab)[blocksize][dxy](dest, src, s->linesize, height);
 
   if (!(s->flags & CODEC_FLAG_GRAY)) {
-    mx	   = (mx + (mx < (int) x)) >> 1;
-    my	   = (my + (my < (int) y)) >> 1;
+    mx     = (mx + (mx < (int) x)) >> 1;
+    my     = (my + (my < (int) y)) >> 1;
     width  = (width  >> 1);
     height = (height >> 1);
     blocksize++;
@@ -314,7 +314,7 @@ static inline void svq3_mc_dir_part (MpegEncContext *s,
 
       if (emu) {
         ff_emulated_edge_mc (s->edge_emu_buffer, src, s->uvlinesize, (width + 1), (height + 1),
-			     mx, my, (s->h_edge_pos >> 1), (s->v_edge_pos >> 1));
+                             mx, my, (s->h_edge_pos >> 1), (s->v_edge_pos >> 1));
         src = s->edge_emu_buffer;
       }
       if(thirdpel)
@@ -344,18 +344,18 @@ static inline int svq3_mc_dir (H264Context *h, int size, int mode, int dir, int
       k = ((j>>2)&1) + ((i>>1)&2) + ((j>>1)&4) + (i&8);
 
       if (mode != PREDICT_MODE) {
-	pred_motion (h, k, (part_width >> 2), dir, 1, &mx, &my);
+        pred_motion (h, k, (part_width >> 2), dir, 1, &mx, &my);
       } else {
-	mx = s->next_picture.motion_val[0][b_xy][0]<<1;
-	my = s->next_picture.motion_val[0][b_xy][1]<<1;
-
-	if (dir == 0) {
-	  mx = ((mx * h->frame_num_offset) / h->prev_frame_num_offset + 1)>>1;
-	  my = ((my * h->frame_num_offset) / h->prev_frame_num_offset + 1)>>1;
-	} else {
-	  mx = ((mx * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset + 1)>>1;
-	  my = ((my * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset + 1)>>1;
-	}
+        mx = s->next_picture.motion_val[0][b_xy][0]<<1;
+        my = s->next_picture.motion_val[0][b_xy][1]<<1;
+
+        if (dir == 0) {
+          mx = ((mx * h->frame_num_offset) / h->prev_frame_num_offset + 1)>>1;
+          my = ((my * h->frame_num_offset) / h->prev_frame_num_offset + 1)>>1;
+        } else {
+          mx = ((mx * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset + 1)>>1;
+          my = ((my * (h->frame_num_offset - h->prev_frame_num_offset)) / h->prev_frame_num_offset + 1)>>1;
+        }
       }
 
       /* clip motion vector prediction to frame border */
@@ -364,63 +364,63 @@ static inline int svq3_mc_dir (H264Context *h, int size, int mode, int dir, int
 
       /* get (optional) motion vector differential */
       if (mode == PREDICT_MODE) {
-	dx = dy = 0;
+        dx = dy = 0;
       } else {
-	dy = svq3_get_se_golomb (&s->gb);
-	dx = svq3_get_se_golomb (&s->gb);
+        dy = svq3_get_se_golomb (&s->gb);
+        dx = svq3_get_se_golomb (&s->gb);
 
-	if (dx == INVALID_VLC || dy == INVALID_VLC) {
+        if (dx == INVALID_VLC || dy == INVALID_VLC) {
           av_log(h->s.avctx, AV_LOG_ERROR, "invalid MV vlc\n");
-	  return -1;
-	}
+          return -1;
+        }
       }
 
       /* compute motion vector */
       if (mode == THIRDPEL_MODE) {
-	int fx, fy;
-	mx = ((mx + 1)>>1) + dx;
-	my = ((my + 1)>>1) + dy;
-	fx= ((unsigned)(mx + 0x3000))/3 - 0x1000;
-	fy= ((unsigned)(my + 0x3000))/3 - 0x1000;
-	dxy= (mx - 3*fx) + 4*(my - 3*fy);
-
-	svq3_mc_dir_part (s, x, y, part_width, part_height, fx, fy, dxy, 1, dir, avg);
-	mx += mx;
-	my += my;
+        int fx, fy;
+        mx = ((mx + 1)>>1) + dx;
+        my = ((my + 1)>>1) + dy;
+        fx= ((unsigned)(mx + 0x3000))/3 - 0x1000;
+        fy= ((unsigned)(my + 0x3000))/3 - 0x1000;
+        dxy= (mx - 3*fx) + 4*(my - 3*fy);
+
+        svq3_mc_dir_part (s, x, y, part_width, part_height, fx, fy, dxy, 1, dir, avg);
+        mx += mx;
+        my += my;
       } else if (mode == HALFPEL_MODE || mode == PREDICT_MODE) {
-	mx = ((unsigned)(mx + 1 + 0x3000))/3 + dx - 0x1000;
-	my = ((unsigned)(my + 1 + 0x3000))/3 + dy - 0x1000;
-	dxy= (mx&1) + 2*(my&1);
+        mx = ((unsigned)(mx + 1 + 0x3000))/3 + dx - 0x1000;
+        my = ((unsigned)(my + 1 + 0x3000))/3 + dy - 0x1000;
+        dxy= (mx&1) + 2*(my&1);
 
-	svq3_mc_dir_part (s, x, y, part_width, part_height, mx>>1, my>>1, dxy, 0, dir, avg);
-	mx *= 3;
-	my *= 3;
+        svq3_mc_dir_part (s, x, y, part_width, part_height, mx>>1, my>>1, dxy, 0, dir, avg);
+        mx *= 3;
+        my *= 3;
       } else {
-	mx = ((unsigned)(mx + 3 + 0x6000))/6 + dx - 0x1000;
-	my = ((unsigned)(my + 3 + 0x6000))/6 + dy - 0x1000;
+        mx = ((unsigned)(mx + 3 + 0x6000))/6 + dx - 0x1000;
+        my = ((unsigned)(my + 3 + 0x6000))/6 + dy - 0x1000;
 
-	svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my, 0, 0, dir, avg);
-	mx *= 6;
-	my *= 6;
+        svq3_mc_dir_part (s, x, y, part_width, part_height, mx, my, 0, 0, dir, avg);
+        mx *= 6;
+        my *= 6;
       }
 
       /* update mv_cache */
       if (mode != PREDICT_MODE) {
-	int32_t mv = pack16to32(mx,my);
-
-	if (part_height == 8 && i < 8) {
-	  *(int32_t *) h->mv_cache[dir][scan8[k] + 1*8] = mv;
-
-	  if (part_width == 8 && j < 8) {
-	    *(int32_t *) h->mv_cache[dir][scan8[k] + 1 + 1*8] = mv;
-	  }
-	}
-	if (part_width == 8 && j < 8) {
-	  *(int32_t *) h->mv_cache[dir][scan8[k] + 1] = mv;
-	}
-	if (part_width == 4 || part_height == 4) {
-	  *(int32_t *) h->mv_cache[dir][scan8[k]] = mv;
-	}
+        int32_t mv = pack16to32(mx,my);
+
+        if (part_height == 8 && i < 8) {
+          *(int32_t *) h->mv_cache[dir][scan8[k] + 1*8] = mv;
+
+          if (part_width == 8 && j < 8) {
+            *(int32_t *) h->mv_cache[dir][scan8[k] + 1 + 1*8] = mv;
+          }
+        }
+        if (part_width == 8 && j < 8) {
+          *(int32_t *) h->mv_cache[dir][scan8[k] + 1] = mv;
+        }
+        if (part_width == 4 || part_height == 4) {
+          *(int32_t *) h->mv_cache[dir][scan8[k]] = mv;
+        }
       }
 
       /* write back motion vectors */
@@ -440,16 +440,16 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
   const int mb_xy = s->mb_x + s->mb_y*s->mb_stride;
   const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
 
-  h->top_samples_available	= (s->mb_y == 0) ? 0x33FF : 0xFFFF;
-  h->left_samples_available	= (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
-  h->topright_samples_available	= 0xFFFF;
+  h->top_samples_available        = (s->mb_y == 0) ? 0x33FF : 0xFFFF;
+  h->left_samples_available        = (s->mb_x == 0) ? 0x5F5F : 0xFFFF;
+  h->topright_samples_available        = 0xFFFF;
 
-  if (mb_type == 0) {		/* SKIP */
+  if (mb_type == 0) {           /* SKIP */
     if (s->pict_type == P_TYPE || s->next_picture.mb_type[mb_xy] == -1) {
       svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0, 0, 0);
 
       if (s->pict_type == B_TYPE) {
-	svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0, 1, 1);
+        svq3_mc_dir_part (s, 16*s->mb_x, 16*s->mb_y, 16, 16, 0, 0, 0, 0, 1, 1);
       }
 
       mb_type = MB_TYPE_SKIP;
@@ -462,7 +462,7 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
 
       mb_type = MB_TYPE_16x16;
     }
-  } else if (mb_type < 8) {	/* INTER */
+  } else if (mb_type < 8) {     /* INTER */
     if (h->thirdpel_flag && h->halfpel_flag == !get_bits (&s->gb, 1)) {
       mode = THIRDPEL_MODE;
     } else if (h->halfpel_flag && h->thirdpel_flag == !get_bits (&s->gb, 1)) {
@@ -480,110 +480,110 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
         N??11111
         N
     */
-    
+
     for (m=0; m < 2; m++) {
       if (s->mb_x > 0 && h->intra4x4_pred_mode[mb_xy - 1][0] != -1) {
-	for (i=0; i < 4; i++) {
-	  *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - 1 + i*h->b_stride];
-	}
+        for (i=0; i < 4; i++) {
+          *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - 1 + i*h->b_stride];
+        }
       } else {
-	for (i=0; i < 4; i++) {
-	  *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = 0;
-	}
+        for (i=0; i < 4; i++) {
+          *(uint32_t *) h->mv_cache[m][scan8[0] - 1 + i*8] = 0;
+        }
       }
       if (s->mb_y > 0) {
-	memcpy (h->mv_cache[m][scan8[0] - 1*8], s->current_picture.motion_val[m][b_xy - h->b_stride], 4*2*sizeof(int16_t));
-	memset (&h->ref_cache[m][scan8[0] - 1*8], (h->intra4x4_pred_mode[mb_xy - s->mb_stride][4] == -1) ? PART_NOT_AVAILABLE : 1, 4);
-
-	if (s->mb_x < (s->mb_width - 1)) {
-	  *(uint32_t *) h->mv_cache[m][scan8[0] + 4 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride + 4];
-	  h->ref_cache[m][scan8[0] + 4 - 1*8] =
-		  (h->intra4x4_pred_mode[mb_xy - s->mb_stride + 1][0] == -1 ||
-		   h->intra4x4_pred_mode[mb_xy - s->mb_stride][4] == -1) ? PART_NOT_AVAILABLE : 1;
-	}else
-	  h->ref_cache[m][scan8[0] + 4 - 1*8] = PART_NOT_AVAILABLE;
-	if (s->mb_x > 0) {
-	  *(uint32_t *) h->mv_cache[m][scan8[0] - 1 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride - 1];
-	  h->ref_cache[m][scan8[0] - 1 - 1*8] = (h->intra4x4_pred_mode[mb_xy - s->mb_stride - 1][3] == -1) ? PART_NOT_AVAILABLE : 1;
-	}else
-	  h->ref_cache[m][scan8[0] - 1 - 1*8] = PART_NOT_AVAILABLE;
+        memcpy (h->mv_cache[m][scan8[0] - 1*8], s->current_picture.motion_val[m][b_xy - h->b_stride], 4*2*sizeof(int16_t));
+        memset (&h->ref_cache[m][scan8[0] - 1*8], (h->intra4x4_pred_mode[mb_xy - s->mb_stride][4] == -1) ? PART_NOT_AVAILABLE : 1, 4);
+
+        if (s->mb_x < (s->mb_width - 1)) {
+          *(uint32_t *) h->mv_cache[m][scan8[0] + 4 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride + 4];
+          h->ref_cache[m][scan8[0] + 4 - 1*8] =
+                  (h->intra4x4_pred_mode[mb_xy - s->mb_stride + 1][0] == -1 ||
+                   h->intra4x4_pred_mode[mb_xy - s->mb_stride][4] == -1) ? PART_NOT_AVAILABLE : 1;
+        }else
+          h->ref_cache[m][scan8[0] + 4 - 1*8] = PART_NOT_AVAILABLE;
+        if (s->mb_x > 0) {
+          *(uint32_t *) h->mv_cache[m][scan8[0] - 1 - 1*8] = *(uint32_t *) s->current_picture.motion_val[m][b_xy - h->b_stride - 1];
+          h->ref_cache[m][scan8[0] - 1 - 1*8] = (h->intra4x4_pred_mode[mb_xy - s->mb_stride - 1][3] == -1) ? PART_NOT_AVAILABLE : 1;
+        }else
+          h->ref_cache[m][scan8[0] - 1 - 1*8] = PART_NOT_AVAILABLE;
       }else
-	memset (&h->ref_cache[m][scan8[0] - 1*8 - 1], PART_NOT_AVAILABLE, 8);
+        memset (&h->ref_cache[m][scan8[0] - 1*8 - 1], PART_NOT_AVAILABLE, 8);
 
       if (s->pict_type != B_TYPE)
-	break;
+        break;
     }
 
     /* decode motion vector(s) and form prediction(s) */
     if (s->pict_type == P_TYPE) {
       if(svq3_mc_dir (h, (mb_type - 1), mode, 0, 0) < 0)
         return -1;
-    } else {	/* B_TYPE */
+    } else {        /* B_TYPE */
       if (mb_type != 2) {
-	if(svq3_mc_dir (h, 0, mode, 0, 0) < 0)
+        if(svq3_mc_dir (h, 0, mode, 0, 0) < 0)
           return -1;
       } else {
-	for (i=0; i < 4; i++) {
-	  memset (s->current_picture.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
-	}
+        for (i=0; i < 4; i++) {
+          memset (s->current_picture.motion_val[0][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
+        }
       }
       if (mb_type != 1) {
-	if(svq3_mc_dir (h, 0, mode, 1, (mb_type == 3)) < 0)
+        if(svq3_mc_dir (h, 0, mode, 1, (mb_type == 3)) < 0)
           return -1;
       } else {
-	for (i=0; i < 4; i++) {
-	  memset (s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
-	}
+        for (i=0; i < 4; i++) {
+          memset (s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
+        }
       }
     }
 
     mb_type = MB_TYPE_16x16;
-  } else if (mb_type == 8 || mb_type == 33) {	/* INTRA4x4 */
+  } else if (mb_type == 8 || mb_type == 33) {   /* INTRA4x4 */
     memset (h->intra4x4_pred_mode_cache, -1, 8*5*sizeof(int8_t));
 
     if (mb_type == 8) {
       if (s->mb_x > 0) {
-	for (i=0; i < 4; i++) {
-	  h->intra4x4_pred_mode_cache[scan8[0] - 1 + i*8] = h->intra4x4_pred_mode[mb_xy - 1][i];
-	}
-	if (h->intra4x4_pred_mode_cache[scan8[0] - 1] == -1) {
-	  h->left_samples_available = 0x5F5F;
-	}
+        for (i=0; i < 4; i++) {
+          h->intra4x4_pred_mode_cache[scan8[0] - 1 + i*8] = h->intra4x4_pred_mode[mb_xy - 1][i];
+        }
+        if (h->intra4x4_pred_mode_cache[scan8[0] - 1] == -1) {
+          h->left_samples_available = 0x5F5F;
+        }
       }
       if (s->mb_y > 0) {
-	h->intra4x4_pred_mode_cache[4+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][4];
-	h->intra4x4_pred_mode_cache[5+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][5];
-	h->intra4x4_pred_mode_cache[6+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][6];
-	h->intra4x4_pred_mode_cache[7+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][3];
-
-	if (h->intra4x4_pred_mode_cache[4+8*0] == -1) {
-	  h->top_samples_available = 0x33FF;
-	}
+        h->intra4x4_pred_mode_cache[4+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][4];
+        h->intra4x4_pred_mode_cache[5+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][5];
+        h->intra4x4_pred_mode_cache[6+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][6];
+        h->intra4x4_pred_mode_cache[7+8*0] = h->intra4x4_pred_mode[mb_xy - s->mb_stride][3];
+
+        if (h->intra4x4_pred_mode_cache[4+8*0] == -1) {
+          h->top_samples_available = 0x33FF;
+        }
       }
 
       /* decode prediction codes for luma blocks */
       for (i=0; i < 16; i+=2) {
-	vlc = svq3_get_ue_golomb (&s->gb);
+        vlc = svq3_get_ue_golomb (&s->gb);
 
-	if (vlc >= 25){
+        if (vlc >= 25){
           av_log(h->s.avctx, AV_LOG_ERROR, "luma prediction:%d\n", vlc);
-	  return -1;
+          return -1;
         }
 
-	left	= &h->intra4x4_pred_mode_cache[scan8[i] - 1];
-	top	= &h->intra4x4_pred_mode_cache[scan8[i] - 8];
+        left    = &h->intra4x4_pred_mode_cache[scan8[i] - 1];
+        top     = &h->intra4x4_pred_mode_cache[scan8[i] - 8];
 
-	left[1]	= svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
-	left[2]	= svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
+        left[1] = svq3_pred_1[top[0] + 1][left[0] + 1][svq3_pred_0[vlc][0]];
+        left[2] = svq3_pred_1[top[1] + 1][left[1] + 1][svq3_pred_0[vlc][1]];
 
-	if (left[1] == -1 || left[2] == -1){
+        if (left[1] == -1 || left[2] == -1){
           av_log(h->s.avctx, AV_LOG_ERROR, "weird prediction\n");
-	  return -1;
+          return -1;
         }
       }
-    } else {	/* mb_type == 33, DC_128_PRED block type */
+    } else {    /* mb_type == 33, DC_128_PRED block type */
       for (i=0; i < 4; i++) {
-	memset (&h->intra4x4_pred_mode_cache[scan8[0] + 8*i], DC_PRED, 4);
+        memset (&h->intra4x4_pred_mode_cache[scan8[0] + 8*i], DC_PRED, 4);
       }
     }
 
@@ -604,7 +604,7 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
     }
 
     mb_type = MB_TYPE_INTRA4x4;
-  } else {			/* INTRA16x16 */
+  } else {                      /* INTRA16x16 */
     dir = i_mb_type_info[mb_type - 8].pred_mode;
     dir = (dir >> 1) ^ 3*(dir & 1) ^ 1;
 
@@ -623,7 +623,7 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
     }
     if (s->pict_type == B_TYPE) {
       for (i=0; i < 4; i++) {
-	memset (s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
+        memset (s->current_picture.motion_val[1][b_xy + i*h->b_stride], 0, 4*2*sizeof(int16_t));
       }
     }
   }
@@ -664,35 +664,35 @@ static int svq3_decode_mb (H264Context *h, unsigned int mb_type) {
 
     for (i=0; i < 4; i++) {
       if ((cbp & (1 << i))) {
-	for (j=0; j < 4; j++) {
-	  k = index ? ((j&1) + 2*(i&1) + 2*(j&2) + 4*(i&2)) : (4*i + j);
-	  h->non_zero_count_cache[ scan8[k] ] = 1;
+        for (j=0; j < 4; j++) {
+          k = index ? ((j&1) + 2*(i&1) + 2*(j&2) + 4*(i&2)) : (4*i + j);
+          h->non_zero_count_cache[ scan8[k] ] = 1;
 
-	  if (svq3_decode_block (&s->gb, &h->mb[16*k], index, type)){
+          if (svq3_decode_block (&s->gb, &h->mb[16*k], index, type)){
             av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding block\n");
-	    return -1;
+            return -1;
           }
-	}
+        }
       }
     }
 
     if ((cbp & 0x30)) {
       for (i=0; i < 2; ++i) {
-	if (svq3_decode_block (&s->gb, &h->mb[16*(16 + 4*i)], 0, 3)){
+        if (svq3_decode_block (&s->gb, &h->mb[16*(16 + 4*i)], 0, 3)){
           av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma dc block\n");
-	  return -1;
+          return -1;
         }
       }
 
       if ((cbp & 0x20)) {
-	for (i=0; i < 8; i++) {
-	  h->non_zero_count_cache[ scan8[16+i] ] = 1;
+        for (i=0; i < 8; i++) {
+          h->non_zero_count_cache[ scan8[16+i] ] = 1;
 
-	  if (svq3_decode_block (&s->gb, &h->mb[16*(16 + i)], 1, 1)){
+          if (svq3_decode_block (&s->gb, &h->mb[16*(16 + i)], 1, 1)){
             av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding chroma ac block\n");
-	    return -1;
+            return -1;
           }
-	}
+        }
       }
     }
   }
@@ -786,8 +786,8 @@ static int svq3_decode_slice_header (H264Context *h) {
 }
 
 static int svq3_decode_frame (AVCodecContext *avctx,
-			      void *data, int *data_size,
-			      uint8_t *buf, int buf_size) {
+                              void *data, int *data_size,
+                              uint8_t *buf, int buf_size) {
   MpegEncContext *const s = avctx->priv_data;
   H264Context *const h = avctx->priv_data;
   int m, mb_type;
@@ -829,12 +829,12 @@ static int svq3_decode_frame (AVCodecContext *avctx,
       GetBitContext gb;
 
       size = BE_32(&extradata[4]);
-      init_get_bits (&gb, extradata + 8, size);
+      init_get_bits (&gb, extradata + 8, size*8);
 
       /* 'frame size code' and optional 'width, height' */
       if (get_bits (&gb, 3) == 7) {
-	get_bits (&gb, 12);
-	get_bits (&gb, 12);
+        get_bits (&gb, 12);
+        get_bits (&gb, 12);
       }
 
       h->halfpel_flag = get_bits1 (&gb);
@@ -852,7 +852,7 @@ static int svq3_decode_frame (AVCodecContext *avctx,
       get_bits1 (&gb);
 
       while (get_bits1 (&gb)) {
-	get_bits (&gb, 8);
+        get_bits (&gb, 8);
       }
 
       h->unknown_svq3_flag = get_bits1 (&gb);
@@ -880,7 +880,7 @@ static int svq3_decode_frame (AVCodecContext *avctx,
   s->picture_number = h->slice_num;
 
   if(avctx->debug&FF_DEBUG_PICT_INFO){
-      av_log(h->s.avctx, AV_LOG_DEBUG, "%c hpel:%d, tpel:%d aqp:%d qp:%d\n", 
+      av_log(h->s.avctx, AV_LOG_DEBUG, "%c hpel:%d, tpel:%d aqp:%d qp:%d\n",
       av_get_pict_type_char(s->pict_type), h->halfpel_flag, h->thirdpel_flag,
       s->adaptive_quant, s->qscale
       );
@@ -935,45 +935,45 @@ static int svq3_decode_frame (AVCodecContext *avctx,
     for(i=0; i<4; i++){
       int j;
       for(j=-1; j<4; j++)
-	h->ref_cache[m][scan8[0] + 8*i + j]= 1;
+        h->ref_cache[m][scan8[0] + 8*i + j]= 1;
       h->ref_cache[m][scan8[0] + 8*i + j]= PART_NOT_AVAILABLE;
     }
   }
-  
+
   for (s->mb_y=0; s->mb_y < s->mb_height; s->mb_y++) {
     for (s->mb_x=0; s->mb_x < s->mb_width; s->mb_x++) {
 
       if ( (get_bits_count(&s->gb) + 7) >= s->gb.size_in_bits &&
-	  ((get_bits_count(&s->gb) & 7) == 0 || show_bits (&s->gb, (-get_bits_count(&s->gb) & 7)) == 0)) {
+          ((get_bits_count(&s->gb) & 7) == 0 || show_bits (&s->gb, (-get_bits_count(&s->gb) & 7)) == 0)) {
 
-	skip_bits(&s->gb, h->next_slice_index - get_bits_count(&s->gb));
-	s->gb.size_in_bits = 8*buf_size;
+        skip_bits(&s->gb, h->next_slice_index - get_bits_count(&s->gb));
+        s->gb.size_in_bits = 8*buf_size;
 
-	if (svq3_decode_slice_header (h))
-	  return -1;
+        if (svq3_decode_slice_header (h))
+          return -1;
 
-	/* TODO: support s->mb_skip_run */
+        /* TODO: support s->mb_skip_run */
       }
 
       mb_type = svq3_get_ue_golomb (&s->gb);
 
       if (s->pict_type == I_TYPE) {
-	mb_type += 8;
+        mb_type += 8;
       } else if (s->pict_type == B_TYPE && mb_type >= 4) {
-	mb_type += 4;
+        mb_type += 4;
       }
       if (mb_type > 33 || svq3_decode_mb (h, mb_type)) {
-	av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
-	return -1;
+        av_log(h->s.avctx, AV_LOG_ERROR, "error while decoding MB %d %d\n", s->mb_x, s->mb_y);
+        return -1;
       }
 
       if (mb_type != 0) {
-	hl_decode_mb (h);
+        hl_decode_mb (h);
       }
 
       if (s->pict_type != B_TYPE && !s->low_delay) {
-	s->current_picture.mb_type[s->mb_x + s->mb_y*s->mb_stride] =
-			(s->pict_type == P_TYPE && mb_type < 8) ? (mb_type - 1) : -1;
+        s->current_picture.mb_type[s->mb_x + s->mb_y*s->mb_stride] =
+                        (s->pict_type == P_TYPE && mb_type < 8) ? (mb_type - 1) : -1;
       }
     }
 
diff --git a/src/libffmpeg/libavcodec/truemotion1.c b/src/libffmpeg/libavcodec/truemotion1.c
index b382e2cfa..d2c9efbf8 100644
--- a/src/libffmpeg/libavcodec/truemotion1.c
+++ b/src/libffmpeg/libavcodec/truemotion1.c
@@ -14,12 +14,12 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file truemotion1.c
- * Duck TrueMotion v1 Video Decoder by 
+ * Duck TrueMotion v1 Video Decoder by
  * Alex Beregszaszi (alex@fsn.hu) and
  * Mike Melanson (melanson@pcisys.net)
  *
@@ -53,12 +53,12 @@ typedef struct TrueMotion1Context {
 
     int flags;
     int x, y, w, h;
-    
+
     uint32_t y_predictor_table[1024];
     uint32_t c_predictor_table[1024];
     uint32_t fat_y_predictor_table[1024];
     uint32_t fat_c_predictor_table[1024];
-    
+
     int compression;
     int block_type;
     int block_width;
@@ -68,7 +68,7 @@ typedef struct TrueMotion1Context {
     int16_t cdt[8];
     int16_t fat_ydt[8];
     int16_t fat_cdt[8];
-    
+
     int last_deltaset, last_vectable;
 
     unsigned int *vert_pred;
@@ -171,7 +171,7 @@ static int make_ydt15_entry(int p1, int p2, int16_t *ydt)
 #endif
 {
     int lo, hi;
-    
+
     lo = ydt[p1];
     lo += (lo << 5) + (lo << 10);
     hi = ydt[p2];
@@ -186,7 +186,7 @@ static int make_cdt15_entry(int p1, int p2, int16_t *cdt)
 #endif
 {
     int r, b, lo;
-    
+
     b = cdt[p2];
     r = cdt[p1] << 10;
     lo = b + r;
@@ -200,7 +200,7 @@ static int make_ydt16_entry(int p1, int p2, int16_t *ydt)
 #endif
 {
     int lo, hi;
-    
+
     lo = ydt[p1];
     lo += (lo << 6) + (lo << 11);
     hi = ydt[p2];
@@ -215,7 +215,7 @@ static int make_cdt16_entry(int p1, int p2, int16_t *cdt)
 #endif
 {
     int r, b, lo;
-    
+
     b = cdt[p2];
     r = cdt[p1] << 11;
     lo = b + r;
@@ -229,10 +229,10 @@ static int make_ydt24_entry(int p1, int p2, int16_t *ydt)
 #endif
 {
     int lo, hi;
-    
+
     lo = ydt[p1];
     hi = ydt[p2];
-    return ((lo + (hi << 8)) << 1);
+    return ((lo + (hi << 8) + (hi << 16)) << 1);
 }
 
 #ifdef WORDS_BIGENDIAN
@@ -242,7 +242,7 @@ static int make_cdt24_entry(int p1, int p2, int16_t *cdt)
 #endif
 {
     int r, b;
-    
+
     b = cdt[p2];
     r = cdt[p1]<<16;
     return ((b+r) << 1);
@@ -252,16 +252,16 @@ static void gen_vector_table15(TrueMotion1Context *s, const uint8_t *sel_vector_
 {
     int len, i, j;
     unsigned char delta_pair;
-    
+
     for (i = 0; i < 1024; i += 4)
     {
         len = *sel_vector_table++ / 2;
         for (j = 0; j < len; j++)
         {
             delta_pair = *sel_vector_table++;
-            s->y_predictor_table[i+j] = 0xfffffffe & 
+            s->y_predictor_table[i+j] = 0xfffffffe &
                 make_ydt15_entry(delta_pair >> 4, delta_pair & 0xf, s->ydt);
-            s->c_predictor_table[i+j] = 0xfffffffe & 
+            s->c_predictor_table[i+j] = 0xfffffffe &
                 make_cdt15_entry(delta_pair >> 4, delta_pair & 0xf, s->cdt);
         }
         s->y_predictor_table[i+(j-1)] |= 1;
@@ -273,16 +273,16 @@ static void gen_vector_table16(TrueMotion1Context *s, const uint8_t *sel_vector_
 {
     int len, i, j;
     unsigned char delta_pair;
-    
+
     for (i = 0; i < 1024; i += 4)
     {
         len = *sel_vector_table++ / 2;
         for (j = 0; j < len; j++)
         {
             delta_pair = *sel_vector_table++;
-            s->y_predictor_table[i+j] = 0xfffffffe & 
+            s->y_predictor_table[i+j] = 0xfffffffe &
                 make_ydt16_entry(delta_pair >> 4, delta_pair & 0xf, s->ydt);
-            s->c_predictor_table[i+j] = 0xfffffffe & 
+            s->c_predictor_table[i+j] = 0xfffffffe &
                 make_cdt16_entry(delta_pair >> 4, delta_pair & 0xf, s->cdt);
         }
         s->y_predictor_table[i+(j-1)] |= 1;
@@ -294,20 +294,20 @@ static void gen_vector_table24(TrueMotion1Context *s, const uint8_t *sel_vector_
 {
     int len, i, j;
     unsigned char delta_pair;
-    
+
     for (i = 0; i < 1024; i += 4)
     {
         len = *sel_vector_table++ / 2;
         for (j = 0; j < len; j++)
         {
             delta_pair = *sel_vector_table++;
-            s->y_predictor_table[i+j] = 0xfffffffe & 
+            s->y_predictor_table[i+j] = 0xfffffffe &
                 make_ydt24_entry(delta_pair >> 4, delta_pair & 0xf, s->ydt);
-            s->c_predictor_table[i+j] = 0xfffffffe & 
+            s->c_predictor_table[i+j] = 0xfffffffe &
                 make_cdt24_entry(delta_pair >> 4, delta_pair & 0xf, s->cdt);
-            s->fat_y_predictor_table[i+j] = 0xfffffffe & 
+            s->fat_y_predictor_table[i+j] = 0xfffffffe &
                 make_ydt24_entry(delta_pair >> 4, delta_pair & 0xf, s->fat_ydt);
-            s->fat_c_predictor_table[i+j] = 0xfffffffe & 
+            s->fat_c_predictor_table[i+j] = 0xfffffffe &
                 make_cdt24_entry(delta_pair >> 4, delta_pair & 0xf, s->fat_cdt);
         }
         s->y_predictor_table[i+(j-1)] |= 1;
@@ -318,7 +318,7 @@ static void gen_vector_table24(TrueMotion1Context *s, const uint8_t *sel_vector_
 }
 
 /* Returns the number of bytes consumed from the bytestream. Returns -1 if
- * there was an error while decoding the header */ 
+ * there was an error while decoding the header */
 static int truemotion1_decode_header(TrueMotion1Context *s)
 {
     int i;
@@ -334,14 +334,14 @@ static int truemotion1_decode_header(TrueMotion1Context *s)
     header.header_size = ((s->buf[0] >> 5) | (s->buf[0] << 3)) & 0x7f;
     if (s->buf[0] < 0x10)
     {
-	av_log(s->avctx, AV_LOG_ERROR, "invalid header size (%d)\n", s->buf[0]);
+        av_log(s->avctx, AV_LOG_ERROR, "invalid header size (%d)\n", s->buf[0]);
         return -1;
     }
 
     /* unscramble the header bytes with a XOR operation */
     memset(header_buffer, 0, 128);
     for (i = 1; i < header.header_size; i++)
-	header_buffer[i - 1] = s->buf[i] ^ s->buf[i + 1];
+        header_buffer[i - 1] = s->buf[i] ^ s->buf[i + 1];
 
     header.compression = header_buffer[0];
     header.deltaset = header_buffer[1];
@@ -369,9 +369,9 @@ static int truemotion1_decode_header(TrueMotion1Context *s)
             s->flags = FLAG_KEYFRAME;
     } else /* Version 1 */
         s->flags = FLAG_KEYFRAME;
-    
+
     if (s->flags & FLAG_SPRITE) {
-	av_log(s->avctx, AV_LOG_INFO, "SPRITE frame found, please report the sample to the developers\n");
+        av_log(s->avctx, AV_LOG_INFO, "SPRITE frame found, please report the sample to the developers\n");
         s->w = header.width;
         s->h = header.height;
         s->x = header.xoffset;
@@ -381,10 +381,10 @@ static int truemotion1_decode_header(TrueMotion1Context *s)
         s->h = header.ysize;
         if (header.header_type < 2) {
             if ((s->w < 213) && (s->h >= 176))
-	    {
+            {
                 s->flags |= FLAG_INTERPOLATED;
-	        av_log(s->avctx, AV_LOG_INFO, "INTERPOLATION selected, please report the sample to the developers\n");
-	    }
+                av_log(s->avctx, AV_LOG_INFO, "INTERPOLATION selected, please report the sample to the developers\n");
+            }
         }
     }
 
@@ -392,8 +392,8 @@ static int truemotion1_decode_header(TrueMotion1Context *s)
         av_log(s->avctx, AV_LOG_ERROR, "invalid compression type (%d)\n", header.compression);
         return -1;
     }
-    
-    if ((header.deltaset != s->last_deltaset) || 
+
+    if ((header.deltaset != s->last_deltaset) ||
         (header.vectable != s->last_vectable))
         select_delta_tables(s, header.deltaset);
 
@@ -407,21 +407,21 @@ static int truemotion1_decode_header(TrueMotion1Context *s)
             return -1;
         }
     }
-    
+
     // FIXME: where to place this ?!?!
     if (compression_types[header.compression].algorithm == ALGO_RGB24H)
-        s->avctx->pix_fmt = PIX_FMT_BGR24;
+        s->avctx->pix_fmt = PIX_FMT_RGBA32;
     else
-	s->avctx->pix_fmt = PIX_FMT_RGB555; // RGB565 is supported aswell
+        s->avctx->pix_fmt = PIX_FMT_RGB555; // RGB565 is supported aswell
 
     if ((header.deltaset != s->last_deltaset) || (header.vectable != s->last_vectable))
     {
         if (compression_types[header.compression].algorithm == ALGO_RGB24H)
             gen_vector_table24(s, sel_vector_table);
         else
-	if (s->avctx->pix_fmt == PIX_FMT_RGB555)
+        if (s->avctx->pix_fmt == PIX_FMT_RGB555)
             gen_vector_table15(s, sel_vector_table);
-	else
+        else
             gen_vector_table16(s, sel_vector_table);
     }
 
@@ -432,7 +432,7 @@ static int truemotion1_decode_header(TrueMotion1Context *s)
         s->index_stream = s->mb_change_bits;
     } else {
         /* one change bit per 4x4 block */
-        s->index_stream = s->mb_change_bits + 
+        s->index_stream = s->mb_change_bits +
             (s->mb_change_bits_row_size * (s->avctx->height >> 2));
     }
     s->index_stream_size = s->size - (s->index_stream - s->buf);
@@ -445,15 +445,15 @@ static int truemotion1_decode_header(TrueMotion1Context *s)
     s->block_type = compression_types[header.compression].block_type;
 
     if (s->avctx->debug & FF_DEBUG_PICT_INFO)
-	av_log(s->avctx, AV_LOG_INFO, "tables: %d / %d c:%d %dx%d t:%d %s%s%s%s\n",
-	    s->last_deltaset, s->last_vectable, s->compression, s->block_width,
-	    s->block_height, s->block_type,
-	    s->flags & FLAG_KEYFRAME ? " KEY" : "",
-	    s->flags & FLAG_INTERFRAME ? " INTER" : "",
-	    s->flags & FLAG_SPRITE ? " SPRITE" : "",
-	    s->flags & FLAG_INTERPOLATED ? " INTERPOL" : "");
-
-    return header.header_size;    
+        av_log(s->avctx, AV_LOG_INFO, "tables: %d / %d c:%d %dx%d t:%d %s%s%s%s\n",
+            s->last_deltaset, s->last_vectable, s->compression, s->block_width,
+            s->block_height, s->block_type,
+            s->flags & FLAG_KEYFRAME ? " KEY" : "",
+            s->flags & FLAG_INTERFRAME ? " INTER" : "",
+            s->flags & FLAG_SPRITE ? " SPRITE" : "",
+            s->flags & FLAG_INTERPOLATED ? " INTERPOL" : "");
+
+    return header.header_size;
 }
 
 static int truemotion1_decode_init(AVCodecContext *avctx)
@@ -464,17 +464,17 @@ static int truemotion1_decode_init(AVCodecContext *avctx)
 
     // FIXME: it may change ?
 //    if (avctx->bits_per_sample == 24)
-//	avctx->pix_fmt = PIX_FMT_RGB24;
+//        avctx->pix_fmt = PIX_FMT_RGB24;
 //    else
-//	avctx->pix_fmt = PIX_FMT_RGB555;
+//        avctx->pix_fmt = PIX_FMT_RGB555;
 
     avctx->has_b_frames = 0;
     s->frame.data[0] = s->prev_frame.data[0] = NULL;
 
     /* there is a vertical predictor for each pixel in a line; each vertical
      * predictor is 0 to start with */
-    s->vert_pred = 
-        (unsigned int *)av_malloc(s->avctx->width * sizeof(unsigned short));
+    s->vert_pred =
+        (unsigned int *)av_malloc(s->avctx->width * sizeof(unsigned int));
 
     return 0;
 }
@@ -533,21 +533,20 @@ hres,vres,i,i%vres (0 < i < 4)
 
 #define APPLY_C_PREDICTOR_24() \
     predictor_pair = s->c_predictor_table[index]; \
-    c_horiz_pred += (predictor_pair >> 1); \
+    horiz_pred += (predictor_pair >> 1); \
     if (predictor_pair & 1) { \
         GET_NEXT_INDEX() \
         if (!index) { \
             GET_NEXT_INDEX() \
             predictor_pair = s->fat_c_predictor_table[index]; \
-            c_horiz_pred += (predictor_pair >> 1); \
+            horiz_pred += (predictor_pair >> 1); \
             if (predictor_pair & 1) \
                 GET_NEXT_INDEX() \
             else \
                 index++; \
         } \
     } else \
-        index++; 
-//    c_last+coff = clast+c_horiz_pred;
+        index++;
 
 
 #define APPLY_Y_PREDICTOR() \
@@ -613,7 +612,7 @@ static void truemotion1_decode_16bit(TrueMotion1Context *s)
     int index;
 
     /* clean out the line buffer */
-    memset(s->vert_pred, 0, s->avctx->width * sizeof(unsigned short));
+    memset(s->vert_pred, 0, s->avctx->width * sizeof(unsigned int));
 
     GET_NEXT_INDEX();
 
@@ -635,7 +634,7 @@ static void truemotion1_decode_16bit(TrueMotion1Context *s)
 
                 switch (y & 3) {
                 case 0:
-                    /* if macroblock width is 2, apply C-Y-C-Y; else 
+                    /* if macroblock width is 2, apply C-Y-C-Y; else
                      * apply C-Y-Y */
                     if (s->block_width == 2) {
                         APPLY_C_PREDICTOR();
@@ -663,7 +662,7 @@ static void truemotion1_decode_16bit(TrueMotion1Context *s)
                     break;
 
                 case 2:
-                    /* this iteration might be C-Y-C-Y, Y-Y, or C-Y-Y 
+                    /* this iteration might be C-Y-C-Y, Y-Y, or C-Y-Y
                      * depending on the macroblock type */
                     if (s->block_type == BLOCK_2x2) {
                         APPLY_C_PREDICTOR();
@@ -689,14 +688,14 @@ static void truemotion1_decode_16bit(TrueMotion1Context *s)
 
             } else {
 
-                /* skip (copy) four pixels, but reassign the horizontal 
+                /* skip (copy) four pixels, but reassign the horizontal
                  * predictor */
                 *current_pixel_pair = *prev_pixel_pair++;
                 *vert_pred++ = *current_pixel_pair++;
                 *current_pixel_pair = *prev_pixel_pair++;
                 horiz_pred = *current_pixel_pair - *vert_pred;
                 *vert_pred++ = *current_pixel_pair++;
-                
+
             }
 
             if (!keyframe) {
@@ -727,7 +726,6 @@ static void truemotion1_decode_24bit(TrueMotion1Context *s)
     int pixels_left;  /* remaining pixels on this line */
     unsigned int predictor_pair;
     unsigned int horiz_pred;
-    unsigned int c_horiz_pred;
     unsigned int *vert_pred;
     unsigned int *current_pixel_pair;
     unsigned int *prev_pixel_pair;
@@ -746,14 +744,14 @@ static void truemotion1_decode_24bit(TrueMotion1Context *s)
     int index;
 
     /* clean out the line buffer */
-    memset(s->vert_pred, 0, s->avctx->width * sizeof(unsigned short));
+    memset(s->vert_pred, 0, s->avctx->width * sizeof(unsigned int));
 
     GET_NEXT_INDEX();
 
     for (y = 0; y < s->avctx->height; y++) {
 
         /* re-init variables for the next line iteration */
-        horiz_pred = c_horiz_pred = 0;
+        horiz_pred = 0;
         current_pixel_pair = (unsigned int *)current_line;
         prev_pixel_pair = (unsigned int *)prev_line;
         vert_pred = s->vert_pred;
@@ -768,25 +766,21 @@ static void truemotion1_decode_24bit(TrueMotion1Context *s)
 
                 switch (y & 3) {
                 case 0:
-                    /* if macroblock width is 2, apply C-Y-C-Y; else 
+                    /* if macroblock width is 2, apply C-Y-C-Y; else
                      * apply C-Y-Y */
                     if (s->block_width == 2) {
                         APPLY_C_PREDICTOR_24();
                         APPLY_Y_PREDICTOR_24();
                         OUTPUT_PIXEL_PAIR();
-//                        OUTPUT_PIXEL_PAIR_24_C();
                         APPLY_C_PREDICTOR_24();
                         APPLY_Y_PREDICTOR_24();
                         OUTPUT_PIXEL_PAIR();
-//                        OUTPUT_PIXEL_PAIR_24_C();
                     } else {
                         APPLY_C_PREDICTOR_24();
                         APPLY_Y_PREDICTOR_24();
                         OUTPUT_PIXEL_PAIR();
-//                        OUTPUT_PIXEL_PAIR_24_C();
                         APPLY_Y_PREDICTOR_24();
                         OUTPUT_PIXEL_PAIR();
-//                        OUTPUT_PIXEL_PAIR_24_C();
                     }
                     break;
 
@@ -800,25 +794,21 @@ static void truemotion1_decode_24bit(TrueMotion1Context *s)
                     break;
 
                 case 2:
-                    /* this iteration might be C-Y-C-Y, Y-Y, or C-Y-Y 
+                    /* this iteration might be C-Y-C-Y, Y-Y, or C-Y-Y
                      * depending on the macroblock type */
                     if (s->block_type == BLOCK_2x2) {
                         APPLY_C_PREDICTOR_24();
                         APPLY_Y_PREDICTOR_24();
                         OUTPUT_PIXEL_PAIR();
-//                        OUTPUT_PIXEL_PAIR_24_C();
                         APPLY_C_PREDICTOR_24();
                         APPLY_Y_PREDICTOR_24();
                         OUTPUT_PIXEL_PAIR();
-//                        OUTPUT_PIXEL_PAIR_24_C();
                     } else if (s->block_type == BLOCK_4x2) {
                         APPLY_C_PREDICTOR_24();
                         APPLY_Y_PREDICTOR_24();
                         OUTPUT_PIXEL_PAIR();
-//                        OUTPUT_PIXEL_PAIR_24_C();
                         APPLY_Y_PREDICTOR_24();
                         OUTPUT_PIXEL_PAIR();
-//                        OUTPUT_PIXEL_PAIR_24_C();
                     } else {
                         APPLY_Y_PREDICTOR_24();
                         OUTPUT_PIXEL_PAIR();
@@ -830,15 +820,14 @@ static void truemotion1_decode_24bit(TrueMotion1Context *s)
 
             } else {
 
-                /* skip (copy) four pixels, but reassign the horizontal 
+                /* skip (copy) four pixels, but reassign the horizontal
                  * predictor */
                 *current_pixel_pair = *prev_pixel_pair++;
                 *vert_pred++ = *current_pixel_pair++;
                 *current_pixel_pair = *prev_pixel_pair++;
                 horiz_pred = *current_pixel_pair - *vert_pred;
-//		c_horiz_pred = *current_pixel_pair - *vert_pred;
                 *vert_pred++ = *current_pixel_pair++;
-                
+
             }
 
             if (!keyframe) {
diff --git a/src/libffmpeg/libavcodec/truemotion1data.h b/src/libffmpeg/libavcodec/truemotion1data.h
index 59b863713..800bb306b 100644
--- a/src/libffmpeg/libavcodec/truemotion1data.h
+++ b/src/libffmpeg/libavcodec/truemotion1data.h
@@ -11,7 +11,7 @@
 
 /* Y delta tables, skinny and fat */
 static const int16_t ydt1[8] = { 0, -2, 2, -6, 6, -12, 12, -12 };
-static const int16_t ydt2[8] = { 0, -2, 2, -6, 6, -12, 12, -12 };
+static const int16_t ydt2[8] = { 0, -2, 4, -6, 8, -12, 12, -12 };
 static const int16_t ydt3[8] = { 4, -6, 20, -20, 46, -46, 94, -94 };
 static const int16_t fat_ydt3[8] = { 0, -15, 50, -50, 115, -115, 235, -235 };
 static const int16_t ydt4[8] = { 0, -4, 4, -16, 16, -36, 36, -80 };
@@ -29,7 +29,7 @@ static const int16_t fat_cdt3[8] = { 0, 40, 80, -76, 160, -154, 236, -236 };
 /* all the delta tables to choose from, at all 4 delta levels */
 static const int16_t *ydts[] = { ydt1, ydt2, ydt3, ydt4, NULL };
 static const int16_t *fat_ydts[] = { fat_ydt3, fat_ydt3, fat_ydt3, fat_ydt4, NULL };
-static const int16_t *cdts[] = { cdt1, cdt2, cdt3, cdt3, NULL };
+static const int16_t *cdts[] = { cdt1, cdt1, cdt2, cdt3, NULL };
 static const int16_t *fat_cdts[] = { fat_cdt2, fat_cdt2, fat_cdt2, fat_cdt3, NULL };
 
 static const uint8_t pc_tbl2[] = {
diff --git a/src/libffmpeg/libavcodec/truemotion2.c b/src/libffmpeg/libavcodec/truemotion2.c
index 42487bf32..84b940d42 100644
--- a/src/libffmpeg/libavcodec/truemotion2.c
+++ b/src/libffmpeg/libavcodec/truemotion2.c
@@ -14,15 +14,15 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file truemotion2.c
  * Duck TrueMotion2 decoder.
  */
- 
+
 #include "avcodec.h"
 #include "common.h"
 #include "bitstream.h"
@@ -43,7 +43,7 @@ typedef struct TM2Context{
 
     GetBitContext gb;
     DSPContext dsp;
-    
+
     /* TM2 streams */
     int *tokens[TM2_NUM_STREAMS];
     int tok_lens[TM2_NUM_STREAMS];
@@ -54,7 +54,7 @@ typedef struct TM2Context{
     int CD[4];
     int *last;
     int *clast;
-    
+
     /* data for current and previous frame */
     int *Y1, *U1, *V1, *Y2, *U2, *V2;
     int cur;
@@ -118,13 +118,13 @@ static int tm2_build_huff_table(TM2Context *ctx, TM2Codes *code)
 {
     TM2Huff huff;
     int res = 0;
-    
+
     huff.val_bits = get_bits(&ctx->gb, 5);
     huff.max_bits = get_bits(&ctx->gb, 5);
     huff.min_bits = get_bits(&ctx->gb, 5);
     huff.nodes = get_bits_long(&ctx->gb, 17);
     huff.num = 0;
-    
+
     /* check for correct codes parameters */
     if((huff.val_bits < 1) || (huff.val_bits > 32) ||
        (huff.max_bits < 0) || (huff.max_bits > 32)) {
@@ -139,33 +139,33 @@ static int tm2_build_huff_table(TM2Context *ctx, TM2Codes *code)
     /* one-node tree */
     if(huff.max_bits == 0)
         huff.max_bits = 1;
-    
+
     /* allocate space for codes - it is exactly ceil(nodes / 2) entries */
     huff.max_num = (huff.nodes + 1) >> 1;
     huff.nums = av_mallocz(huff.max_num * sizeof(int));
     huff.bits = av_mallocz(huff.max_num * sizeof(uint32_t));
     huff.lens = av_mallocz(huff.max_num * sizeof(int));
-    
+
     if(tm2_read_tree(ctx, 0, 0, &huff) == -1)
         res = -1;
-    
+
     if(huff.num != huff.max_num) {
         av_log(ctx->avctx, AV_LOG_ERROR, "Got less codes than expected: %i of %i\n",
                huff.num, huff.max_num);
         res = -1;
     }
-    
+
     /* convert codes to vlc_table */
     if(res != -1) {
         int i;
-        
+
         res = init_vlc(&code->vlc, huff.max_bits, huff.max_num,
                     huff.lens, sizeof(int), sizeof(int),
                     huff.bits, sizeof(uint32_t), sizeof(uint32_t), 0);
         if(res < 0) {
             av_log(ctx->avctx, AV_LOG_ERROR, "Cannot build VLC table\n");
             res = -1;
-        } else 
+        } else
             res = 0;
         if(res != -1) {
             code->bits = huff.max_bits;
@@ -179,7 +179,7 @@ static int tm2_build_huff_table(TM2Context *ctx, TM2Codes *code)
     av_free(huff.nums);
     av_free(huff.bits);
     av_free(huff.lens);
-    
+
     return res;
 }
 
@@ -203,21 +203,21 @@ static inline int tm2_read_header(TM2Context *ctx, uint8_t *buf)
     uint32_t magic;
     uint8_t *obuf;
     int length;
-    
+
     obuf = buf;
-    
+
     magic = LE_32(buf);
     buf += 4;
-    
+
     if(magic == 0x00000100) { /* old header */
 /*      av_log (ctx->avctx, AV_LOG_ERROR, "TM2 old header: not implemented (yet)\n"); */
         return 40;
     } else if(magic == 0x00000101) { /* new header */
         int w, h, size, flags, xr, yr;
-        
+
         length = LE_32(buf);
         buf += 4;
-        
+
         init_get_bits(&ctx->gb, buf, 32 * 8);
         size = get_bits_long(&ctx->gb, 31);
         h = get_bits(&ctx->gb, 15);
@@ -225,28 +225,28 @@ static inline int tm2_read_header(TM2Context *ctx, uint8_t *buf)
         flags = get_bits_long(&ctx->gb, 31);
         yr = get_bits(&ctx->gb, 9);
         xr = get_bits(&ctx->gb, 9);
-        
+
         return 40;
     } else {
         av_log (ctx->avctx, AV_LOG_ERROR, "Not a TM2 header: 0x%08X\n", magic);
         return -1;
     }
-    
+
     return (buf - obuf);
 }
 
 static int tm2_read_deltas(TM2Context *ctx, int stream_id) {
     int d, mb;
     int i, v;
-    
+
     d = get_bits(&ctx->gb, 9);
     mb = get_bits(&ctx->gb, 5);
-    
+
     if((d < 1) || (d > TM2_DELTAS) || (mb < 1) || (mb > 32)) {
         av_log(ctx->avctx, AV_LOG_ERROR, "Incorrect delta table: %i deltas x %i bits\n", d, mb);
         return -1;
     }
-    
+
     for(i = 0; i < d; i++) {
         v = get_bits_long(&ctx->gb, mb);
         if(v & (1 << (mb - 1)))
@@ -256,7 +256,7 @@ static int tm2_read_deltas(TM2Context *ctx, int stream_id) {
     }
     for(; i < TM2_DELTAS; i++)
         ctx->deltas[stream_id][i] = 0;
-    
+
     return 0;
 }
 
@@ -266,14 +266,14 @@ static int tm2_read_stream(TM2Context *ctx, uint8_t *buf, int stream_id) {
     int skip = 0;
     int len, toks;
     TM2Codes codes;
-    
+
     /* get stream length in dwords */
     len = BE_32(buf); buf += 4; cur += 4;
     skip = len * 4 + 4;
-    
+
     if(len == 0)
         return 4;
-    
+
     toks = BE_32(buf); buf += 4; cur += 4;
     if(toks & 1) {
         len = BE_32(buf); buf += 4; cur += 4;
@@ -294,13 +294,13 @@ static int tm2_read_stream(TM2Context *ctx, uint8_t *buf, int stream_id) {
     }
     buf += 4; cur += 4;
     buf += 4; cur += 4; /* unused by decoder */
-    
+
     init_get_bits(&ctx->gb, buf, (skip - cur) * 8);
     if(tm2_build_huff_table(ctx, &codes) == -1)
         return -1;
     buf += ((get_bits_count(&ctx->gb) + 31) >> 5) << 2;
     cur += ((get_bits_count(&ctx->gb) + 31) >> 5) << 2;
-    
+
     toks >>= 1;
     /* check if we have sane number of tokens */
     if((toks < 0) || (toks > 0xFFFFFF)){
@@ -320,7 +320,7 @@ static int tm2_read_stream(TM2Context *ctx, uint8_t *buf, int stream_id) {
             ctx->tokens[stream_id][i] = codes.recode[0];
     }
     tm2_free_codes(&codes);
-    
+
     return skip;
 }
 
@@ -375,7 +375,7 @@ static inline void tm2_apply_deltas(TM2Context *ctx, int* Y, int stride, int *de
 {
     int ct, d;
     int i, j;
-    
+
     for(j = 0; j < 4; j++){
         ct = ctx->D[j];
         for(i = 0; i < 4; i++){
@@ -417,7 +417,7 @@ static inline void tm2_low_chroma(int *data, int stride, int *clast, int *CD, in
     CD[1] = CD[0] + CD[1] - t;
     CD[0] = t;
     clast[0] = l;
-    
+
     tm2_high_chroma(data, stride, clast, CD, deltas);
 }
 
@@ -434,11 +434,11 @@ static inline void tm2_hi_res_block(TM2Context *ctx, AVFrame *pic, int bx, int b
     }
     tm2_high_chroma(U, Ustride, clast, ctx->CD, deltas);
     tm2_high_chroma(V, Vstride, clast + 2, ctx->CD + 2, deltas + 4);
-    
+
     /* hi-res luma */
     for(i = 0; i < 16; i++)
         deltas[i] = GET_TOK(ctx, TM2_L_HI);
-    
+
     tm2_apply_deltas(ctx, Y, Ystride, deltas, last);
 }
 
@@ -447,7 +447,7 @@ static inline void tm2_med_res_block(TM2Context *ctx, AVFrame *pic, int bx, int
     int i;
     int deltas[16];
     TM2_INIT_POINTERS();
-    
+
     /* low-res chroma */
     deltas[0] = GET_TOK(ctx, TM2_C_LO);
     deltas[1] = deltas[2] = deltas[3] = 0;
@@ -460,7 +460,7 @@ static inline void tm2_med_res_block(TM2Context *ctx, AVFrame *pic, int bx, int
     /* hi-res luma */
     for(i = 0; i < 16; i++)
         deltas[i] = GET_TOK(ctx, TM2_L_HI);
-    
+
     tm2_apply_deltas(ctx, Y, Ystride, deltas, last);
 }
 
@@ -483,12 +483,12 @@ static inline void tm2_low_res_block(TM2Context *ctx, AVFrame *pic, int bx, int
     /* low-res luma */
     for(i = 0; i < 16; i++)
         deltas[i] = 0;
-        
+
     deltas[ 0] = GET_TOK(ctx, TM2_L_LO);
     deltas[ 2] = GET_TOK(ctx, TM2_L_LO);
     deltas[ 8] = GET_TOK(ctx, TM2_L_LO);
     deltas[10] = GET_TOK(ctx, TM2_L_LO);
-    
+
     if(bx > 0)
         last[0] = (last[-1] - ctx->D[0] - ctx->D[1] - ctx->D[2] - ctx->D[3] + last[1]) >> 1;
     else
@@ -501,7 +501,7 @@ static inline void tm2_low_res_block(TM2Context *ctx, AVFrame *pic, int bx, int
     t2 = ctx->D[2] + ctx->D[3];
     ctx->D[2] = t2 >> 1;
     ctx->D[3] = t2 - (t2 >> 1);
-    
+
     tm2_apply_deltas(ctx, Y, Ystride, deltas, last);
 }
 
@@ -512,25 +512,25 @@ static inline void tm2_null_res_block(TM2Context *ctx, AVFrame *pic, int bx, int
     int left, right, diff;
     int deltas[16];
     TM2_INIT_POINTERS();
-    
+
     /* null chroma */
     deltas[0] = deltas[1] = deltas[2] = deltas[3] = 0;
     tm2_low_chroma(U, Ustride, clast, ctx->CD, deltas, bx);
 
     deltas[0] = deltas[1] = deltas[2] = deltas[3] = 0;
     tm2_low_chroma(V, Vstride, clast + 2, ctx->CD + 2, deltas, bx);
-    
+
     /* null luma */
     for(i = 0; i < 16; i++)
         deltas[i] = 0;
 
     ct = ctx->D[0] + ctx->D[1] + ctx->D[2] + ctx->D[3];
-    
+
     if(bx > 0)
         left = last[-1] - ct;
     else
         left = 0;
-    
+
     right = last[3];
     diff = right - left;
     last[0] = left + (diff >> 2);
@@ -539,7 +539,7 @@ static inline void tm2_null_res_block(TM2Context *ctx, AVFrame *pic, int bx, int
     last[3] = right;
     {
         int tp = left;
-        
+
         ctx->D[0] = (tp + (ct >> 2)) - left;
         left += ctx->D[0];
         ctx->D[1] = (tp + (ct >> 1)) - left;
@@ -591,7 +591,7 @@ static inline void tm2_update_block(TM2Context *ctx, AVFrame *pic, int bx, int b
     int i, j;
     int d;
     TM2_INIT_POINTERS_2();
-    
+
     /* update chroma */
     for(j = 0; j < 2; j++){
         for(i = 0; i < 2; i++){
@@ -632,11 +632,11 @@ static inline void tm2_motion_block(TM2Context *ctx, AVFrame *pic, int bx, int b
 
     mx = GET_TOK(ctx, TM2_MOT);
     my = GET_TOK(ctx, TM2_MOT);
-    
+
     Yo += my * oYstride + mx;
     Uo += (my >> 1) * oUstride + (mx >> 1);
     Vo += (my >> 1) * oVstride + (mx >> 1);
-    
+
     /* copy chroma */
     for(j = 0; j < 2; j++){
         for(i = 0; i < 2; i++){
@@ -677,18 +677,18 @@ static int tm2_decode_blocks(TM2Context *ctx, AVFrame *p)
     int keyframe = 1;
     uint8_t *Y, *U, *V;
     int *src;
-    
+
     bw = ctx->avctx->width >> 2;
     bh = ctx->avctx->height >> 2;
 
     for(i = 0; i < TM2_NUM_STREAMS; i++)
         ctx->tok_ptrs[i] = 0;
-    
+
     if (ctx->tok_lens[TM2_TYPE]<bw*bh){
         av_log(ctx->avctx,AV_LOG_ERROR,"Got %i tokens for %i blocks\n",ctx->tok_lens[TM2_TYPE],bw*bh);
         return -1;
     }
-    
+
     memset(ctx->last, 0, 4 * bw * sizeof(int));
     memset(ctx->clast, 0, 4 * bw * sizeof(int));
 
@@ -727,8 +727,8 @@ static int tm2_decode_blocks(TM2Context *ctx, AVFrame *p)
             }
         }
     }
-    
-    /* copy data from our buffer to AVFrame */    
+
+    /* copy data from our buffer to AVFrame */
     Y = p->data[0];
     src = (ctx->cur?ctx->Y2:ctx->Y1);
     for(j = 0; j < ctx->avctx->height; j++){
@@ -753,11 +753,11 @@ static int tm2_decode_blocks(TM2Context *ctx, AVFrame *p)
         }
         V += p->linesize[1];
     }
-    
+
     return keyframe;
 }
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -774,10 +774,10 @@ static int decode_frame(AVCodecContext *avctx,
 
     l->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, buf_size >> 2);
     skip = tm2_read_header(l, buf);
-    
+
     if(skip == -1)
         return -1;
-    
+
     t = tm2_read_stream(l, buf + skip, TM2_C_HI);
     if(t == -1)
         return -1;
@@ -810,11 +810,11 @@ static int decode_frame(AVCodecContext *avctx,
         p->pict_type = FF_I_TYPE;
     else
         p->pict_type = FF_P_TYPE;
-    
+
     l->cur = !l->cur;
     *data_size = sizeof(AVFrame);
     *(AVFrame*)data = l->pic;
-    
+
     return buf_size;
 }
 
@@ -829,22 +829,22 @@ static int decode_init(AVCodecContext *avctx){
         av_log(avctx, AV_LOG_ERROR, "Width and height must be multiple of 4\n");
         return -1;
     }
-    
+
     l->avctx = avctx;
     l->pic.data[0]=NULL;
     avctx->has_b_frames = 0;
     avctx->pix_fmt = PIX_FMT_YUV420P;
 
     dsputil_init(&l->dsp, avctx);
-    
+
     l->last = av_malloc(4 * sizeof(int) * (avctx->width >> 2));
     l->clast = av_malloc(4 * sizeof(int) * (avctx->width >> 2));
-    
+
     for(i = 0; i < TM2_NUM_STREAMS; i++) {
         l->tokens[i] = NULL;
         l->tok_lens[i] = 0;
     }
-    
+
     l->Y1 = av_malloc(sizeof(int) * avctx->width * avctx->height);
     l->U1 = av_malloc(sizeof(int) * ((avctx->width + 1) >> 1) * ((avctx->height + 1) >> 1));
     l->V1 = av_malloc(sizeof(int) * ((avctx->width + 1) >> 1) * ((avctx->height + 1) >> 1));
@@ -852,7 +852,7 @@ static int decode_init(AVCodecContext *avctx){
     l->U2 = av_malloc(sizeof(int) * ((avctx->width + 1) >> 1) * ((avctx->height + 1) >> 1));
     l->V2 = av_malloc(sizeof(int) * ((avctx->width + 1) >> 1) * ((avctx->height + 1) >> 1));
     l->cur = 0;
-    
+
     return 0;
 }
 
diff --git a/src/libffmpeg/libavcodec/tscc.c b/src/libffmpeg/libavcodec/tscc.c
index 109404404..8bc53bf89 100644
--- a/src/libffmpeg/libavcodec/tscc.c
+++ b/src/libffmpeg/libavcodec/tscc.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -71,13 +71,13 @@ typedef struct TsccContext {
  *              and enhanced to bigger color depths
  *
  */
- 
+
 static int decode_rle(CamtasiaContext *c, unsigned int srcsize)
 {
     unsigned char *src = c->decomp_buf;
     unsigned char *output, *output_end;
     int p1, p2, line=c->height, pos=0, i;
-    
+
     output = c->pic.data[0] + (c->height - 1) * c->pic.linesize[0];
     output_end = c->pic.data[0] + (c->height) * c->pic.linesize[0];
     while(src < c->decomp_buf + srcsize) {
@@ -110,10 +110,10 @@ static int decode_rle(CamtasiaContext *c, unsigned int srcsize)
             for(i = 0; i < p2 * (c->bpp / 8); i++) {
                 *output++ = *src++;
             }
-	    // RLE8 copy is actually padded - and runs are not!
-	    if(c->bpp == 8 && (p2 & 1)) {
-		src++;
-	    }
+            // RLE8 copy is actually padded - and runs are not!
+            if(c->bpp == 8 && (p2 & 1)) {
+                src++;
+            }
             pos += p2;
         } else { //Run of pixels
             int pix[4]; //original pixel
@@ -156,8 +156,8 @@ static int decode_rle(CamtasiaContext *c, unsigned int srcsize)
             pos += p1;
         }
     }
-    
-    av_log(c->avctx, AV_LOG_ERROR, "Camtasia warning: no End-of-picture code\n");        
+
+    av_log(c->avctx, AV_LOG_ERROR, "Camtasia warning: no End-of-picture code\n");
     return 1;
 }
 
@@ -208,7 +208,7 @@ static int decode_frame(AVCodecContext *avctx, void *data, int *data_size, uint8
 
     if(zret != Z_DATA_ERROR)
         decode_rle(c, c->zstream.avail_out);
-    
+
     /* make the palette available on the way out */
     if (c->avctx->pix_fmt == PIX_FMT_PAL8) {
         memcpy(c->pic.data[1], c->avctx->palctrl->palette, AVPALETTE_SIZE);
@@ -254,7 +254,7 @@ static int decode_init(AVCodecContext *avctx)
 
 #ifdef CONFIG_ZLIB
     // Needed if zlib unused or init aborted before inflateInit
-    memset(&(c->zstream), 0, sizeof(z_stream)); 
+    memset(&(c->zstream), 0, sizeof(z_stream));
 #else
     av_log(avctx, AV_LOG_ERROR, "Zlib support not compiled.\n");
     return 1;
@@ -267,7 +267,7 @@ static int decode_init(AVCodecContext *avctx)
              break;
     case 32: avctx->pix_fmt = PIX_FMT_RGBA32; break;
     default: av_log(avctx, AV_LOG_ERROR, "Camtasia error: unknown depth %i bpp\n", avctx->bits_per_sample);
-             return -1;             
+             return -1;
     }
     c->bpp = avctx->bits_per_sample;
     c->decomp_size = (avctx->width * c->bpp + (avctx->width + 254) / 255 + 2) * avctx->height + 2;//RLE in the 'best' case
@@ -279,7 +279,7 @@ static int decode_init(AVCodecContext *avctx)
             return 1;
         }
     }
-  
+
 #ifdef CONFIG_ZLIB
     c->zstream.zalloc = Z_NULL;
     c->zstream.zfree = Z_NULL;
diff --git a/src/libffmpeg/libavcodec/ulti.c b/src/libffmpeg/libavcodec/ulti.c
index 1b47bddbd..484eef7c7 100755
--- a/src/libffmpeg/libavcodec/ulti.c
+++ b/src/libffmpeg/libavcodec/ulti.c
@@ -14,15 +14,15 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
 /**
- * @file ulti.c 
+ * @file ulti.c
  * IBM Ultimotion Video Decoder.
  */
- 
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -71,56 +71,56 @@ static uint8_t ulti_lumas[64] =
       0x9B, 0x9F, 0xA2, 0xA5, 0xA9, 0xAC, 0xB0, 0xB3,
       0xB7, 0xBA, 0xBE, 0xC1, 0xC5, 0xC8, 0xCC, 0xCF,
       0xD3, 0xD6, 0xDA, 0xDD, 0xE1, 0xE4, 0xE8, 0xEB};
-      
+
 static uint8_t ulti_chromas[16] =
     { 0x60, 0x67, 0x6D, 0x73, 0x7A, 0x80, 0x86, 0x8D,
       0x93, 0x99, 0xA0, 0xA6, 0xAC, 0xB3, 0xB9, 0xC0};
-      
+
 /* convert Ultimotion YUV block (sixteen 6-bit Y samples and
  two 4-bit chroma samples) into standard YUV and put it into frame */
 static void ulti_convert_yuv(AVFrame *frame, int x, int y,
-			     uint8_t *luma,int chroma)
+                             uint8_t *luma,int chroma)
 {
     uint8_t *y_plane, *cr_plane, *cb_plane;
     int i;
-    
+
     y_plane = frame->data[0] + x + y * frame->linesize[0];
     cr_plane = frame->data[1] + (x / 4) + (y / 4) * frame->linesize[1];
     cb_plane = frame->data[2] + (x / 4) + (y / 4) * frame->linesize[2];
-    
+
     cr_plane[0] = ulti_chromas[chroma >> 4];
-    
+
     cb_plane[0] = ulti_chromas[chroma & 0xF];
 
-    
+
     for(i = 0; i < 16; i++){
-	y_plane[i & 3] = ulti_lumas[luma[i]];
-	if((i & 3) == 3) { //next row
-	    y_plane += frame->linesize[0];
-	}
+        y_plane[i & 3] = ulti_lumas[luma[i]];
+        if((i & 3) == 3) { //next row
+            y_plane += frame->linesize[0];
+        }
     }
 }
 
 /* generate block like in MS Video1 */
 static void ulti_pattern(AVFrame *frame, int x, int y,
-			 int f0, int f1, int Y0, int Y1, int chroma)
+                         int f0, int f1, int Y0, int Y1, int chroma)
 {
     uint8_t Luma[16];
     int mask, i;
     for(mask = 0x80, i = 0; mask; mask >>= 1, i++) {
-	if(f0 & mask)
-	    Luma[i] = Y1;
-	else
-	    Luma[i] = Y0;
+        if(f0 & mask)
+            Luma[i] = Y1;
+        else
+            Luma[i] = Y0;
     }
-    
+
     for(mask = 0x80, i = 8; mask; mask >>= 1, i++) {
-	if(f1 & mask)
-	    Luma[i] = Y1;
-	else
-	    Luma[i] = Y0;
+        if(f1 & mask)
+            Luma[i] = Y1;
+        else
+            Luma[i] = Y0;
     }
-    
+
     ulti_convert_yuv(frame, x, y, Luma, chroma);
 }
 
@@ -129,76 +129,76 @@ static void ulti_grad(AVFrame *frame, int x, int y, uint8_t *Y, int chroma, int
 {
     uint8_t Luma[16];
     if(angle & 8) { //reverse order
-	int t;
-	angle &= 0x7;
-	t = Y[0];
-	Y[0] = Y[3];
-	Y[3] = t;
-	t = Y[1];
-	Y[1] = Y[2];
-	Y[2] = t;
+        int t;
+        angle &= 0x7;
+        t = Y[0];
+        Y[0] = Y[3];
+        Y[3] = t;
+        t = Y[1];
+        Y[1] = Y[2];
+        Y[2] = t;
     }
     switch(angle){
     case 0:
-	Luma[0]  = Y[0]; Luma[1]  = Y[1]; Luma[2]  = Y[2]; Luma[3]  = Y[3];
-	Luma[4]  = Y[0]; Luma[5]  = Y[1]; Luma[6]  = Y[2]; Luma[7]  = Y[3];
-	Luma[8]  = Y[0]; Luma[9]  = Y[1]; Luma[10] = Y[2]; Luma[11] = Y[3];
-	Luma[12] = Y[0]; Luma[13] = Y[1]; Luma[14] = Y[2]; Luma[15] = Y[3];	
-	break;
+        Luma[0]  = Y[0]; Luma[1]  = Y[1]; Luma[2]  = Y[2]; Luma[3]  = Y[3];
+        Luma[4]  = Y[0]; Luma[5]  = Y[1]; Luma[6]  = Y[2]; Luma[7]  = Y[3];
+        Luma[8]  = Y[0]; Luma[9]  = Y[1]; Luma[10] = Y[2]; Luma[11] = Y[3];
+        Luma[12] = Y[0]; Luma[13] = Y[1]; Luma[14] = Y[2]; Luma[15] = Y[3];
+        break;
     case 1:
-	Luma[0]  = Y[1]; Luma[1]  = Y[2]; Luma[2]  = Y[3]; Luma[3]  = Y[3];
-	Luma[4]  = Y[0]; Luma[5]  = Y[1]; Luma[6]  = Y[2]; Luma[7]  = Y[3];
-	Luma[8]  = Y[0]; Luma[9]  = Y[1]; Luma[10] = Y[2]; Luma[11] = Y[3];
-	Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[1]; Luma[15] = Y[2];	
-	break;
+        Luma[0]  = Y[1]; Luma[1]  = Y[2]; Luma[2]  = Y[3]; Luma[3]  = Y[3];
+        Luma[4]  = Y[0]; Luma[5]  = Y[1]; Luma[6]  = Y[2]; Luma[7]  = Y[3];
+        Luma[8]  = Y[0]; Luma[9]  = Y[1]; Luma[10] = Y[2]; Luma[11] = Y[3];
+        Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[1]; Luma[15] = Y[2];
+        break;
     case 2:
-	Luma[0]  = Y[1]; Luma[1]  = Y[2]; Luma[2]  = Y[3]; Luma[3]  = Y[3];
-	Luma[4]  = Y[1]; Luma[5]  = Y[2]; Luma[6]  = Y[2]; Luma[7]  = Y[3];
-	Luma[8]  = Y[0]; Luma[9]  = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[2];
-	Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[1]; Luma[15] = Y[2];	
-	break;
+        Luma[0]  = Y[1]; Luma[1]  = Y[2]; Luma[2]  = Y[3]; Luma[3]  = Y[3];
+        Luma[4]  = Y[1]; Luma[5]  = Y[2]; Luma[6]  = Y[2]; Luma[7]  = Y[3];
+        Luma[8]  = Y[0]; Luma[9]  = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[2];
+        Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[1]; Luma[15] = Y[2];
+        break;
     case 3:
-	Luma[0]  = Y[2]; Luma[1]  = Y[3]; Luma[2]  = Y[3]; Luma[3]  = Y[3];
-	Luma[4]  = Y[1]; Luma[5]  = Y[2]; Luma[6]  = Y[2]; Luma[7]  = Y[3];
-	Luma[8]  = Y[0]; Luma[9]  = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[2];
-	Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[0]; Luma[15] = Y[1];	
-	break;
+        Luma[0]  = Y[2]; Luma[1]  = Y[3]; Luma[2]  = Y[3]; Luma[3]  = Y[3];
+        Luma[4]  = Y[1]; Luma[5]  = Y[2]; Luma[6]  = Y[2]; Luma[7]  = Y[3];
+        Luma[8]  = Y[0]; Luma[9]  = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[2];
+        Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[0]; Luma[15] = Y[1];
+        break;
     case 4:
-	Luma[0]  = Y[3]; Luma[1]  = Y[3]; Luma[2]  = Y[3]; Luma[3]  = Y[3];
-	Luma[4]  = Y[2]; Luma[5]  = Y[2]; Luma[6]  = Y[2]; Luma[7]  = Y[2];
-	Luma[8]  = Y[1]; Luma[9]  = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[1];
-	Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[0]; Luma[15] = Y[0];	
-	break;
+        Luma[0]  = Y[3]; Luma[1]  = Y[3]; Luma[2]  = Y[3]; Luma[3]  = Y[3];
+        Luma[4]  = Y[2]; Luma[5]  = Y[2]; Luma[6]  = Y[2]; Luma[7]  = Y[2];
+        Luma[8]  = Y[1]; Luma[9]  = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[1];
+        Luma[12] = Y[0]; Luma[13] = Y[0]; Luma[14] = Y[0]; Luma[15] = Y[0];
+        break;
     case 5:
-	Luma[0]  = Y[3]; Luma[1]  = Y[3]; Luma[2]  = Y[3]; Luma[3]  = Y[2];
-	Luma[4]  = Y[3]; Luma[5]  = Y[2]; Luma[6]  = Y[2]; Luma[7]  = Y[1];
-	Luma[8]  = Y[2]; Luma[9]  = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[0];
-	Luma[12] = Y[1]; Luma[13] = Y[0]; Luma[14] = Y[0]; Luma[15] = Y[0];	
-	break;
+        Luma[0]  = Y[3]; Luma[1]  = Y[3]; Luma[2]  = Y[3]; Luma[3]  = Y[2];
+        Luma[4]  = Y[3]; Luma[5]  = Y[2]; Luma[6]  = Y[2]; Luma[7]  = Y[1];
+        Luma[8]  = Y[2]; Luma[9]  = Y[1]; Luma[10] = Y[1]; Luma[11] = Y[0];
+        Luma[12] = Y[1]; Luma[13] = Y[0]; Luma[14] = Y[0]; Luma[15] = Y[0];
+        break;
     case 6:
-	Luma[0]  = Y[3]; Luma[1]  = Y[3]; Luma[2]  = Y[2]; Luma[3]  = Y[2];
-	Luma[4]  = Y[3]; Luma[5]  = Y[2]; Luma[6]  = Y[1]; Luma[7]  = Y[1];
-	Luma[8]  = Y[2]; Luma[9]  = Y[2]; Luma[10] = Y[1]; Luma[11] = Y[0];
-	Luma[12] = Y[1]; Luma[13] = Y[1]; Luma[14] = Y[0]; Luma[15] = Y[0];	
-	break;
+        Luma[0]  = Y[3]; Luma[1]  = Y[3]; Luma[2]  = Y[2]; Luma[3]  = Y[2];
+        Luma[4]  = Y[3]; Luma[5]  = Y[2]; Luma[6]  = Y[1]; Luma[7]  = Y[1];
+        Luma[8]  = Y[2]; Luma[9]  = Y[2]; Luma[10] = Y[1]; Luma[11] = Y[0];
+        Luma[12] = Y[1]; Luma[13] = Y[1]; Luma[14] = Y[0]; Luma[15] = Y[0];
+        break;
     case 7:
-	Luma[0]  = Y[3]; Luma[1]  = Y[3]; Luma[2]  = Y[2]; Luma[3]  = Y[1];
-	Luma[4]  = Y[3]; Luma[5]  = Y[2]; Luma[6]  = Y[1]; Luma[7]  = Y[0];
-	Luma[8]  = Y[3]; Luma[9]  = Y[2]; Luma[10] = Y[1]; Luma[11] = Y[0];
-	Luma[12] = Y[2]; Luma[13] = Y[1]; Luma[14] = Y[0]; Luma[15] = Y[0];	
-	break;
+        Luma[0]  = Y[3]; Luma[1]  = Y[3]; Luma[2]  = Y[2]; Luma[3]  = Y[1];
+        Luma[4]  = Y[3]; Luma[5]  = Y[2]; Luma[6]  = Y[1]; Luma[7]  = Y[0];
+        Luma[8]  = Y[3]; Luma[9]  = Y[2]; Luma[10] = Y[1]; Luma[11] = Y[0];
+        Luma[12] = Y[2]; Luma[13] = Y[1]; Luma[14] = Y[0]; Luma[15] = Y[0];
+        break;
     default:
-	Luma[0]  = Y[0]; Luma[1]  = Y[0]; Luma[2]  = Y[1]; Luma[3]  = Y[1];
-	Luma[4]  = Y[0]; Luma[5]  = Y[0]; Luma[6]  = Y[1]; Luma[7]  = Y[1];
-	Luma[8]  = Y[2]; Luma[9]  = Y[2]; Luma[10] = Y[3]; Luma[11] = Y[3];
-	Luma[12] = Y[2]; Luma[13] = Y[2]; Luma[14] = Y[3]; Luma[15] = Y[3];	
-	break;
+        Luma[0]  = Y[0]; Luma[1]  = Y[0]; Luma[2]  = Y[1]; Luma[3]  = Y[1];
+        Luma[4]  = Y[0]; Luma[5]  = Y[0]; Luma[6]  = Y[1]; Luma[7]  = Y[1];
+        Luma[8]  = Y[2]; Luma[9]  = Y[2]; Luma[10] = Y[3]; Luma[11] = Y[3];
+        Luma[12] = Y[2]; Luma[13] = Y[2]; Luma[14] = Y[3]; Luma[15] = Y[3];
+        break;
     }
-    
+
     ulti_convert_yuv(frame, x, y, Luma, chroma);
 }
 
-static int ulti_decode_frame(AVCodecContext *avctx, 
+static int ulti_decode_frame(AVCodecContext *avctx,
                              void *data, int *data_size,
                              uint8_t *buf, int buf_size)
 {
@@ -222,182 +222,182 @@ static int ulti_decode_frame(AVCodecContext *avctx,
         av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
         return -1;
     }
-    
+
     while(!done) {
-	int idx;
-	if(blocks >= s->blocks || y >= s->height)
-	    break;//all blocks decoded
-	
-	idx = *buf++;
-	if((idx & 0xF8) == 0x70) {
-	    switch(idx) {
-	    case 0x70: //change modifier
-		modifier = *buf++;
-		if(modifier>1)
-		    av_log(avctx, AV_LOG_INFO, "warning: modifier must be 0 or 1, got %i\n", modifier);
-		break;
-	    case 0x71: // set uniq flag
-		uniq = 1;
-		break;
-	    case 0x72: //toggle mode
-		mode = !mode;
-		break;
-	    case 0x73: //end-of-frame
-		done = 1;
-		break;
-	    case 0x74: //skip some blocks
-		skip = *buf++;
-		if ((blocks + skip) >= s->blocks)
-		    break;
-		blocks += skip;
-		x += skip * 8;
-		while(x >= s->width) {
-		    x -= s->width;
-		    y += 8;
-		}
-		break;
-	    default:
-		av_log(avctx, AV_LOG_INFO, "warning: unknown escape 0x%02X\n", idx);
-	    }	
-	} else { //handle one block
-	    int code;
-	    int cf;
-	    int angle = 0;
-	    uint8_t Y[4]; // luma samples of block
-	    int tx = 0, ty = 0; //coords of subblock
-	    int chroma = 0;
-	    if (mode || uniq) {
-		uniq = 0;
-		cf = 1;
-		chroma = 0;
-	    } else {
-		cf = 0;
-		if (idx)
-		    chroma = *buf++;
-	    }
-	    for (i = 0; i < 4; i++) { // for every subblock
-		code = (idx >> (6 - i*2)) & 3; //extract 2 bits
-		if(!code) //skip subblock
-		    continue;
-		if(cf)
-		    chroma = *buf++;
-		tx = x + block_coords[i * 2];
-		ty = y + block_coords[(i * 2) + 1];
-		switch(code) {
-		case 1: 
-		    tmp = *buf++;
-		    
-		    angle = angle_by_index[(tmp >> 6) & 0x3];
-		    
-		    Y[0] = tmp & 0x3F;
-		    Y[1] = Y[0];
-		    
-		    if (angle) {
-			Y[2] = Y[0]+1;
-			if (Y[2] > 0x3F)
-			    Y[2] = 0x3F;
-			Y[3] = Y[2];			
-		    } else {
-			Y[2] = Y[0];
-			Y[3] = Y[0];
-		    }
-		    break;
-		    
-		case 2:
-		    if (modifier) { // unpack four luma samples
-			tmp = (*buf++) << 16;
-			tmp += (*buf++) << 8;
-			tmp += *buf++;
-			
-			Y[0] = (tmp >> 18) & 0x3F;
-			Y[1] = (tmp >> 12) & 0x3F;
-			Y[2] = (tmp >> 6) & 0x3F;
-			Y[3] = tmp & 0x3F;
-			angle = 16;
-		    } else { // retrieve luma samples from codebook
-			tmp = (*buf++) << 8;
-			tmp += (*buf++);
-			
-			angle = (tmp >> 12) & 0xF;
-			tmp &= 0xFFF;
-			tmp <<= 2;
-			Y[0] = s->ulti_codebook[tmp];
-			Y[1] = s->ulti_codebook[tmp + 1];
-			Y[2] = s->ulti_codebook[tmp + 2];
-			Y[3] = s->ulti_codebook[tmp + 3];
-		    }
-		    break;
-		    
-		case 3:
-		    if (modifier) { // all 16 luma samples
-			uint8_t Luma[16];
-			
-			tmp = (*buf++) << 16;
-			tmp += (*buf++) << 8;
-			tmp += *buf++;
-			Luma[0] = (tmp >> 18) & 0x3F;
-			Luma[1] = (tmp >> 12) & 0x3F;
-			Luma[2] = (tmp >> 6) & 0x3F;
-			Luma[3] = tmp & 0x3F;
-			
-			tmp = (*buf++) << 16;
-			tmp += (*buf++) << 8;
-			tmp += *buf++;
-			Luma[4] = (tmp >> 18) & 0x3F;
-			Luma[5] = (tmp >> 12) & 0x3F;
-			Luma[6] = (tmp >> 6) & 0x3F;
-			Luma[7] = tmp & 0x3F;
-			
-			tmp = (*buf++) << 16;
-			tmp += (*buf++) << 8;
-			tmp += *buf++;
-			Luma[8] = (tmp >> 18) & 0x3F;
-			Luma[9] = (tmp >> 12) & 0x3F;
-			Luma[10] = (tmp >> 6) & 0x3F;
-			Luma[11] = tmp & 0x3F;
-			
-			tmp = (*buf++) << 16;
-			tmp += (*buf++) << 8;
-			tmp += *buf++;
-			Luma[12] = (tmp >> 18) & 0x3F;
-			Luma[13] = (tmp >> 12) & 0x3F;
-			Luma[14] = (tmp >> 6) & 0x3F;
-			Luma[15] = tmp & 0x3F;
-			
-			ulti_convert_yuv(&s->frame, tx, ty, Luma, chroma);
-		    } else {
-			tmp = *buf++;
-			if(tmp & 0x80) {
-			    angle = (tmp >> 4) & 0x7;
-			    tmp = (tmp << 8) + *buf++;
-			    Y[0] = (tmp >> 6) & 0x3F;
-			    Y[1] = tmp & 0x3F;
-			    Y[2] = (*buf++) & 0x3F;
-			    Y[3] = (*buf++) & 0x3F;
-			    ulti_grad(&s->frame, tx, ty, Y, chroma, angle); //draw block
-			} else { // some patterns
-			    int f0, f1;
-			    f0 = *buf++;
-			    f1 = tmp;
-			    Y[0] = (*buf++) & 0x3F;
-			    Y[1] = (*buf++) & 0x3F;
-			    ulti_pattern(&s->frame, tx, ty, f1, f0, Y[0], Y[1], chroma);
-			}
-		    }
-		    break;
-		}
-		if(code != 3)
-		    ulti_grad(&s->frame, tx, ty, Y, chroma, angle); // draw block
-	    }
-	    blocks++;
-    	    x += 8;
-	    if(x >= s->width) {
-		x = 0;
-		y += 8;
-	    }
-	}
+        int idx;
+        if(blocks >= s->blocks || y >= s->height)
+            break;//all blocks decoded
+
+        idx = *buf++;
+        if((idx & 0xF8) == 0x70) {
+            switch(idx) {
+            case 0x70: //change modifier
+                modifier = *buf++;
+                if(modifier>1)
+                    av_log(avctx, AV_LOG_INFO, "warning: modifier must be 0 or 1, got %i\n", modifier);
+                break;
+            case 0x71: // set uniq flag
+                uniq = 1;
+                break;
+            case 0x72: //toggle mode
+                mode = !mode;
+                break;
+            case 0x73: //end-of-frame
+                done = 1;
+                break;
+            case 0x74: //skip some blocks
+                skip = *buf++;
+                if ((blocks + skip) >= s->blocks)
+                    break;
+                blocks += skip;
+                x += skip * 8;
+                while(x >= s->width) {
+                    x -= s->width;
+                    y += 8;
+                }
+                break;
+            default:
+                av_log(avctx, AV_LOG_INFO, "warning: unknown escape 0x%02X\n", idx);
+            }
+        } else { //handle one block
+            int code;
+            int cf;
+            int angle = 0;
+            uint8_t Y[4]; // luma samples of block
+            int tx = 0, ty = 0; //coords of subblock
+            int chroma = 0;
+            if (mode || uniq) {
+                uniq = 0;
+                cf = 1;
+                chroma = 0;
+            } else {
+                cf = 0;
+                if (idx)
+                    chroma = *buf++;
+            }
+            for (i = 0; i < 4; i++) { // for every subblock
+                code = (idx >> (6 - i*2)) & 3; //extract 2 bits
+                if(!code) //skip subblock
+                    continue;
+                if(cf)
+                    chroma = *buf++;
+                tx = x + block_coords[i * 2];
+                ty = y + block_coords[(i * 2) + 1];
+                switch(code) {
+                case 1:
+                    tmp = *buf++;
+
+                    angle = angle_by_index[(tmp >> 6) & 0x3];
+
+                    Y[0] = tmp & 0x3F;
+                    Y[1] = Y[0];
+
+                    if (angle) {
+                        Y[2] = Y[0]+1;
+                        if (Y[2] > 0x3F)
+                            Y[2] = 0x3F;
+                        Y[3] = Y[2];
+                    } else {
+                        Y[2] = Y[0];
+                        Y[3] = Y[0];
+                    }
+                    break;
+
+                case 2:
+                    if (modifier) { // unpack four luma samples
+                        tmp = (*buf++) << 16;
+                        tmp += (*buf++) << 8;
+                        tmp += *buf++;
+
+                        Y[0] = (tmp >> 18) & 0x3F;
+                        Y[1] = (tmp >> 12) & 0x3F;
+                        Y[2] = (tmp >> 6) & 0x3F;
+                        Y[3] = tmp & 0x3F;
+                        angle = 16;
+                    } else { // retrieve luma samples from codebook
+                        tmp = (*buf++) << 8;
+                        tmp += (*buf++);
+
+                        angle = (tmp >> 12) & 0xF;
+                        tmp &= 0xFFF;
+                        tmp <<= 2;
+                        Y[0] = s->ulti_codebook[tmp];
+                        Y[1] = s->ulti_codebook[tmp + 1];
+                        Y[2] = s->ulti_codebook[tmp + 2];
+                        Y[3] = s->ulti_codebook[tmp + 3];
+                    }
+                    break;
+
+                case 3:
+                    if (modifier) { // all 16 luma samples
+                        uint8_t Luma[16];
+
+                        tmp = (*buf++) << 16;
+                        tmp += (*buf++) << 8;
+                        tmp += *buf++;
+                        Luma[0] = (tmp >> 18) & 0x3F;
+                        Luma[1] = (tmp >> 12) & 0x3F;
+                        Luma[2] = (tmp >> 6) & 0x3F;
+                        Luma[3] = tmp & 0x3F;
+
+                        tmp = (*buf++) << 16;
+                        tmp += (*buf++) << 8;
+                        tmp += *buf++;
+                        Luma[4] = (tmp >> 18) & 0x3F;
+                        Luma[5] = (tmp >> 12) & 0x3F;
+                        Luma[6] = (tmp >> 6) & 0x3F;
+                        Luma[7] = tmp & 0x3F;
+
+                        tmp = (*buf++) << 16;
+                        tmp += (*buf++) << 8;
+                        tmp += *buf++;
+                        Luma[8] = (tmp >> 18) & 0x3F;
+                        Luma[9] = (tmp >> 12) & 0x3F;
+                        Luma[10] = (tmp >> 6) & 0x3F;
+                        Luma[11] = tmp & 0x3F;
+
+                        tmp = (*buf++) << 16;
+                        tmp += (*buf++) << 8;
+                        tmp += *buf++;
+                        Luma[12] = (tmp >> 18) & 0x3F;
+                        Luma[13] = (tmp >> 12) & 0x3F;
+                        Luma[14] = (tmp >> 6) & 0x3F;
+                        Luma[15] = tmp & 0x3F;
+
+                        ulti_convert_yuv(&s->frame, tx, ty, Luma, chroma);
+                    } else {
+                        tmp = *buf++;
+                        if(tmp & 0x80) {
+                            angle = (tmp >> 4) & 0x7;
+                            tmp = (tmp << 8) + *buf++;
+                            Y[0] = (tmp >> 6) & 0x3F;
+                            Y[1] = tmp & 0x3F;
+                            Y[2] = (*buf++) & 0x3F;
+                            Y[3] = (*buf++) & 0x3F;
+                            ulti_grad(&s->frame, tx, ty, Y, chroma, angle); //draw block
+                        } else { // some patterns
+                            int f0, f1;
+                            f0 = *buf++;
+                            f1 = tmp;
+                            Y[0] = (*buf++) & 0x3F;
+                            Y[1] = (*buf++) & 0x3F;
+                            ulti_pattern(&s->frame, tx, ty, f1, f0, Y[0], Y[1], chroma);
+                        }
+                    }
+                    break;
+                }
+                if(code != 3)
+                    ulti_grad(&s->frame, tx, ty, Y, chroma, angle); // draw block
+            }
+            blocks++;
+                x += 8;
+            if(x >= s->width) {
+                x = 0;
+                y += 8;
+            }
+        }
     }
-    
+
     *data_size=sizeof(AVFrame);
     *(AVFrame*)data= s->frame;
 
diff --git a/src/libffmpeg/libavcodec/utils.c b/src/libffmpeg/libavcodec/utils.c
index f68d658e7..525fc9a98 100644
--- a/src/libffmpeg/libavcodec/utils.c
+++ b/src/libffmpeg/libavcodec/utils.c
@@ -16,14 +16,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file utils.c
  * utils.
  */
- 
+
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
@@ -59,7 +59,7 @@ void avcodec_default_free_buffers(AVCodecContext *s);
 void *av_mallocz(unsigned int size)
 {
     void *ptr;
-    
+
     ptr = av_malloc(size);
     if (!ptr)
         return NULL;
@@ -84,9 +84,9 @@ char *av_strdup(const char *s)
  */
 void *av_fast_realloc(void *ptr, unsigned int *size, unsigned int min_size)
 {
-    if(min_size < *size) 
+    if(min_size < *size)
         return ptr;
-    
+
     *size= FFMAX(17*min_size/16 + 32, min_size);
 
     return av_realloc(ptr, *size);
@@ -104,7 +104,7 @@ void *av_mallocz_static(unsigned int size)
 {
     void *ptr = av_mallocz(size);
 
-    if(ptr){ 
+    if(ptr){
         array_static =av_fast_realloc(array_static, &allocated_static, sizeof(void*)*(last_static+1));
         if(!array_static)
             return NULL;
@@ -149,9 +149,9 @@ void av_free_static(void)
  * Call av_free_static automatically before it's too late
  */
 
-static void do_free() __attribute__ ((destructor));
+static void do_free(void) __attribute__ ((destructor));
 
-static void do_free()
+static void do_free(void)
 {
     av_free_static();
 }
@@ -198,9 +198,9 @@ typedef struct InternalBuffer{
 #define ALIGN(x, a) (((x)+(a)-1)&~((a)-1))
 
 void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
-    int w_align= 1;    
-    int h_align= 1;    
-    
+    int w_align= 1;
+    int h_align= 1;
+
     switch(s->pix_fmt){
     case PIX_FMT_YUV420P:
     case PIX_FMT_YUV422:
@@ -254,7 +254,7 @@ void avcodec_align_dimensions(AVCodecContext *s, int *width, int *height){
 int avcodec_check_dimensions(void *av_log_ctx, unsigned int w, unsigned int h){
     if((int)w>0 && (int)h>0 && (w+128)*(uint64_t)(h+128) < INT_MAX/4)
         return 0;
-    
+
     av_log(av_log_ctx, AV_LOG_ERROR, "picture size invalid (%ux%u)\n", w, h);
     return -1;
 }
@@ -277,64 +277,65 @@ int avcodec_default_get_buffer(AVCodecContext *s, AVFrame *pic){
     }
 #if 0
     s->internal_buffer= av_fast_realloc(
-        s->internal_buffer, 
-        &s->internal_buffer_size, 
+        s->internal_buffer,
+        &s->internal_buffer_size,
         sizeof(InternalBuffer)*FFMAX(99,  s->internal_buffer_count+1)/*FIXME*/
         );
 #endif
-     
+
     buf= &((InternalBuffer*)s->internal_buffer)[s->internal_buffer_count];
     picture_number= &(((InternalBuffer*)s->internal_buffer)[INTERNAL_BUFFER_SIZE-1]).last_pic_num; //FIXME ugly hack
     (*picture_number)++;
-    
+
     if(buf->base[0]){
         pic->age= *picture_number - buf->last_pic_num;
         buf->last_pic_num= *picture_number;
     }else{
         int h_chroma_shift, v_chroma_shift;
-        int pixel_size;
-        
+        int pixel_size, size[3];
+        AVPicture picture;
+
         avcodec_get_chroma_sub_sample(s->pix_fmt, &h_chroma_shift, &v_chroma_shift);
-        
-        switch(s->pix_fmt){
-        case PIX_FMT_RGB555:
-        case PIX_FMT_RGB565:
-        case PIX_FMT_YUV422:
-        case PIX_FMT_UYVY422:
-            pixel_size=2;
-            break;
-        case PIX_FMT_RGB24:
-        case PIX_FMT_BGR24:
-            pixel_size=3;
-            break;
-        case PIX_FMT_RGBA32:
-            pixel_size=4;
-            break;
-        default:
-            pixel_size=1;
-        }
 
         avcodec_align_dimensions(s, &w, &h);
-            
+
         if(!(s->flags&CODEC_FLAG_EMU_EDGE)){
             w+= EDGE_WIDTH*2;
             h+= EDGE_WIDTH*2;
         }
-        
+        avpicture_fill(&picture, NULL, s->pix_fmt, w, h);
+        pixel_size= picture.linesize[0]*8 / w;
+//av_log(NULL, AV_LOG_ERROR, "%d %d %d %d\n", (int)picture.data[1], w, h, s->pix_fmt);
+        assert(pixel_size>=1);
+            //FIXME the next lines ensure that linesize = 2^x * uvlinesize; that's needed because some MC code assumes it
+        if(pixel_size == 3*8)
+            w= ALIGN(w, STRIDE_ALIGN<<h_chroma_shift);
+        else
+            w= ALIGN(pixel_size*w, STRIDE_ALIGN<<(h_chroma_shift+3)) / pixel_size;
+        size[1] = avpicture_fill(&picture, NULL, s->pix_fmt, w, h);
+        size[0] = picture.linesize[0] * h;
+        size[1] -= size[0];
+        if(picture.data[2])
+            size[1]= size[2]= size[1]/2;
+        else
+            size[2]= 0;
+
         buf->last_pic_num= -256*256*256*64;
+        memset(buf->base, 0, sizeof(buf->base));
+        memset(buf->data, 0, sizeof(buf->data));
 
-        for(i=0; i<3; i++){
+        for(i=0; i<3 && size[i]; i++){
             const int h_shift= i==0 ? 0 : h_chroma_shift;
             const int v_shift= i==0 ? 0 : v_chroma_shift;
 
-            //FIXME next ensures that linesize= 2^x uvlinesize, thats needed because some MC code assumes it
-            buf->linesize[i]= ALIGN(pixel_size*w>>h_shift, STRIDE_ALIGN<<(h_chroma_shift-h_shift)); 
+            buf->linesize[i]= picture.linesize[i];
 
-            buf->base[i]= av_malloc((buf->linesize[i]*h>>v_shift)+16); //FIXME 16
+            buf->base[i]= av_malloc(size[i]+16); //FIXME 16
             if(buf->base[i]==NULL) return -1;
-            memset(buf->base[i], 128, buf->linesize[i]*h>>v_shift);
-        
-            if(s->flags&CODEC_FLAG_EMU_EDGE)
+            memset(buf->base[i], 128, size[i]);
+
+            // no edge if EDGE EMU or not planar YUV; we check for PAL8 redundantly to protect against an exploitable bug regression ...
+            if((s->flags&CODEC_FLAG_EMU_EDGE) || (s->pix_fmt == PIX_FMT_PAL8) || !size[2])
                 buf->data[i] = buf->base[i];
             else
                 buf->data[i] = buf->base[i] + ALIGN((buf->linesize[i]*EDGE_WIDTH>>v_shift) + (EDGE_WIDTH>>h_shift), STRIDE_ALIGN);
@@ -431,7 +432,7 @@ static const char* context_to_name(void* ptr) {
     AVCodecContext *avc= ptr;
 
     if(avc && avc->codec && avc->codec->name)
-        return avc->codec->name; 
+        return avc->codec->name;
     else
         return "NULL";
 }
@@ -442,7 +443,7 @@ static const char* context_to_name(void* ptr) {
 #define V AV_OPT_FLAG_VIDEO_PARAM
 #define A AV_OPT_FLAG_AUDIO_PARAM
 #define S AV_OPT_FLAG_SUBTITLE_PARAM
-#define E AV_OPT_FLAG_ENCODING_PARAM 
+#define E AV_OPT_FLAG_ENCODING_PARAM
 #define D AV_OPT_FLAG_DECODING_PARAM
 
 static AVOption options[]={
@@ -493,6 +494,7 @@ static AVOption options[]={
 {"rate_emu", NULL, OFFSET(rate_emu), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
 {"sample_rate", NULL, OFFSET(sample_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
 {"channels", NULL, OFFSET(channels), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"cutoff", "set cutoff bandwidth", OFFSET(cutoff), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, A|E},
 {"frame_size", NULL, OFFSET(frame_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
 {"frame_number", NULL, OFFSET(frame_number), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
 {"real_pict_num", NULL, OFFSET(real_pict_num), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
@@ -505,7 +507,7 @@ static AVOption options[]={
 {"max_b_frames", NULL, OFFSET(max_b_frames), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
 {"b_quant_factor", NULL, OFFSET(b_quant_factor), FF_OPT_TYPE_FLOAT, DEFAULT, FLT_MIN, FLT_MAX, V|E},
 {"rc_strategy", NULL, OFFSET(rc_strategy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"b_frame_strategy", NULL, OFFSET(b_frame_strategy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"b_strategy", NULL, OFFSET(b_frame_strategy), FF_OPT_TYPE_INT, 0, INT_MIN, INT_MAX, V|E},
 {"hurry_up", NULL, OFFSET(hurry_up), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D},
 {"rtp_mode", NULL, OFFSET(rtp_mode), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
 {"rtp_payload_size", NULL, OFFSET(rtp_payload_size), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
@@ -678,7 +680,7 @@ static AVOption options[]={
 {"nr", "noise reduction", OFFSET(noise_reduction), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
 {"rc_init_occupancy", NULL, OFFSET(rc_initial_buffer_occupancy), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
 {"inter_threshold", NULL, OFFSET(inter_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"flags2", NULL, OFFSET(flags2), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
+{"flags2", NULL, OFFSET(flags2), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|A|E|D, "flags2"},
 {"error_rate", NULL, OFFSET(error_rate), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
 {"antialias", NULL, OFFSET(antialias_algo), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|D, "aa"},
 {"auto", NULL, 0, FF_OPT_TYPE_CONST, FF_AA_AUTO, INT_MIN, INT_MAX, V|D, "aa"},
@@ -701,11 +703,38 @@ static AVOption options[]={
 {"frame_skip_threshold", NULL, OFFSET(frame_skip_threshold), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
 {"frame_skip_factor", NULL, OFFSET(frame_skip_factor), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
 {"frame_skip_exp", NULL, OFFSET(frame_skip_exp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
-{"skipcmp", "frame skip comapare function", OFFSET(frame_skip_cmp), FF_OPT_TYPE_INT, FF_CMP_DCTMAX, INT_MIN, INT_MAX, V|E, "cmp_func"},
+{"skipcmp", "frame skip compare function", OFFSET(frame_skip_cmp), FF_OPT_TYPE_INT, FF_CMP_DCTMAX, INT_MIN, INT_MAX, V|E, "cmp_func"},
 {"border_mask", NULL, OFFSET(border_masking), FF_OPT_TYPE_FLOAT, DEFAULT, FLT_MIN, FLT_MAX, V|E},
 {"mb_lmin", NULL, OFFSET(mb_lmin), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
 {"mb_lmax", NULL, OFFSET(mb_lmax), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
 {"me_penalty_compensation", NULL, OFFSET(me_penalty_compensation), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"bidir_refine", NULL, OFFSET(bidir_refine), FF_OPT_TYPE_INT, DEFAULT, 0, 4, V|E},
+{"brd_scale", NULL, OFFSET(brd_scale), FF_OPT_TYPE_INT, DEFAULT, 0, 10, V|E},
+{"crf", NULL, OFFSET(crf), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"cqp", NULL, OFFSET(cqp), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"keyint_min", NULL, OFFSET(keyint_min), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"refs", NULL, OFFSET(refs), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"chromaoffset", NULL, OFFSET(chromaoffset), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"bframebias", NULL, OFFSET(bframebias), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"trellis", NULL, OFFSET(trellis), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"directpred", NULL, OFFSET(directpred), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"bpyramid", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BPYRAMID, INT_MIN, INT_MAX, V|E, "flags2"},
+{"wpred", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_WPRED, INT_MIN, INT_MAX, V|E, "flags2"},
+{"mixed_refs", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_MIXED_REFS, INT_MIN, INT_MAX, V|E, "flags2"},
+{"8x8dct", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_8X8DCT, INT_MIN, INT_MAX, V|E, "flags2"},
+{"fastpskip", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_FASTPSKIP, INT_MIN, INT_MAX, V|E, "flags2"},
+{"aud", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_AUD, INT_MIN, INT_MAX, V|E, "flags2"},
+{"brdo", NULL, 0, FF_OPT_TYPE_CONST, CODEC_FLAG2_BRDO, INT_MIN, INT_MAX, V|E, "flags2"},
+{"complexityblur", NULL, OFFSET(complexityblur), FF_OPT_TYPE_FLOAT, DEFAULT, FLT_MIN, FLT_MAX, V|E},
+{"deblockalpha", NULL, OFFSET(deblockalpha), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"deblockbeta", NULL, OFFSET(deblockbeta), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
+{"partitions", NULL, OFFSET(partitions), FF_OPT_TYPE_FLAGS, DEFAULT, INT_MIN, INT_MAX, V|E, "partitions"},
+{"parti4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I4X4, INT_MIN, INT_MAX, V|E, "partitions"},
+{"parti8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_I8X8, INT_MIN, INT_MAX, V|E, "partitions"},
+{"partp4x4", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P4X4, INT_MIN, INT_MAX, V|E, "partitions"},
+{"partp8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_P8X8, INT_MIN, INT_MAX, V|E, "partitions"},
+{"partb8x8", NULL, 0, FF_OPT_TYPE_CONST, X264_PART_B8X8, INT_MIN, INT_MAX, V|E, "partitions"},
+{"sc_factor", NULL, OFFSET(scenechange_factor), FF_OPT_TYPE_INT, 6, 0, INT_MAX, V|E},
 {NULL},
 };
 
@@ -725,7 +754,13 @@ void avcodec_get_context_defaults(AVCodecContext *s){
     s->mb_lmin= FF_QP2LAMBDA * 2;
     s->mb_lmax= FF_QP2LAMBDA * 31;
     s->rc_eq= "tex^qComp";
+    s->cqp = -1;
+    s->refs = 1;
+    s->directpred = 2;
     s->qcompress= 0.5;
+    s->complexityblur = 20.0;
+    s->keyint_min = 25;
+    s->flags2 = CODEC_FLAG2_FASTPSKIP;
     s->max_qdiff= 3;
     s->b_quant_factor=1.25;
     s->b_quant_offset=1.25;
@@ -753,7 +788,7 @@ void avcodec_get_context_defaults(AVCodecContext *s){
     s->pix_fmt= PIX_FMT_NONE;
     s->frame_skip_cmp= FF_CMP_DCTMAX;
     s->nsse_weight= 8;
-    
+
     s->intra_quant_bias= FF_DEFAULT_QUANT_BIAS;
     s->inter_quant_bias= FF_DEFAULT_QUANT_BIAS;
     s->palctrl = NULL;
@@ -762,15 +797,15 @@ void avcodec_get_context_defaults(AVCodecContext *s){
 
 /**
  * allocates a AVCodecContext and set it to defaults.
- * this can be deallocated by simply calling free() 
+ * this can be deallocated by simply calling free()
  */
 AVCodecContext *avcodec_alloc_context(void){
     AVCodecContext *avctx= av_malloc(sizeof(AVCodecContext));
-    
+
     if(avctx==NULL) return NULL;
-    
+
     avcodec_get_context_defaults(avctx);
-    
+
     return avctx;
 }
 
@@ -783,22 +818,22 @@ void avcodec_get_frame_defaults(AVFrame *pic){
 
 /**
  * allocates a AVPFrame and set it to defaults.
- * this can be deallocated by simply calling free() 
+ * this can be deallocated by simply calling free()
  */
 AVFrame *avcodec_alloc_frame(void){
     AVFrame *pic= av_malloc(sizeof(AVFrame));
-    
+
     if(pic==NULL) return NULL;
-    
+
     avcodec_get_frame_defaults(pic);
-    
+
     return pic;
 }
 
 int avcodec_open(AVCodecContext *avctx, AVCodec *codec)
 {
     int ret= -1;
-    
+
     entangled_thread_counter++;
     if(entangled_thread_counter != 1){
         av_log(avctx, AV_LOG_ERROR, "insufficient thread locking around avcodec_open/close()\n");
@@ -813,7 +848,7 @@ int avcodec_open(AVCodecContext *avctx, AVCodec *codec)
     avctx->frame_number = 0;
     if (codec->priv_data_size > 0) {
         avctx->priv_data = av_mallocz(codec->priv_data_size);
-        if (!avctx->priv_data) 
+        if (!avctx->priv_data)
             goto end;
     } else {
         avctx->priv_data = NULL;
@@ -840,7 +875,7 @@ end:
     return ret;
 }
 
-int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size, 
+int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size,
                          const short *samples)
 {
     if(buf_size < FF_MIN_BUFFER_SIZE && 0){
@@ -855,7 +890,7 @@ int avcodec_encode_audio(AVCodecContext *avctx, uint8_t *buf, int buf_size,
         return 0;
 }
 
-int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size, 
+int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
                          const AVFrame *pict)
 {
     if(buf_size < FF_MIN_BUFFER_SIZE){
@@ -868,13 +903,13 @@ int avcodec_encode_video(AVCodecContext *avctx, uint8_t *buf, int buf_size,
         int ret = avctx->codec->encode(avctx, buf, buf_size, (void *)pict);
         avctx->frame_number++;
         emms_c(); //needed to avoid an emms_c() call before every return;
-    
+
         return ret;
     }else
         return 0;
 }
 
-int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size, 
+int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
                             const AVSubtitle *sub)
 {
     int ret;
@@ -883,31 +918,31 @@ int avcodec_encode_subtitle(AVCodecContext *avctx, uint8_t *buf, int buf_size,
     return ret;
 }
 
-/** 
- * decode a frame. 
+/**
+ * decode a frame.
  * @param buf bitstream buffer, must be FF_INPUT_BUFFER_PADDING_SIZE larger then the actual read bytes
  * because some optimized bitstream readers read 32 or 64 bit at once and could read over the end
  * @param buf_size the size of the buffer in bytes
  * @param got_picture_ptr zero if no frame could be decompressed, Otherwise, it is non zero
  * @return -1 if error, otherwise return the number of
- * bytes used. 
+ * bytes used.
  */
-int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture, 
+int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
                          int *got_picture_ptr,
                          uint8_t *buf, int buf_size)
 {
     int ret;
-    
+
     *got_picture_ptr= 0;
     if((avctx->coded_width||avctx->coded_height) && avcodec_check_dimensions(avctx,avctx->coded_width,avctx->coded_height))
         return -1;
     if((avctx->codec->capabilities & CODEC_CAP_DELAY) || buf_size){
-        ret = avctx->codec->decode(avctx, picture, got_picture_ptr, 
+        ret = avctx->codec->decode(avctx, picture, got_picture_ptr,
                                 buf, buf_size);
 
         emms_c(); //needed to avoid an emms_c() call before every return;
-    
-        if (*got_picture_ptr)                           
+
+        if (*got_picture_ptr)
             avctx->frame_number++;
     }else
         ret= 0;
@@ -919,7 +954,7 @@ int avcodec_decode_video(AVCodecContext *avctx, AVFrame *picture,
    *number of bytes used. If no frame could be decompressed,
    *frame_size_ptr is zero. Otherwise, it is the decompressed frame
    *size in BYTES. */
-int avcodec_decode_audio(AVCodecContext *avctx, int16_t *samples, 
+int avcodec_decode_audio(AVCodecContext *avctx, int16_t *samples,
                          int *frame_size_ptr,
                          uint8_t *buf, int buf_size)
 {
@@ -927,7 +962,7 @@ int avcodec_decode_audio(AVCodecContext *avctx, int16_t *samples,
 
     *frame_size_ptr= 0;
     if((avctx->codec->capabilities & CODEC_CAP_DELAY) || buf_size){
-        ret = avctx->codec->decode(avctx, samples, frame_size_ptr, 
+        ret = avctx->codec->decode(avctx, samples, frame_size_ptr,
                                 buf, buf_size);
         avctx->frame_number++;
     }else
@@ -945,7 +980,7 @@ int avcodec_decode_subtitle(AVCodecContext *avctx, AVSubtitle *sub,
     int ret;
 
     *got_sub_ptr = 0;
-    ret = avctx->codec->decode(avctx, sub, got_sub_ptr, 
+    ret = avctx->codec->decode(avctx, sub, got_sub_ptr,
                                (uint8_t *)buf, buf_size);
     if (*got_sub_ptr)
         avctx->frame_number++;
@@ -1047,9 +1082,9 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
         codec_name = enc->codec_name;
     } else {
         /* output avi tags */
-        if(   isprint(enc->codec_tag&0xFF) && isprint((enc->codec_tag>>8)&0xFF) 
+        if(   isprint(enc->codec_tag&0xFF) && isprint((enc->codec_tag>>8)&0xFF)
            && isprint((enc->codec_tag>>16)&0xFF) && isprint((enc->codec_tag>>24)&0xFF)){
-            snprintf(buf1, sizeof(buf1), "%c%c%c%c / 0x%04X", 
+            snprintf(buf1, sizeof(buf1), "%c%c%c%c / 0x%04X",
                      enc->codec_tag & 0xff,
                      (enc->codec_tag >> 8) & 0xff,
                      (enc->codec_tag >> 16) & 0xff,
@@ -1112,7 +1147,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
                      enc->sample_rate,
                      channels_str);
         }
-        
+
         /* for PCM codecs, compute bitrate directly */
         switch(enc->codec_id) {
         case CODEC_ID_PCM_S32LE:
@@ -1166,7 +1201,7 @@ void avcodec_string(char *buf, int buf_size, AVCodecContext *enc, int encode)
                      ", pass 2");
     }
     if (bitrate != 0) {
-        snprintf(buf + strlen(buf), buf_size - strlen(buf), 
+        snprintf(buf + strlen(buf), buf_size - strlen(buf),
                  ", %d kb/s", bitrate / 1000);
     }
 }
@@ -1187,7 +1222,7 @@ void avcodec_init(void)
     static int inited = 0;
 
     if (inited != 0)
-	return;
+        return;
     inited = 1;
 
     dsputil_static_init();
@@ -1206,7 +1241,7 @@ void avcodec_default_free_buffers(AVCodecContext *s){
     int i, j;
 
     if(s->internal_buffer==NULL) return;
-    
+
     for(i=0; i<INTERNAL_BUFFER_SIZE; i++){
         InternalBuffer *buf= &((InternalBuffer*)s->internal_buffer)[i];
         for(j=0; j<4; j++){
@@ -1215,18 +1250,18 @@ void avcodec_default_free_buffers(AVCodecContext *s){
         }
     }
     av_freep(&s->internal_buffer);
-    
+
     s->internal_buffer_count=0;
 }
 
 char av_get_pict_type_char(int pict_type){
     switch(pict_type){
-    case I_TYPE: return 'I'; 
-    case P_TYPE: return 'P'; 
-    case B_TYPE: return 'B'; 
-    case S_TYPE: return 'S'; 
-    case SI_TYPE:return 'i'; 
-    case SP_TYPE:return 'p'; 
+    case I_TYPE: return 'I';
+    case P_TYPE: return 'P';
+    case B_TYPE: return 'B';
+    case S_TYPE: return 'S';
+    case SI_TYPE:return 'i';
+    case SP_TYPE:return 'p';
     default:     return '?';
     }
 }
@@ -1240,15 +1275,15 @@ static void av_log_default_callback(void* ptr, int level, const char* fmt, va_li
     static int print_prefix=1;
     AVClass* avc= ptr ? *(AVClass**)ptr : NULL;
     if(level>av_log_level)
-	return;
+        return;
 /* #undef fprintf */
     if(print_prefix && avc) {
-	    fprintf(stderr, "[%s @ %p]", avc->item_name(ptr), avc);
+            fprintf(stderr, "[%s @ %p]", avc->item_name(ptr), avc);
     }
 /* #define fprintf please_use_av_log */
-        
+
     print_prefix= strstr(fmt, "\n") != NULL;
-        
+
     vfprintf(stderr, fmt, vl);
 }
 
diff --git a/src/libffmpeg/libavcodec/vcr1.c b/src/libffmpeg/libavcodec/vcr1.c
index 442ad9136..6012752eb 100644
--- a/src/libffmpeg/libavcodec/vcr1.c
+++ b/src/libffmpeg/libavcodec/vcr1.c
@@ -14,14 +14,14 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
- 
+
 /**
  * @file vcr1.c
  * ati vcr1 codec.
  */
- 
+
 #include "avcodec.h"
 #include "mpegvideo.h"
 
@@ -35,7 +35,7 @@ typedef struct VCR1Context{
     int offset[4];
 } VCR1Context;
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -60,7 +60,7 @@ static int decode_frame(AVCodecContext *avctx,
         a->delta[i]= *(bytestream++);
         bytestream++;
     }
-    
+
     for(y=0; y<avctx->height; y++){
         int offset;
         uint8_t *luma= &a->picture.data[0][ y*a->picture.linesize[0] ];
@@ -79,10 +79,10 @@ static int decode_frame(AVCodecContext *avctx,
                 luma[2]=( offset += a->delta[ bytestream[0]&0xF ]);
                 luma[3]=( offset += a->delta[ bytestream[0]>>4  ]);
                 luma += 4;
-                
+
                 *(cb++) = bytestream[3];
                 *(cr++) = bytestream[1];
-                
+
                 bytestream+= 4;
             }
         }else{
@@ -107,7 +107,7 @@ static int decode_frame(AVCodecContext *avctx,
     *data_size = sizeof(AVPicture);
 
     emms_c();
-    
+
     return buf_size;
 }
 
@@ -124,13 +124,13 @@ static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size,
     p->key_frame= 1;
 
     emms_c();
-    
+
     align_put_bits(&a->pb);
     while(get_bit_count(&a->pb)&31)
         put_bits(&a->pb, 8, 0);
-    
+
     size= get_bit_count(&a->pb)/32;
-    
+
     return size*4;
 }
 #endif
@@ -143,9 +143,9 @@ static void common_init(AVCodecContext *avctx){
 }
 
 static int decode_init(AVCodecContext *avctx){
- 
+
     common_init(avctx);
-    
+
     avctx->pix_fmt= PIX_FMT_YUV410P;
 
     return 0;
@@ -153,9 +153,9 @@ static int decode_init(AVCodecContext *avctx){
 
 #if 0
 static int encode_init(AVCodecContext *avctx){
- 
+
     common_init(avctx);
-    
+
     return 0;
 }
 #endif
diff --git a/src/libffmpeg/libavcodec/vmdav.c b/src/libffmpeg/libavcodec/vmdav.c
index 37b85d7cc..34685b676 100644
--- a/src/libffmpeg/libavcodec/vmdav.c
+++ b/src/libffmpeg/libavcodec/vmdav.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -28,7 +28,7 @@
  * The video decoder outputs PAL8 colorspace data. The decoder expects
  * a 0x330-byte VMD file header to be transmitted via extradata during
  * codec initialization. Each encoded frame that is sent to this decoder
- * is expected to be prepended with the appropriate 16-byte frame 
+ * is expected to be prepended with the appropriate 16-byte frame
  * information record from the VMD file.
  *
  * The audio decoder, like the video decoder, expects each encoded data
@@ -143,7 +143,7 @@ static void lz_unpack(unsigned char *src, unsigned char *dest, int dest_len)
     }
 }
 
-static int rle_unpack(unsigned char *src, unsigned char *dest, 
+static int rle_unpack(unsigned char *src, unsigned char *dest,
     int src_len, int dest_len)
 {
     unsigned char *ps;
@@ -212,7 +212,7 @@ static void vmd_decode(VmdVideoContext *s)
     if (frame_x || frame_y || (frame_width != s->avctx->width) ||
         (frame_height != s->avctx->height)) {
 
-        memcpy(s->frame.data[0], s->prev_frame.data[0], 
+        memcpy(s->frame.data[0], s->prev_frame.data[0],
             s->avctx->height * s->frame.linesize[0]);
     }
 
@@ -331,7 +331,7 @@ static int vmdvideo_decode_init(AVCodecContext *avctx)
 
     /* make sure the VMD header made it */
     if (s->avctx->extradata_size != VMD_HEADER_SIZE) {
-        av_log(s->avctx, AV_LOG_ERROR, "VMD video: expected extradata size of %d\n", 
+        av_log(s->avctx, AV_LOG_ERROR, "VMD video: expected extradata size of %d\n",
             VMD_HEADER_SIZE);
         return -1;
     }
@@ -431,7 +431,7 @@ static int vmdaudio_decode_init(AVCodecContext *avctx)
     s->block_align = avctx->block_align;
 
     av_log(s->avctx, AV_LOG_DEBUG, "%d channels, %d bits/sample, block align = %d, sample rate = %d\n",
-	    s->channels, s->bits, s->block_align, avctx->sample_rate);
+            s->channels, s->bits, s->block_align, avctx->sample_rate);
 
     /* set up the steps8 and steps16 tables */
     for (i = 0; i < 8; i++) {
@@ -489,7 +489,7 @@ static int vmdaudio_loadsound(VmdAudioContext *s, unsigned char *data,
     int i;
 
     if (silence)
-	av_log(s->avctx, AV_LOG_INFO, "silent block!\n");
+        av_log(s->avctx, AV_LOG_INFO, "silent block!\n");
     if (s->channels == 2) {
 
         /* stereo handling */
diff --git a/src/libffmpeg/libavcodec/vorbis.c b/src/libffmpeg/libavcodec/vorbis.c
index 3cc81520a..9cc09bed1 100644
--- a/src/libffmpeg/libavcodec/vorbis.c
+++ b/src/libffmpeg/libavcodec/vorbis.c
@@ -15,7 +15,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -46,7 +46,7 @@
 /**
  *  reads 0-32 bits when using the ALT_BITSTREAM_READER_LE bitstream reader
  */
-unsigned int get_bits_long_le(GetBitContext *s, int n){
+static unsigned int get_bits_long_le(GetBitContext *s, int n){
     if(n<=17) return get_bits(s, n);
     else{
         int ret= get_bits(s, 16);
@@ -56,6 +56,9 @@ unsigned int get_bits_long_le(GetBitContext *s, int n){
 
 #define ilog(i) av_log2(2*(i))
 
+#define BARK(x) \
+    (13.1f*atan(0.00074f*(x))+2.24f*atan(1.85e-8f*(x)*(x))+1e-4f*(x))
+
 static unsigned int nth_root(unsigned int x, unsigned int n) {   // x^(1/n)
     unsigned int ret=0, i, j;
 
@@ -166,10 +169,18 @@ static void vorbis_free(vorbis_context *vc) {
     av_freep(&vc->codebooks);
 
     for(i=0;i<vc->floor_count;++i) {
-        av_free(vc->floors[i].x_list);
-        av_free(vc->floors[i].x_list_order);
-        av_free(vc->floors[i].low_neighbour);
-        av_free(vc->floors[i].high_neighbour);
+        if(vc->floors[i].floor_type==0) {
+            av_free(vc->floors[i].data.t0.map[0]);
+            av_free(vc->floors[i].data.t0.map[1]);
+            av_free(vc->floors[i].data.t0.book_list);
+            av_free(vc->floors[i].data.t0.lsp);
+        }
+        else {
+            av_free(vc->floors[i].data.t1.x_list);
+            av_free(vc->floors[i].data.t1.x_list_order);
+            av_free(vc->floors[i].data.t1.low_neighbour);
+            av_free(vc->floors[i].data.t1.high_neighbour);
+        }
     }
     av_freep(&vc->floors);
 
@@ -208,18 +219,18 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) {
         AV_DEBUG(" %d. Codebook \n", cb);
 
         if (get_bits(gb, 24)!=0x564342) {
-            av_log(vc->avccontext, AV_LOG_ERROR, " %d. Codebook setup data corrupt. \n", cb);
+            av_log(vc->avccontext, AV_LOG_ERROR, " %"PRIdFAST16". Codebook setup data corrupt. \n", cb);
             goto error;
         }
 
         codebook_setup->dimensions=get_bits(gb, 16);
         if (codebook_setup->dimensions>16) {
-            av_log(vc->avccontext, AV_LOG_ERROR, " %d. Codebook's dimension is too large (%d). \n", cb, codebook_setup->dimensions);
+            av_log(vc->avccontext, AV_LOG_ERROR, " %"PRIdFAST16". Codebook's dimension is too large (%d). \n", cb, codebook_setup->dimensions);
             goto error;
         }
         entries=get_bits(gb, 24);
         if (entries>V_MAX_VLCS) {
-            av_log(vc->avccontext, AV_LOG_ERROR, " %d. Codebook has too many entries (%d). \n", cb, entries);
+            av_log(vc->avccontext, AV_LOG_ERROR, " %"PRIdFAST16". Codebook has too many entries (%"PRIdFAST32"). \n", cb, entries);
             goto error;
         }
 
@@ -366,7 +377,7 @@ static int vorbis_parse_setup_hdr_codebooks(vorbis_context *vc) {
         else                                       codebook_setup->nb_bits=V_NB_BITS;
 
         codebook_setup->maxdepth=(codebook_setup->maxdepth+codebook_setup->nb_bits-1)/codebook_setup->nb_bits;
-        
+
         if (init_vlc(&codebook_setup->vlc, codebook_setup->nb_bits, entries, tmp_vlc_bits, sizeof(*tmp_vlc_bits), sizeof(*tmp_vlc_bits), tmp_vlc_codes, sizeof(*tmp_vlc_codes), sizeof(*tmp_vlc_codes), INIT_VLC_LE)) {
             av_log(vc->avccontext, AV_LOG_ERROR, " Error generating vlc tables. \n");
             goto error;
@@ -404,8 +415,13 @@ static int vorbis_parse_setup_hdr_tdtransforms(vorbis_context *vc) {
     return 0;
 }
 
-// Process floors part - only floor type 1 is supported
+// Process floors part
 
+static uint_fast8_t vorbis_floor0_decode(vorbis_context *vc,
+                                         vorbis_floor_data *vfu, float *vec);
+static void create_map( vorbis_context * vc, uint_fast8_t floor_number );
+static uint_fast8_t vorbis_floor1_decode(vorbis_context *vc,
+                                         vorbis_floor_data *vfu, float *vec);
 static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) {
     GetBitContext *gb=&vc->gb;
     uint_fast16_t i,j,k;
@@ -426,102 +442,177 @@ static int vorbis_parse_setup_hdr_floors(vorbis_context *vc) {
             uint_fast8_t rangebits;
             uint_fast16_t floor1_values=2;
 
-            floor_setup->partitions=get_bits(gb, 5);
+            floor_setup->decode=vorbis_floor1_decode;
+
+            floor_setup->data.t1.partitions=get_bits(gb, 5);
 
-            AV_DEBUG(" %d.floor: %d partitions \n", i, floor_setup->partitions);
+            AV_DEBUG(" %d.floor: %d partitions \n", i, floor_setup->data.t1.partitions);
 
-            for(j=0;j<floor_setup->partitions;++j) {
-                floor_setup->partition_class[j]=get_bits(gb, 4);
-                if (floor_setup->partition_class[j]>maximum_class) maximum_class=floor_setup->partition_class[j];
+            for(j=0;j<floor_setup->data.t1.partitions;++j) {
+                floor_setup->data.t1.partition_class[j]=get_bits(gb, 4);
+                if (floor_setup->data.t1.partition_class[j]>maximum_class) maximum_class=floor_setup->data.t1.partition_class[j];
 
-                AV_DEBUG(" %d. floor %d partition class %d \n", i, j, floor_setup->partition_class[j]);
+                AV_DEBUG(" %d. floor %d partition class %d \n", i, j, floor_setup->data.t1.partition_class[j]);
 
             }
 
             AV_DEBUG(" maximum class %d \n", maximum_class);
 
-            floor_setup->maximum_class=maximum_class;
+            floor_setup->data.t1.maximum_class=maximum_class;
 
             for(j=0;j<=maximum_class;++j) {
-                floor_setup->class_dimensions[j]=get_bits(gb, 3)+1;
-                floor_setup->class_subclasses[j]=get_bits(gb, 2);
+                floor_setup->data.t1.class_dimensions[j]=get_bits(gb, 3)+1;
+                floor_setup->data.t1.class_subclasses[j]=get_bits(gb, 2);
 
-                AV_DEBUG(" %d floor %d class dim: %d subclasses %d \n", i, j, floor_setup->class_dimensions[j], floor_setup->class_subclasses[j]);
+                AV_DEBUG(" %d floor %d class dim: %d subclasses %d \n", i, j, floor_setup->data.t1.class_dimensions[j], floor_setup->data.t1.class_subclasses[j]);
 
-                if (floor_setup->class_subclasses[j]) {
-                    floor_setup->class_masterbook[j]=get_bits(gb, 8);
+                if (floor_setup->data.t1.class_subclasses[j]) {
+                    floor_setup->data.t1.class_masterbook[j]=get_bits(gb, 8);
 
-                    AV_DEBUG("   masterbook: %d \n", floor_setup->class_masterbook[j]);
+                    AV_DEBUG("   masterbook: %d \n", floor_setup->data.t1.class_masterbook[j]);
                 }
 
-                for(k=0;k<(1<<floor_setup->class_subclasses[j]);++k) {
-                    floor_setup->subclass_books[j][k]=get_bits(gb, 8)-1;
+                for(k=0;k<(1<<floor_setup->data.t1.class_subclasses[j]);++k) {
+                    floor_setup->data.t1.subclass_books[j][k]=get_bits(gb, 8)-1;
 
-                    AV_DEBUG("    book %d. : %d \n", k, floor_setup->subclass_books[j][k]);
+                    AV_DEBUG("    book %d. : %d \n", k, floor_setup->data.t1.subclass_books[j][k]);
                 }
             }
 
-            floor_setup->multiplier=get_bits(gb, 2)+1;
-            floor_setup->x_list_dim=2;
+            floor_setup->data.t1.multiplier=get_bits(gb, 2)+1;
+            floor_setup->data.t1.x_list_dim=2;
 
-            for(j=0;j<floor_setup->partitions;++j) {
-                floor_setup->x_list_dim+=floor_setup->class_dimensions[floor_setup->partition_class[j]];
+            for(j=0;j<floor_setup->data.t1.partitions;++j) {
+                floor_setup->data.t1.x_list_dim+=floor_setup->data.t1.class_dimensions[floor_setup->data.t1.partition_class[j]];
             }
 
-            floor_setup->x_list=(uint_fast16_t *)av_mallocz(floor_setup->x_list_dim * sizeof(uint_fast16_t));
-            floor_setup->x_list_order=(uint_fast16_t *)av_mallocz(floor_setup->x_list_dim * sizeof(uint_fast16_t));
-            floor_setup->low_neighbour=(uint_fast16_t *)av_mallocz(floor_setup->x_list_dim * sizeof(uint_fast16_t));
-            floor_setup->high_neighbour=(uint_fast16_t *)av_mallocz(floor_setup->x_list_dim * sizeof(uint_fast16_t));
+            floor_setup->data.t1.x_list=(uint_fast16_t *)av_mallocz(floor_setup->data.t1.x_list_dim * sizeof(uint_fast16_t));
+            floor_setup->data.t1.x_list_order=(uint_fast16_t *)av_mallocz(floor_setup->data.t1.x_list_dim * sizeof(uint_fast16_t));
+            floor_setup->data.t1.low_neighbour=(uint_fast16_t *)av_mallocz(floor_setup->data.t1.x_list_dim * sizeof(uint_fast16_t));
+            floor_setup->data.t1.high_neighbour=(uint_fast16_t *)av_mallocz(floor_setup->data.t1.x_list_dim * sizeof(uint_fast16_t));
 
 
             rangebits=get_bits(gb, 4);
-            floor_setup->x_list[0] = 0;
-            floor_setup->x_list[1] = (1<<rangebits);
+            floor_setup->data.t1.x_list[0] = 0;
+            floor_setup->data.t1.x_list[1] = (1<<rangebits);
 
-            for(j=0;j<floor_setup->partitions;++j) {
-                for(k=0;k<floor_setup->class_dimensions[floor_setup->partition_class[j]];++k,++floor1_values) {
-                    floor_setup->x_list[floor1_values]=get_bits(gb, rangebits);
+            for(j=0;j<floor_setup->data.t1.partitions;++j) {
+                for(k=0;k<floor_setup->data.t1.class_dimensions[floor_setup->data.t1.partition_class[j]];++k,++floor1_values) {
+                    floor_setup->data.t1.x_list[floor1_values]=get_bits(gb, rangebits);
 
-                    AV_DEBUG(" %d. floor1 Y coord. %d \n", floor1_values, floor_setup->x_list[floor1_values]);
+                    AV_DEBUG(" %d. floor1 Y coord. %d \n", floor1_values, floor_setup->data.t1.x_list[floor1_values]);
                 }
             }
 
 // Precalculate order of x coordinates - needed for decode
 
-            for(k=0;k<floor_setup->x_list_dim;++k) {
-                floor_setup->x_list_order[k]=k;
+            for(k=0;k<floor_setup->data.t1.x_list_dim;++k) {
+                floor_setup->data.t1.x_list_order[k]=k;
             }
 
-            for(k=0;k<floor_setup->x_list_dim-1;++k) {   // FIXME optimize sorting ?
-                for(j=k+1;j<floor_setup->x_list_dim;++j) {
-                    if(floor_setup->x_list[floor_setup->x_list_order[k]]>floor_setup->x_list[floor_setup->x_list_order[j]]) {
-                        uint_fast16_t tmp=floor_setup->x_list_order[k];
-                        floor_setup->x_list_order[k]=floor_setup->x_list_order[j];
-                        floor_setup->x_list_order[j]=tmp;
+            for(k=0;k<floor_setup->data.t1.x_list_dim-1;++k) {   // FIXME optimize sorting ?
+                for(j=k+1;j<floor_setup->data.t1.x_list_dim;++j) {
+                    if(floor_setup->data.t1.x_list[floor_setup->data.t1.x_list_order[k]]>floor_setup->data.t1.x_list[floor_setup->data.t1.x_list_order[j]]) {
+                        uint_fast16_t tmp=floor_setup->data.t1.x_list_order[k];
+                        floor_setup->data.t1.x_list_order[k]=floor_setup->data.t1.x_list_order[j];
+                        floor_setup->data.t1.x_list_order[j]=tmp;
                     }
                 }
             }
 
 // Precalculate low and high neighbours
 
-            for(k=2;k<floor_setup->x_list_dim;++k) {
-                floor_setup->low_neighbour[k]=0;
-                floor_setup->high_neighbour[k]=1;  // correct according to SPEC requirements
+            for(k=2;k<floor_setup->data.t1.x_list_dim;++k) {
+                floor_setup->data.t1.low_neighbour[k]=0;
+                floor_setup->data.t1.high_neighbour[k]=1;  // correct according to SPEC requirements
 
                 for (j=0;j<k;++j) {
-                    if ((floor_setup->x_list[j]<floor_setup->x_list[k]) &&
-                      (floor_setup->x_list[j]>floor_setup->x_list[floor_setup->low_neighbour[k]])) {
-                        floor_setup->low_neighbour[k]=j;
+                    if ((floor_setup->data.t1.x_list[j]<floor_setup->data.t1.x_list[k]) &&
+                      (floor_setup->data.t1.x_list[j]>floor_setup->data.t1.x_list[floor_setup->data.t1.low_neighbour[k]])) {
+                        floor_setup->data.t1.low_neighbour[k]=j;
                     }
-                    if ((floor_setup->x_list[j]>floor_setup->x_list[k]) &&
-                      (floor_setup->x_list[j]<floor_setup->x_list[floor_setup->high_neighbour[k]])) {
-                        floor_setup->high_neighbour[k]=j;
+                    if ((floor_setup->data.t1.x_list[j]>floor_setup->data.t1.x_list[k]) &&
+                      (floor_setup->data.t1.x_list[j]<floor_setup->data.t1.x_list[floor_setup->data.t1.high_neighbour[k]])) {
+                        floor_setup->data.t1.high_neighbour[k]=j;
                     }
                 }
             }
         }
+        else if(floor_setup->floor_type==0) {
+            uint_fast8_t max_codebook_dim=0;
+
+            floor_setup->decode=vorbis_floor0_decode;
+
+            floor_setup->data.t0.order=get_bits(gb, 8);
+            floor_setup->data.t0.rate=get_bits(gb, 16);
+            floor_setup->data.t0.bark_map_size=get_bits(gb, 16);
+            floor_setup->data.t0.amplitude_bits=get_bits(gb, 6);
+            /* zero would result in a div by zero later *
+             * 2^0 - 1 == 0                             */
+            if (floor_setup->data.t0.amplitude_bits == 0) {
+              av_log(vc->avccontext, AV_LOG_ERROR,
+                     "Floor 0 amplitude bits is 0.\n");
+              return 1;
+            }
+            floor_setup->data.t0.amplitude_offset=get_bits(gb, 8);
+            floor_setup->data.t0.num_books=get_bits(gb, 4)+1;
+
+            /* allocate mem for booklist */
+            floor_setup->data.t0.book_list=
+                av_malloc(floor_setup->data.t0.num_books);
+            if(!floor_setup->data.t0.book_list) { return 1; }
+            /* read book indexes */
+            {
+                int idx;
+                uint_fast8_t book_idx;
+                for (idx=0;idx<floor_setup->data.t0.num_books;++idx) {
+                    book_idx=get_bits(gb, 8);
+                    floor_setup->data.t0.book_list[idx]=book_idx;
+                    if (vc->codebooks[book_idx].dimensions > max_codebook_dim)
+                        max_codebook_dim=vc->codebooks[book_idx].dimensions;
+
+                    if (floor_setup->data.t0.book_list[idx]>vc->codebook_count)
+                        return 1;
+                }
+            }
+
+            create_map( vc, i );
+
+            /* allocate mem for lsp coefficients */
+            {
+                /* codebook dim is for padding if codebook dim doesn't *
+                 * divide order+1 then we need to read more data       */
+                floor_setup->data.t0.lsp=
+                    av_malloc((floor_setup->data.t0.order+1 + max_codebook_dim)
+                              * sizeof(float));
+                if(!floor_setup->data.t0.lsp) { return 1; }
+            }
+
+#ifdef V_DEBUG /* debug output parsed headers */
+            AV_DEBUG("floor0 order: %u\n", floor_setup->data.t0.order);
+            AV_DEBUG("floor0 rate: %u\n", floor_setup->data.t0.rate);
+            AV_DEBUG("floor0 bark map size: %u\n",
+              floor_setup->data.t0.bark_map_size);
+            AV_DEBUG("floor0 amplitude bits: %u\n",
+              floor_setup->data.t0.amplitude_bits);
+            AV_DEBUG("floor0 amplitude offset: %u\n",
+              floor_setup->data.t0.amplitude_offset);
+            AV_DEBUG("floor0 number of books: %u\n",
+              floor_setup->data.t0.num_books);
+            AV_DEBUG("floor0 book list pointer: %p\n",
+              floor_setup->data.t0.book_list);
+            {
+              int idx;
+              for (idx=0;idx<floor_setup->data.t0.num_books;++idx) {
+                AV_DEBUG( "  Book %d: %u\n",
+                  idx+1,
+                  floor_setup->data.t0.book_list[idx] );
+              }
+            }
+#endif
+        }
         else {
-            av_log(vc->avccontext, AV_LOG_ERROR, "Only floor type 1 supported. \n");
+            av_log(vc->avccontext, AV_LOG_ERROR, "Invalid floor type!\n");
             return 1;
         }
     }
@@ -653,6 +744,44 @@ static int vorbis_parse_setup_hdr_mappings(vorbis_context *vc) {
 
 // Process modes part
 
+static void create_map( vorbis_context * vc, uint_fast8_t floor_number )
+{
+    vorbis_floor * floors=vc->floors;
+    vorbis_floor0 * vf;
+    int idx;
+    int_fast8_t blockflag;
+    int_fast32_t * map;
+    int_fast32_t n; //TODO: could theoretically be smaller?
+
+    for (blockflag=0;blockflag<2;++blockflag)
+    {
+    n=(blockflag ? vc->blocksize_1 : vc->blocksize_0) / 2;
+    floors[floor_number].data.t0.map[blockflag]=
+        av_malloc((n+1) * sizeof(int_fast32_t)); // n+sentinel
+
+    map=floors[floor_number].data.t0.map[blockflag];
+    vf=&floors[floor_number].data.t0;
+
+    for (idx=0; idx<n;++idx) {
+        map[idx]=floor( BARK((vf->rate*idx)/(2.0f*n)) *
+                              ((vf->bark_map_size)/
+                               BARK(vf->rate/2.0f )) );
+        if (vf->bark_map_size-1 < map[idx]) {
+            map[idx]=vf->bark_map_size-1;
+        }
+    }
+    map[n]=-1;
+    vf->map_size[blockflag]=n;
+    }
+
+#   ifdef V_DEBUG
+    for(idx=0;idx<=n;++idx) {
+        AV_DEBUG("floor0 map: map at pos %d is %d\n",
+                 idx, map[idx]);
+    }
+#   endif
+}
+
 static int vorbis_parse_setup_hdr_modes(vorbis_context *vc) {
     GetBitContext *gb=&vc->gb;
     uint_fast8_t i;
@@ -859,9 +988,121 @@ static int vorbis_decode_init(AVCodecContext *avccontext) {
 
 // Decode audiopackets -------------------------------------------------
 
-// Read and decode floor (type 1 only)
+// Read and decode floor
+
+static uint_fast8_t vorbis_floor0_decode(vorbis_context *vc,
+                                         vorbis_floor_data *vfu, float *vec) {
+    vorbis_floor0 * vf=&vfu->t0;
+    float * lsp=vf->lsp;
+    uint_fast32_t amplitude;
+    uint_fast32_t book_idx;
+    uint_fast8_t blockflag=vc->modes[vc->mode_number].blockflag;
+
+    amplitude=get_bits(&vc->gb, vf->amplitude_bits);
+    if (amplitude>0) {
+        float last = 0;
+        uint_fast16_t lsp_len = 0;
+        uint_fast16_t idx;
+        vorbis_codebook codebook;
+
+        book_idx=get_bits(&vc->gb, ilog(vf->num_books));
+        if ( book_idx >= vf->num_books ) {
+            av_log( vc->avccontext, AV_LOG_ERROR,
+                    "floor0 dec: booknumber too high!\n" );
+            //FIXME: look above
+        }
+        AV_DEBUG( "floor0 dec: booknumber: %u\n", book_idx );
+        codebook=vc->codebooks[vf->book_list[book_idx]];
+
+        while (lsp_len<vf->order) {
+            int vec_off;
+
+            AV_DEBUG( "floor0 dec: book dimension: %d\n", codebook.dimensions );
+            AV_DEBUG( "floor0 dec: maximum depth: %d\n", codebook.maxdepth );
+            /* read temp vector */
+            vec_off=get_vlc2(&vc->gb,
+                             codebook.vlc.table,
+                             codebook.nb_bits,
+                             codebook.maxdepth ) *
+                             codebook.dimensions;
+            AV_DEBUG( "floor0 dec: vector offset: %d\n", vec_off );
+            /* copy each vector component and add last to it */
+            for (idx=0; idx<codebook.dimensions; ++idx) {
+                lsp[lsp_len+idx]=codebook.codevectors[vec_off+idx]+last;
+            }
+            last=lsp[lsp_len+idx-1]; /* set last to last vector component */
 
-static uint_fast8_t vorbis_floor1_decode(vorbis_context *vc, vorbis_floor *vf, float *vec) {
+            lsp_len += codebook.dimensions;
+        }
+#ifdef V_DEBUG
+        /* DEBUG: output lsp coeffs */
+        {
+            int idx;
+            for ( idx = 0; idx < lsp_len; ++idx )
+                AV_DEBUG("floor0 dec: coeff at %d is %f\n", idx, lsp[idx] );
+        }
+#endif
+
+        /* synthesize floor output vector */
+        {
+            int i;
+            int order=vf->order;
+            float wstep=M_PI/vf->bark_map_size;
+
+            for(i=0;i<order;i++) { lsp[i]=2.0f*cos(lsp[i]); }
+
+            AV_DEBUG("floor0 synth: map_size=%d; m=%d; wstep=%f\n",
+                     vf->map_size, order, wstep);
+
+            i=0;
+            while(i<vf->map_size[blockflag]) {
+                int j, iter_cond=vf->map[blockflag][i];
+                float p=0.5f;
+                float q=0.5f;
+                float two_cos_w=2.0f*cos(wstep*iter_cond); // needed all times
+
+                /* similar part for the q and p products */
+                for(j=0;j<order;j+=2) {
+                    q *= lsp[j]  -two_cos_w;
+                    p *= lsp[j+1]-two_cos_w;
+                }
+                if(j==order) { // even order
+                    p *= p*(2.0f-two_cos_w);
+                    q *= q*(2.0f+two_cos_w);
+                }
+                else { // odd order
+                    q *= two_cos_w-lsp[j]; // one more time for q
+
+                    /* final step and square */
+                    p *= p*(4.f-two_cos_w*two_cos_w);
+                    q *= q;
+                }
+
+                /* calculate linear floor value */
+                {
+                    q=exp( (
+                             ( (amplitude*vf->amplitude_offset)/
+                               (((1<<vf->amplitude_bits)-1) * sqrt(p+q)) )
+                             - vf->amplitude_offset ) * .11512925f
+                         );
+                }
+
+                /* fill vector */
+                do { vec[i]=q; ++i; }while(vf->map[blockflag][i]==iter_cond);
+            }
+        }
+    }
+    else {
+        /* this channel is unused */
+        return 1;
+    }
+
+    AV_DEBUG(" Floor0 decoded\n");
+
+    return 0;
+}
+static uint_fast8_t vorbis_floor1_decode(vorbis_context *vc, vorbis_floor_data *vfu, float *vec) {
+    vorbis_floor1 * vf=&vfu->t1;
     GetBitContext *gb=&vc->gb;
     uint_fast16_t range_v[4]={ 256, 128, 86, 64 };
     uint_fast16_t range=range_v[vf->multiplier-1];
@@ -1221,6 +1462,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
     } else {
         mode_number=get_bits(gb, ilog(vc->mode_count-1));
     }
+    vc->mode_number=mode_number;
     mapping=&vc->mappings[vc->modes[mode_number].mapping];
 
     AV_DEBUG(" Mode number: %d , mapping: %d , blocktype %d \n", mode_number, vc->modes[mode_number].mapping, vc->modes[mode_number].blockflag);
@@ -1234,7 +1476,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
     memset(ch_res_ptr, 0, sizeof(float)*vc->audio_channels*blocksize/2); //FIXME can this be removed ?
     memset(ch_floor_ptr, 0, sizeof(float)*vc->audio_channels*blocksize/2); //FIXME can this be removed ?
 
-// Decode floor(1)
+// Decode floor
 
     for(i=0;i<vc->audio_channels;++i) {
         vorbis_floor *floor;
@@ -1244,7 +1486,7 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
             floor=&vc->floors[mapping->submap_floor[0]];
         }
 
-        no_residue[i]=vorbis_floor1_decode(vc, floor, ch_floor_ptr);
+        no_residue[i]=floor->decode(vc, &floor->data, ch_floor_ptr);
         ch_floor_ptr+=blocksize/2;
     }
 
diff --git a/src/libffmpeg/libavcodec/vorbis.h b/src/libffmpeg/libavcodec/vorbis.h
index 27274a126..c818207d9 100644
--- a/src/libffmpeg/libavcodec/vorbis.h
+++ b/src/libffmpeg/libavcodec/vorbis.h
@@ -12,21 +12,48 @@ typedef struct {
     unsigned int nb_bits;
 } vorbis_codebook;
 
+typedef union vorbis_floor_u vorbis_floor_data;
+typedef struct vorbis_floor0_s vorbis_floor0;
+typedef struct vorbis_floor1_s vorbis_floor1;
+struct vorbis_context_s;
+typedef
+uint_fast8_t (* vorbis_floor_decode_func)
+             (struct vorbis_context_s *, vorbis_floor_data *, float *);
 typedef struct {
     uint_fast8_t floor_type;
-    uint_fast8_t partitions;
-    uint_fast8_t maximum_class;
-    uint_fast8_t partition_class[32];
-    uint_fast8_t class_dimensions[16];
-    uint_fast8_t class_subclasses[16];
-    uint_fast8_t class_masterbook[16];
-    int_fast16_t subclass_books[16][8];
-    uint_fast8_t multiplier;
-    uint_fast16_t x_list_dim;
-    uint_fast16_t *x_list;
-    uint_fast16_t *x_list_order;
-    uint_fast16_t *low_neighbour;
-    uint_fast16_t *high_neighbour;
+    vorbis_floor_decode_func decode;
+    union vorbis_floor_u
+    {
+        struct vorbis_floor0_s
+        {
+            uint_fast8_t order;
+            uint_fast16_t rate;
+            uint_fast16_t bark_map_size;
+            int_fast32_t * map[2];
+            uint_fast32_t map_size[2];
+            uint_fast8_t amplitude_bits;
+            uint_fast8_t amplitude_offset;
+            uint_fast8_t num_books;
+            uint_fast8_t * book_list;
+            float * lsp;
+        } t0;
+        struct vorbis_floor1_s
+        {
+            uint_fast8_t partitions;
+            uint_fast8_t maximum_class;
+            uint_fast8_t partition_class[32];
+            uint_fast8_t class_dimensions[16];
+            uint_fast8_t class_subclasses[16];
+            uint_fast8_t class_masterbook[16];
+            int_fast16_t subclass_books[16][8];
+            uint_fast8_t multiplier;
+            uint_fast16_t x_list_dim;
+            uint_fast16_t *x_list;
+            uint_fast16_t *x_list_order;
+            uint_fast16_t *low_neighbour;
+            uint_fast16_t *high_neighbour;
+        } t1;
+    } data;
 } vorbis_floor;
 
 typedef struct {
@@ -57,7 +84,7 @@ typedef struct {
     uint_fast8_t mapping;
 } vorbis_mode;
 
-typedef struct {
+typedef struct vorbis_context_s {
     AVCodecContext *avccontext;
     GetBitContext gb;
 
@@ -84,6 +111,7 @@ typedef struct {
     vorbis_mapping *mappings;
     uint_fast8_t mode_count;
     vorbis_mode *modes;
+    uint_fast8_t mode_number; // mode number for the current packet
     float *channel_residues;
     float *channel_floors;
     float *saved;
diff --git a/src/libffmpeg/libavcodec/vp3.c b/src/libffmpeg/libavcodec/vp3.c
index 9cff50e55..a7a9e8bac 100644
--- a/src/libffmpeg/libavcodec/vp3.c
+++ b/src/libffmpeg/libavcodec/vp3.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -42,9 +42,9 @@
 
 #define FRAGMENT_PIXELS 8
 
-/* 
+/*
  * Debugging Variables
- * 
+ *
  * Define one or more of the following compile-time variables to 1 to obtain
  * elaborate information about certain aspects of the decoding process.
  *
@@ -89,49 +89,49 @@ static inline void debug_init(const char *format, ...) { }
 #if DEBUG_DEQUANTIZERS
 #define debug_dequantizers(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
 #else
-static inline void debug_dequantizers(const char *format, ...) { } 
+static inline void debug_dequantizers(const char *format, ...) { }
 #endif
 
 #if DEBUG_BLOCK_CODING
 #define debug_block_coding(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
 #else
-static inline void debug_block_coding(const char *format, ...) { } 
+static inline void debug_block_coding(const char *format, ...) { }
 #endif
 
 #if DEBUG_MODES
-#define debug_modes(args...) av_log(NULL, AV_LOG_DEBUG, ## args) 
+#define debug_modes(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
 #else
-static inline void debug_modes(const char *format, ...) { } 
+static inline void debug_modes(const char *format, ...) { }
 #endif
 
 #if DEBUG_VECTORS
 #define debug_vectors(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
 #else
-static inline void debug_vectors(const char *format, ...) { } 
+static inline void debug_vectors(const char *format, ...) { }
 #endif
 
-#if DEBUG_TOKEN 
+#if DEBUG_TOKEN
 #define debug_token(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
 #else
-static inline void debug_token(const char *format, ...) { } 
+static inline void debug_token(const char *format, ...) { }
 #endif
 
 #if DEBUG_VLC
 #define debug_vlc(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
 #else
-static inline void debug_vlc(const char *format, ...) { } 
+static inline void debug_vlc(const char *format, ...) { }
 #endif
 
 #if DEBUG_DC_PRED
 #define debug_dc_pred(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
 #else
-static inline void debug_dc_pred(const char *format, ...) { } 
+static inline void debug_dc_pred(const char *format, ...) { }
 #endif
 
 #if DEBUG_IDCT
 #define debug_idct(args...) av_log(NULL, AV_LOG_DEBUG, ## args)
 #else
-static inline void debug_idct(const char *format, ...) { } 
+static inline void debug_idct(const char *format, ...) { }
 #endif
 
 typedef struct Coeff {
@@ -178,39 +178,39 @@ static int ModeAlphabet[7][CODING_MODE_COUNT] =
     { 0, 0, 0, 0, 0, 0, 0, 0 },
 
     /* scheme 1: Last motion vector dominates */
-    {    MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,  
+    {    MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,
          MODE_INTER_PLUS_MV,    MODE_INTER_NO_MV,
-         MODE_INTRA,            MODE_USING_GOLDEN,      
+         MODE_INTRA,            MODE_USING_GOLDEN,
          MODE_GOLDEN_MV,        MODE_INTER_FOURMV },
 
     /* scheme 2 */
-    {    MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,  
+    {    MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,
          MODE_INTER_NO_MV,      MODE_INTER_PLUS_MV,
-         MODE_INTRA,            MODE_USING_GOLDEN,      
+         MODE_INTRA,            MODE_USING_GOLDEN,
          MODE_GOLDEN_MV,        MODE_INTER_FOURMV },
 
     /* scheme 3 */
-    {    MODE_INTER_LAST_MV,    MODE_INTER_PLUS_MV,     
+    {    MODE_INTER_LAST_MV,    MODE_INTER_PLUS_MV,
          MODE_INTER_PRIOR_LAST, MODE_INTER_NO_MV,
-         MODE_INTRA,            MODE_USING_GOLDEN,      
+         MODE_INTRA,            MODE_USING_GOLDEN,
          MODE_GOLDEN_MV,        MODE_INTER_FOURMV },
 
     /* scheme 4 */
-    {    MODE_INTER_LAST_MV,    MODE_INTER_PLUS_MV,     
+    {    MODE_INTER_LAST_MV,    MODE_INTER_PLUS_MV,
          MODE_INTER_NO_MV,      MODE_INTER_PRIOR_LAST,
-         MODE_INTRA,            MODE_USING_GOLDEN,      
+         MODE_INTRA,            MODE_USING_GOLDEN,
          MODE_GOLDEN_MV,        MODE_INTER_FOURMV },
 
     /* scheme 5: No motion vector dominates */
-    {    MODE_INTER_NO_MV,      MODE_INTER_LAST_MV,     
+    {    MODE_INTER_NO_MV,      MODE_INTER_LAST_MV,
          MODE_INTER_PRIOR_LAST, MODE_INTER_PLUS_MV,
-         MODE_INTRA,            MODE_USING_GOLDEN,      
+         MODE_INTRA,            MODE_USING_GOLDEN,
          MODE_GOLDEN_MV,        MODE_INTER_FOURMV },
 
     /* scheme 6 */
-    {    MODE_INTER_NO_MV,      MODE_USING_GOLDEN,      
+    {    MODE_INTER_NO_MV,      MODE_USING_GOLDEN,
          MODE_INTER_LAST_MV,    MODE_INTER_PRIOR_LAST,
-         MODE_INTER_PLUS_MV,    MODE_INTRA,             
+         MODE_INTER_PLUS_MV,    MODE_INTRA,
          MODE_GOLDEN_MV,        MODE_INTER_FOURMV },
 
 };
@@ -256,9 +256,9 @@ typedef struct Vp3DecodeContext {
     Coeff *next_coeff;
     int u_fragment_start;
     int v_fragment_start;
-    
+
     ScanTable scantable;
-    
+
     /* tables */
     uint16_t coded_dc_scale_factor[64];
     uint32_t coded_ac_scale_factor[64];
@@ -285,9 +285,9 @@ typedef struct Vp3DecodeContext {
 
     /* these arrays need to be on 16-byte boundaries since SSE2 operations
      * index into them */
-    int16_t __align16 intra_y_dequant[64];
-    int16_t __align16 intra_c_dequant[64];
-    int16_t __align16 inter_dequant[64];
+    DECLARE_ALIGNED_16(int16_t, intra_y_dequant[64]);
+    DECLARE_ALIGNED_16(int16_t, intra_c_dequant[64]);
+    DECLARE_ALIGNED_16(int16_t, inter_dequant[64]);
 
     /* This table contains superblock_count * 16 entries. Each set of 16
      * numbers corresponds to the fragment indices 0..15 of the superblock.
@@ -305,7 +305,7 @@ typedef struct Vp3DecodeContext {
      * numbers corresponds to the fragment indices 0..5 which comprise
      * the macroblock (4 Y fragments and 2 C fragments). */
     int *macroblock_fragments;
-    /* This is an array that indicates how a particular macroblock 
+    /* This is an array that indicates how a particular macroblock
      * is coded. */
     unsigned char *macroblock_coding;
 
@@ -342,7 +342,7 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb);
  *
  * Returns 0 is successful; returns 1 if *anything* went wrong.
  */
-static int init_block_mapping(Vp3DecodeContext *s) 
+static int init_block_mapping(Vp3DecodeContext *s)
 {
     int i, j;
     signed int hilbert_walk_y[16];
@@ -362,7 +362,7 @@ static int init_block_mapping(Vp3DecodeContext *s)
     int c_fragment;
 
     signed char travel_width[16] = {
-         1,  1,  0, -1, 
+         1,  1,  0, -1,
          0,  0,  1,  0,
          1,  0,  1,  0,
          0, -1,  0,  1
@@ -438,7 +438,7 @@ static int init_block_mapping(Vp3DecodeContext *s)
             bottom_edge = s->fragment_height;
             current_width = -1;
             current_height = 0;
-            superblock_row_inc = 3 * s->fragment_width - 
+            superblock_row_inc = 3 * s->fragment_width -
                 (s->y_superblock_width * 4 - s->fragment_width);
             hilbert = hilbert_walk_y;
 
@@ -452,7 +452,7 @@ static int init_block_mapping(Vp3DecodeContext *s)
             bottom_edge = s->fragment_height / 2;
             current_width = -1;
             current_height = 0;
-            superblock_row_inc = 3 * (s->fragment_width / 2) - 
+            superblock_row_inc = 3 * (s->fragment_width / 2) -
                 (s->c_superblock_width * 4 - s->fragment_width / 2);
             hilbert = hilbert_walk_c;
 
@@ -466,7 +466,7 @@ static int init_block_mapping(Vp3DecodeContext *s)
             bottom_edge = s->fragment_height / 2;
             current_width = -1;
             current_height = 0;
-            superblock_row_inc = 3 * (s->fragment_width / 2) - 
+            superblock_row_inc = 3 * (s->fragment_width / 2) -
                 (s->c_superblock_width * 4 - s->fragment_width / 2);
             hilbert = hilbert_walk_c;
 
@@ -494,12 +494,12 @@ static int init_block_mapping(Vp3DecodeContext *s)
             if ((current_width < right_edge) &&
                 (current_height < bottom_edge)) {
                 s->superblock_fragments[mapping_index] = current_fragment;
-                debug_init("    mapping fragment %d to superblock %d, position %d (%d/%d x %d/%d)\n", 
+                debug_init("    mapping fragment %d to superblock %d, position %d (%d/%d x %d/%d)\n",
                     s->superblock_fragments[mapping_index], i, j,
                     current_width, right_edge, current_height, bottom_edge);
             } else {
                 s->superblock_fragments[mapping_index] = -1;
-                debug_init("    superblock %d, position %d has no fragment (%d/%d x %d/%d)\n", 
+                debug_init("    superblock %d, position %d has no fragment (%d/%d x %d/%d)\n",
                     i, j,
                     current_width, right_edge, current_height, bottom_edge);
             }
@@ -575,31 +575,31 @@ static int init_block_mapping(Vp3DecodeContext *s)
                 s->macroblock_fragments[mapping_index++] = -1;
 
             if (i + 1 < s->fragment_height) {
-                s->all_fragments[current_fragment + s->fragment_width].macroblock = 
+                s->all_fragments[current_fragment + s->fragment_width].macroblock =
                     current_macroblock;
-                s->macroblock_fragments[mapping_index++] = 
+                s->macroblock_fragments[mapping_index++] =
                     current_fragment + s->fragment_width;
                 debug_init("%d ", current_fragment + s->fragment_width);
             } else
                 s->macroblock_fragments[mapping_index++] = -1;
 
             if ((j + 1 < s->fragment_width) && (i + 1 < s->fragment_height)) {
-                s->all_fragments[current_fragment + s->fragment_width + 1].macroblock = 
+                s->all_fragments[current_fragment + s->fragment_width + 1].macroblock =
                     current_macroblock;
-                s->macroblock_fragments[mapping_index++] = 
+                s->macroblock_fragments[mapping_index++] =
                     current_fragment + s->fragment_width + 1;
                 debug_init("%d ", current_fragment + s->fragment_width + 1);
             } else
                 s->macroblock_fragments[mapping_index++] = -1;
 
             /* C planes */
-            c_fragment = s->u_fragment_start + 
+            c_fragment = s->u_fragment_start +
                 (i * s->fragment_width / 4) + (j / 2);
             s->all_fragments[c_fragment].macroblock = s->macroblock_count;
             s->macroblock_fragments[mapping_index++] = c_fragment;
             debug_init("%d ", c_fragment);
 
-            c_fragment = s->v_fragment_start + 
+            c_fragment = s->v_fragment_start +
                 (i * s->fragment_width / 4) + (j / 2);
             s->all_fragments[c_fragment].macroblock = s->macroblock_count;
             s->macroblock_fragments[mapping_index++] = c_fragment;
@@ -609,7 +609,7 @@ static int init_block_mapping(Vp3DecodeContext *s)
 
             if (j + 2 <= s->fragment_width)
                 current_fragment += 2;
-            else 
+            else
                 current_fragment++;
             current_macroblock++;
         }
@@ -653,7 +653,7 @@ static void init_dequantizer(Vp3DecodeContext *s)
 
     debug_vp3("  vp3: initializing dequantization tables\n");
 
-    /* 
+    /*
      * Scale dequantizers:
      *
      *   quantizer * sf
@@ -704,7 +704,7 @@ static void init_dequantizer(Vp3DecodeContext *s)
             s->inter_dequant[j] = MIN_DEQUANT_VAL * 2;
         s->inter_dequant[j] *= SCALER;
     }
-    
+
     memset(s->qscale_table, (FFMAX(s->intra_y_dequant[1], s->intra_c_dequant[1])+8)/16, 512); //FIXME finetune
 
     /* print debug information as requested */
@@ -759,7 +759,7 @@ static void init_loop_filter(Vp3DecodeContext *s)
 }
 
 /*
- * This function unpacks all of the superblock/macroblock/fragment coding 
+ * This function unpacks all of the superblock/macroblock/fragment coding
  * information from the bitstream.
  */
 static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
@@ -785,13 +785,13 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
 
         /* unpack the list of partially-coded superblocks */
         bit = get_bits(gb, 1);
-        /* toggle the bit because as soon as the first run length is 
+        /* toggle the bit because as soon as the first run length is
          * fetched the bit will be toggled again */
         bit ^= 1;
         while (current_superblock < s->superblock_count) {
             if (current_run-- == 0) {
                 bit ^= 1;
-                current_run = get_vlc2(gb, 
+                current_run = get_vlc2(gb,
                     s->superblock_run_length_vlc.table, 6, 2);
                 if (current_run == 33)
                     current_run += get_bits(gb, 12);
@@ -821,7 +821,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
             current_superblock = 0;
             current_run = 0;
             bit = get_bits(gb, 1);
-            /* toggle the bit because as soon as the first run length is 
+            /* toggle the bit because as soon as the first run length is
              * fetched the bit will be toggled again */
             bit ^= 1;
             while (current_superblock < s->superblock_count) {
@@ -831,7 +831,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
 
                     if (current_run-- == 0) {
                         bit ^= 1;
-                        current_run = get_vlc2(gb, 
+                        current_run = get_vlc2(gb,
                             s->superblock_run_length_vlc.table, 6, 2);
                         if (current_run == 33)
                             current_run += get_bits(gb, 12);
@@ -852,7 +852,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
 
             current_run = 0;
             bit = get_bits(gb, 1);
-            /* toggle the bit because as soon as the first run length is 
+            /* toggle the bit because as soon as the first run length is
              * fetched the bit will be toggled again */
             bit ^= 1;
         }
@@ -882,7 +882,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
                 if (s->superblock_coding[i] == SB_NOT_CODED) {
 
                     /* copy all the fragments from the prior frame */
-                    s->all_fragments[current_fragment].coding_method = 
+                    s->all_fragments[current_fragment].coding_method =
                         MODE_COPY;
 
                 } else if (s->superblock_coding[i] == SB_PARTIALLY_CODED) {
@@ -891,17 +891,17 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
                      * that cares about the fragment coding runs */
                     if (current_run-- == 0) {
                         bit ^= 1;
-                        current_run = get_vlc2(gb, 
+                        current_run = get_vlc2(gb,
                             s->fragment_run_length_vlc.table, 5, 2);
                     }
 
                     if (bit) {
-                        /* default mode; actual mode will be decoded in 
+                        /* default mode; actual mode will be decoded in
                          * the next phase */
-                        s->all_fragments[current_fragment].coding_method = 
+                        s->all_fragments[current_fragment].coding_method =
                             MODE_INTER_NO_MV;
                         s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment;
-                        s->coded_fragment_list[s->coded_fragment_list_index] = 
+                        s->coded_fragment_list[s->coded_fragment_list_index] =
                             current_fragment;
                         if ((current_fragment >= s->u_fragment_start) &&
                             (s->last_coded_y_fragment == -1) &&
@@ -926,10 +926,10 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
 
                     /* fragments are fully coded in this superblock; actual
                      * coding will be determined in next step */
-                    s->all_fragments[current_fragment].coding_method = 
+                    s->all_fragments[current_fragment].coding_method =
                         MODE_INTER_NO_MV;
                     s->all_fragments[current_fragment].next_coeff= s->coeffs + current_fragment;
-                    s->coded_fragment_list[s->coded_fragment_list_index] = 
+                    s->coded_fragment_list[s->coded_fragment_list_index] =
                         current_fragment;
                     if ((current_fragment >= s->u_fragment_start) &&
                         (s->last_coded_y_fragment == -1) &&
@@ -950,7 +950,7 @@ static int unpack_superblocks(Vp3DecodeContext *s, GetBitContext *gb)
     if (!first_c_fragment_seen)
         /* only Y fragments coded in this frame */
         s->last_coded_y_fragment = s->coded_fragment_list_index - 1;
-    else 
+    else
         /* end the list of coded C fragments */
         s->last_coded_c_fragment = s->coded_fragment_list_index - 1;
 
@@ -998,7 +998,7 @@ static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb)
         }
 
         for (i = 0; i < 8; i++)
-            debug_modes("      mode[%d][%d] = %d\n", scheme, i, 
+            debug_modes("      mode[%d][%d] = %d\n", scheme, i,
                 ModeAlphabet[scheme][i]);
 
         /* iterate through all of the macroblocks that contain 1 or more
@@ -1025,7 +1025,7 @@ static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb)
 
                 s->macroblock_coding[current_macroblock] = coding_mode;
                 for (k = 0; k < 6; k++) {
-                    current_fragment = 
+                    current_fragment =
                         s->macroblock_fragments[current_macroblock * 6 + k];
                     if (current_fragment == -1)
                         continue;
@@ -1034,7 +1034,7 @@ static int unpack_modes(Vp3DecodeContext *s, GetBitContext *gb)
                             current_fragment, s->fragment_count);
                         return 1;
                     }
-                    if (s->all_fragments[current_fragment].coding_method != 
+                    if (s->all_fragments[current_fragment].coding_method !=
                         MODE_COPY)
                         s->all_fragments[current_fragment].coding_method =
                             coding_mode;
@@ -1146,13 +1146,13 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
                         motion_y[4] += motion_y[k];
                     }
 
-                    if (motion_x[4] >= 0) 
+                    if (motion_x[4] >= 0)
                         motion_x[4] = (motion_x[4] + 2) / 4;
                     else
                         motion_x[4] = (motion_x[4] - 2) / 4;
                     motion_x[5] = motion_x[4];
 
-                    if (motion_y[4] >= 0) 
+                    if (motion_y[4] >= 0)
                         motion_y[4] = (motion_y[4] + 2) / 4;
                     else
                         motion_y[4] = (motion_y[4] - 2) / 4;
@@ -1210,7 +1210,7 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
                     current_fragment,
                     s->macroblock_coding[current_macroblock]);
                 for (k = 0; k < 6; k++) {
-                    current_fragment = 
+                    current_fragment =
                         s->macroblock_fragments[current_macroblock * 6 + k];
                     if (current_fragment == -1)
                         continue;
@@ -1231,7 +1231,7 @@ static int unpack_vectors(Vp3DecodeContext *s, GetBitContext *gb)
     return 0;
 }
 
-/* 
+/*
  * This function is called by unpack_dct_coeffs() to extract the VLCs from
  * the bitstream. The VLCs encode tokens which are used to unpack DCT
  * data. This function unpacks all the VLCs for either the Y plane or both
@@ -1306,7 +1306,7 @@ static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
                 s->coded_fragment_list[i], fragment->next_coeff[coeff_index]);
         } else {
             fragment->coeff_count |= 128;
-            debug_vlc(" fragment %d eob with %d coefficients\n", 
+            debug_vlc(" fragment %d eob with %d coefficients\n",
                 s->coded_fragment_list[i], fragment->coeff_count&127);
             eob_run--;
         }
@@ -1335,7 +1335,7 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
     /* unpack the Y plane DC coefficients */
     debug_vp3("  vp3: unpacking Y plane DC coefficients using table %d\n",
         dc_y_table);
-    residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_y_table], 0, 
+    residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_y_table], 0,
         s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
 
     /* unpack the C plane DC coefficients */
@@ -1353,12 +1353,12 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
 
         debug_vp3("  vp3: unpacking level %d Y plane AC coefficients using table %d\n",
             i, ac_y_table);
-        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_1[ac_y_table], i, 
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_1[ac_y_table], i,
             s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
 
         debug_vp3("  vp3: unpacking level %d C plane AC coefficients using table %d\n",
             i, ac_c_table);
-        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_1[ac_c_table], i, 
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_1[ac_c_table], i,
             s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
     }
 
@@ -1367,12 +1367,12 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
 
         debug_vp3("  vp3: unpacking level %d Y plane AC coefficients using table %d\n",
             i, ac_y_table);
-        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_2[ac_y_table], i, 
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_2[ac_y_table], i,
             s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
 
         debug_vp3("  vp3: unpacking level %d C plane AC coefficients using table %d\n",
             i, ac_c_table);
-        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_2[ac_c_table], i, 
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_2[ac_c_table], i,
             s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
     }
 
@@ -1381,12 +1381,12 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
 
         debug_vp3("  vp3: unpacking level %d Y plane AC coefficients using table %d\n",
             i, ac_y_table);
-        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_3[ac_y_table], i, 
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_3[ac_y_table], i,
             s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
 
         debug_vp3("  vp3: unpacking level %d C plane AC coefficients using table %d\n",
             i, ac_c_table);
-        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_3[ac_c_table], i, 
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_3[ac_c_table], i,
             s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
     }
 
@@ -1395,12 +1395,12 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
 
         debug_vp3("  vp3: unpacking level %d Y plane AC coefficients using table %d\n",
             i, ac_y_table);
-        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_4[ac_y_table], i, 
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_4[ac_y_table], i,
             s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
 
         debug_vp3("  vp3: unpacking level %d C plane AC coefficients using table %d\n",
             i, ac_c_table);
-        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_4[ac_c_table], i, 
+        residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_4[ac_c_table], i,
             s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
     }
 
@@ -1409,7 +1409,7 @@ static int unpack_dct_coeffs(Vp3DecodeContext *s, GetBitContext *gb)
 
 /*
  * This function reverses the DC prediction for each coded fragment in
- * the frame. Much of this function is adapted directly from the original 
+ * the frame. Much of this function is adapted directly from the original
  * VP3 source code.
  */
 #define COMPATIBLE_FRAME(x) \
@@ -1421,7 +1421,7 @@ static inline int iabs (int x) { return ((x < 0) ? -x : x); }
 static void reverse_dc_prediction(Vp3DecodeContext *s,
                                   int first_fragment,
                                   int fragment_width,
-                                  int fragment_height) 
+                                  int fragment_height)
 {
 
 #define PUL 8
@@ -1441,7 +1441,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
      * 10000000004
      * 10000000004
      *
-     * Note: Groups 5 and 7 do not exist as it would mean that the 
+     * Note: Groups 5 and 7 do not exist as it would mean that the
      * fragment's x coordinate is both 0 and (width - 1) at the same time.
      */
     int predictor_group;
@@ -1456,7 +1456,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
     /* indices for the left, up-left, up, and up-right fragments */
     int l, ul, u, ur;
 
-    /* 
+    /*
      * The 6 fields mean:
      *   0: up-left multiplier
      *   1: up multiplier
@@ -1487,7 +1487,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
     /* This table shows which types of blocks can use other blocks for
      * prediction. For example, INTRA is the only mode in this table to
      * have a frame number of 0. That means INTRA blocks can only predict
-     * from other INTRA blocks. There are 2 golden frame coding types; 
+     * from other INTRA blocks. There are 2 golden frame coding types;
      * blocks encoding in these modes can only predict from other blocks
      * that were encoded with these 1 of these 2 modes. */
     unsigned char compatible_frame[8] = {
@@ -1521,7 +1521,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
             /* reverse prediction if this block was coded */
             if (s->all_fragments[i].coding_method != MODE_COPY) {
 
-                current_frame_type = 
+                current_frame_type =
                     compatible_frame[s->all_fragments[i].coding_method];
                 predictor_group = (x == 0) + ((y == 0) << 1) +
                     ((x + 1 == fragment_width) << 2);
@@ -1639,7 +1639,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
                     /* if there were no fragments to predict from, use last
                      * DC saved */
                     predicted_dc = last_dc[current_frame_type];
-                    debug_dc_pred("from last DC (%d) = %d\n", 
+                    debug_dc_pred("from last DC (%d) = %d\n",
                         current_frame_type, DC_COEFF(i));
 
                 } else {
@@ -1654,7 +1654,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
                     /* if there is a shift value in the transform, add
                      * the sign bit before the shift */
                     if (predictor_transform[transform][5] != 0) {
-                        predicted_dc += ((predicted_dc >> 15) & 
+                        predicted_dc += ((predicted_dc >> 15) &
                             predictor_transform[transform][4]);
                         predicted_dc >>= predictor_transform[transform][5];
                     }
@@ -1670,7 +1670,7 @@ static void reverse_dc_prediction(Vp3DecodeContext *s,
                             predicted_dc = vul;
                     }
 
-                    debug_dc_pred("from pred DC = %d\n", 
+                    debug_dc_pred("from pred DC = %d\n",
                     DC_COEFF(i));
                 }
 
@@ -1711,7 +1711,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
     int m, n;
     int i;  /* indicates current fragment */
     int16_t *dequantizer;
-    DCTELEM __align16 block[64];
+    DECLARE_ALIGNED_16(DCTELEM, block[64]);
     unsigned char *output_plane;
     unsigned char *last_plane;
     unsigned char *golden_plane;
@@ -1774,7 +1774,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
             i = s->macroblock_fragments[current_macroblock_entry + 5];
         }
         fragment_width = plane_width / FRAGMENT_PIXELS;
-    
+
         if(ABS(stride) > 2048)
             return; //various tables are fixed size
 
@@ -1796,7 +1796,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                     if ((s->all_fragments[i].coding_method == MODE_USING_GOLDEN) ||
                         (s->all_fragments[i].coding_method == MODE_GOLDEN_MV))
                         motion_source= golden_plane;
-                    else 
+                    else
                         motion_source= last_plane;
 
                     motion_source += s->all_fragments[i].first_pixel;
@@ -1834,14 +1834,14 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                             motion_source= temp;
                         }
                     }
-                
+
 
                     /* first, take care of copying a block from either the
                      * previous or the golden frame */
                     if (s->all_fragments[i].coding_method != MODE_INTRA) {
-                        /* Note, it is possible to implement all MC cases with 
-                           put_no_rnd_pixels_l2 which would look more like the 
-                           VP3 source but this would be slower as 
+                        /* Note, it is possible to implement all MC cases with
+                           put_no_rnd_pixels_l2 which would look more like the
+                           VP3 source but this would be slower as
                            put_no_rnd_pixels_tab is better optimzed */
                         if(motion_halfpel_index != 3){
                             s->dsp.put_no_rnd_pixels_tab[1][motion_halfpel_index](
@@ -1851,8 +1851,8 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                             int d= (motion_x ^ motion_y)>>31; // d is 0 if motion_x and _y have the same sign, else -1
                             s->dsp.put_no_rnd_pixels_l2[1](
                                 output_plane + s->all_fragments[i].first_pixel,
-                                motion_source - d, 
-                                motion_source + stride + 1 + d, 
+                                motion_source - d,
+                                motion_source + stride + 1 + d,
                                 stride, 8);
                         }
                         dequantizer = s->inter_dequant;
@@ -1864,8 +1864,8 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                     }
 
                     /* dequantize the DCT coefficients */
-                    debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n", 
-                        i, s->all_fragments[i].coding_method, 
+                    debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n",
+                        i, s->all_fragments[i].coding_method,
                         DC_COEFF(i), dequantizer[0]);
 
                     if(s->avctx->idct_algo==FF_IDCT_VP3){
@@ -1885,7 +1885,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                     }
 
                     /* invert DCT and place (or add) in final output */
-                
+
                     if (s->all_fragments[i].coding_method == MODE_INTRA) {
                         if(s->avctx->idct_algo!=FF_IDCT_VP3)
                             block[0] += 128<<3;
@@ -1905,7 +1905,7 @@ static void render_slice(Vp3DecodeContext *s, int slice)
                         "put" : "add");
                     for (m = 0; m < 8; m++) {
                         for (n = 0; n < 8; n++) {
-                            debug_idct(" %3d", *(output_plane + 
+                            debug_idct(" %3d", *(output_plane +
                                 s->all_fragments[i].first_pixel + (m * stride + n)));
                         }
                         debug_idct("\n");
@@ -1976,7 +1976,7 @@ static void horizontal_filter(unsigned char *first_pixel, int stride,
     int filter_value;
 
     for (end= first_pixel + 8*stride; first_pixel < end; first_pixel += stride) {
-        filter_value = 
+        filter_value =
             (first_pixel[-2] - first_pixel[ 1])
          +3*(first_pixel[ 0] - first_pixel[-1]);
         filter_value = bounding_values[(filter_value + 4) >> 3];
@@ -1993,7 +1993,7 @@ static void vertical_filter(unsigned char *first_pixel, int stride,
     const int nstride= -stride;
 
     for (end= first_pixel + 8; first_pixel < end; first_pixel++) {
-        filter_value = 
+        filter_value =
             (first_pixel[2 * nstride] - first_pixel[ stride])
          +3*(first_pixel[0          ] - first_pixel[nstride]);
         filter_value = bounding_values[(filter_value + 4) >> 3];
@@ -2065,7 +2065,7 @@ START_TIMER
                 if ((x > 0) &&
                     (s->all_fragments[fragment].coding_method != MODE_COPY)) {
                     horizontal_filter(
-                        plane_data + s->all_fragments[fragment].first_pixel - 7*stride, 
+                        plane_data + s->all_fragments[fragment].first_pixel - 7*stride,
                         stride, bounding_values);
                 }
 
@@ -2073,7 +2073,7 @@ START_TIMER
                 if ((y > 0) &&
                     (s->all_fragments[fragment].coding_method != MODE_COPY)) {
                     vertical_filter(
-                        plane_data + s->all_fragments[fragment].first_pixel + stride, 
+                        plane_data + s->all_fragments[fragment].first_pixel + stride,
                         stride, bounding_values);
                 }
 
@@ -2084,7 +2084,7 @@ START_TIMER
                     (s->all_fragments[fragment].coding_method != MODE_COPY) &&
                     (s->all_fragments[fragment + 1].coding_method == MODE_COPY)) {
                     horizontal_filter(
-                        plane_data + s->all_fragments[fragment + 1].first_pixel - 7*stride, 
+                        plane_data + s->all_fragments[fragment + 1].first_pixel - 7*stride,
                         stride, bounding_values);
                 }
 
@@ -2095,7 +2095,7 @@ START_TIMER
                     (s->all_fragments[fragment].coding_method != MODE_COPY) &&
                     (s->all_fragments[fragment + width].coding_method == MODE_COPY)) {
                     vertical_filter(
-                        plane_data + s->all_fragments[fragment + width].first_pixel + stride, 
+                        plane_data + s->all_fragments[fragment + width].first_pixel + stride,
                         stride, bounding_values);
                 }
 
@@ -2106,12 +2106,12 @@ STOP_TIMER("loop filter")
     }
 }
 
-/* 
+/*
  * This function computes the first pixel addresses for each fragment.
  * This function needs to be invoked after the first frame is allocated
  * so that it has access to the plane strides.
  */
-static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s) 
+static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s)
 {
 
     int i, x, y;
@@ -2121,11 +2121,11 @@ static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s)
     i = 0;
     for (y = s->fragment_height; y > 0; y--) {
         for (x = 0; x < s->fragment_width; x++) {
-            s->all_fragments[i++].first_pixel = 
+            s->all_fragments[i++].first_pixel =
                 s->golden_frame.linesize[0] * y * FRAGMENT_PIXELS -
                     s->golden_frame.linesize[0] +
                     x * FRAGMENT_PIXELS;
-            debug_init("  fragment %d, first pixel @ %d\n", 
+            debug_init("  fragment %d, first pixel @ %d\n",
                 i-1, s->all_fragments[i-1].first_pixel);
         }
     }
@@ -2134,11 +2134,11 @@ static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s)
     i = s->u_fragment_start;
     for (y = s->fragment_height / 2; y > 0; y--) {
         for (x = 0; x < s->fragment_width / 2; x++) {
-            s->all_fragments[i++].first_pixel = 
+            s->all_fragments[i++].first_pixel =
                 s->golden_frame.linesize[1] * y * FRAGMENT_PIXELS -
                     s->golden_frame.linesize[1] +
                     x * FRAGMENT_PIXELS;
-            debug_init("  fragment %d, first pixel @ %d\n", 
+            debug_init("  fragment %d, first pixel @ %d\n",
                 i-1, s->all_fragments[i-1].first_pixel);
         }
     }
@@ -2147,18 +2147,18 @@ static void vp3_calculate_pixel_addresses(Vp3DecodeContext *s)
     i = s->v_fragment_start;
     for (y = s->fragment_height / 2; y > 0; y--) {
         for (x = 0; x < s->fragment_width / 2; x++) {
-            s->all_fragments[i++].first_pixel = 
+            s->all_fragments[i++].first_pixel =
                 s->golden_frame.linesize[2] * y * FRAGMENT_PIXELS -
                     s->golden_frame.linesize[2] +
                     x * FRAGMENT_PIXELS;
-            debug_init("  fragment %d, first pixel @ %d\n", 
+            debug_init("  fragment %d, first pixel @ %d\n",
                 i-1, s->all_fragments[i-1].first_pixel);
         }
     }
 }
 
 /* FIXME: this should be merged with the above! */
-static void theora_calculate_pixel_addresses(Vp3DecodeContext *s) 
+static void theora_calculate_pixel_addresses(Vp3DecodeContext *s)
 {
 
     int i, x, y;
@@ -2168,11 +2168,11 @@ static void theora_calculate_pixel_addresses(Vp3DecodeContext *s)
     i = 0;
     for (y = 1; y <= s->fragment_height; y++) {
         for (x = 0; x < s->fragment_width; x++) {
-            s->all_fragments[i++].first_pixel = 
+            s->all_fragments[i++].first_pixel =
                 s->golden_frame.linesize[0] * y * FRAGMENT_PIXELS -
                     s->golden_frame.linesize[0] +
                     x * FRAGMENT_PIXELS;
-            debug_init("  fragment %d, first pixel @ %d\n", 
+            debug_init("  fragment %d, first pixel @ %d\n",
                 i-1, s->all_fragments[i-1].first_pixel);
         }
     }
@@ -2181,11 +2181,11 @@ static void theora_calculate_pixel_addresses(Vp3DecodeContext *s)
     i = s->u_fragment_start;
     for (y = 1; y <= s->fragment_height / 2; y++) {
         for (x = 0; x < s->fragment_width / 2; x++) {
-            s->all_fragments[i++].first_pixel = 
+            s->all_fragments[i++].first_pixel =
                 s->golden_frame.linesize[1] * y * FRAGMENT_PIXELS -
                     s->golden_frame.linesize[1] +
                     x * FRAGMENT_PIXELS;
-            debug_init("  fragment %d, first pixel @ %d\n", 
+            debug_init("  fragment %d, first pixel @ %d\n",
                 i-1, s->all_fragments[i-1].first_pixel);
         }
     }
@@ -2194,11 +2194,11 @@ static void theora_calculate_pixel_addresses(Vp3DecodeContext *s)
     i = s->v_fragment_start;
     for (y = 1; y <= s->fragment_height / 2; y++) {
         for (x = 0; x < s->fragment_width / 2; x++) {
-            s->all_fragments[i++].first_pixel = 
+            s->all_fragments[i++].first_pixel =
                 s->golden_frame.linesize[2] * y * FRAGMENT_PIXELS -
                     s->golden_frame.linesize[2] +
                     x * FRAGMENT_PIXELS;
-            debug_init("  fragment %d, first pixel @ %d\n", 
+            debug_init("  fragment %d, first pixel @ %d\n",
                 i-1, s->all_fragments[i-1].first_pixel);
         }
     }
@@ -2217,9 +2217,9 @@ static int vp3_decode_init(AVCodecContext *avctx)
     int c_superblock_count;
 
     if (avctx->codec_tag == MKTAG('V','P','3','0'))
-	s->version = 0;
+        s->version = 0;
     else
-	s->version = 1;
+        s->version = 1;
 
     s->avctx = avctx;
     s->width = (avctx->width + 15) & 0xFFFFFFF0;
@@ -2229,7 +2229,7 @@ static int vp3_decode_init(AVCodecContext *avctx)
     if(avctx->idct_algo==FF_IDCT_AUTO)
         avctx->idct_algo=FF_IDCT_VP3;
     dsputil_init(&s->dsp, avctx);
-    
+
     ff_init_scantable(s->dsp.idct_permutation, &s->scantable, ff_zigzag_direct);
 
     /* initialize to an impossible value which will force a recalculation
@@ -2270,7 +2270,7 @@ static int vp3_decode_init(AVCodecContext *avctx)
         s->y_superblock_width, s->y_superblock_height, y_superblock_count);
     debug_init("  C superblocks: %d x %d, %d total\n",
         s->c_superblock_width, s->c_superblock_height, c_superblock_count);
-    debug_init("  total superblocks = %d, U starts @ %d, V starts @ %d\n", 
+    debug_init("  total superblocks = %d, U starts @ %d, V starts @ %d\n",
         s->superblock_count, s->u_superblock_start, s->v_superblock_start);
     debug_init("  macroblocks: %d x %d, %d total\n",
         s->macroblock_width, s->macroblock_height, s->macroblock_count);
@@ -2288,18 +2288,18 @@ static int vp3_decode_init(AVCodecContext *avctx)
 
     if (!s->theora_tables)
     {
-	for (i = 0; i < 64; i++)
-	    s->coded_dc_scale_factor[i] = vp31_dc_scale_factor[i];
-	for (i = 0; i < 64; i++)
-	    s->coded_ac_scale_factor[i] = vp31_ac_scale_factor[i];
-	for (i = 0; i < 64; i++)
-	    s->coded_intra_y_dequant[i] = vp31_intra_y_dequant[i];
-	for (i = 0; i < 64; i++)
-	    s->coded_intra_c_dequant[i] = vp31_intra_c_dequant[i];
-	for (i = 0; i < 64; i++)
-	    s->coded_inter_dequant[i] = vp31_inter_dequant[i];
-	for (i = 0; i < 64; i++)
-	    s->filter_limit_values[i] = vp31_filter_limit_values[i];
+        for (i = 0; i < 64; i++)
+            s->coded_dc_scale_factor[i] = vp31_dc_scale_factor[i];
+        for (i = 0; i < 64; i++)
+            s->coded_ac_scale_factor[i] = vp31_ac_scale_factor[i];
+        for (i = 0; i < 64; i++)
+            s->coded_intra_y_dequant[i] = vp31_intra_y_dequant[i];
+        for (i = 0; i < 64; i++)
+            s->coded_intra_c_dequant[i] = vp31_intra_c_dequant[i];
+        for (i = 0; i < 64; i++)
+            s->coded_inter_dequant[i] = vp31_inter_dequant[i];
+        for (i = 0; i < 64; i++)
+            s->filter_limit_values[i] = vp31_filter_limit_values[i];
 
         /* init VLC tables */
         for (i = 0; i < 16; i++) {
@@ -2363,7 +2363,7 @@ static int vp3_decode_init(AVCodecContext *avctx)
         &superblock_run_length_vlc_table[0][1], 4, 2,
         &superblock_run_length_vlc_table[0][0], 4, 2, 0);
 
-    init_vlc(&s->fragment_run_length_vlc, 5, 31,
+    init_vlc(&s->fragment_run_length_vlc, 5, 30,
         &fragment_run_length_vlc_table[0][1], 4, 2,
         &fragment_run_length_vlc_table[0][0], 4, 2, 0);
 
@@ -2394,7 +2394,7 @@ static int vp3_decode_init(AVCodecContext *avctx)
 /*
  * This is the ffmpeg/libavcodec API frame decode function.
  */
-static int vp3_decode_frame(AVCodecContext *avctx, 
+static int vp3_decode_frame(AVCodecContext *avctx,
                             void *data, int *data_size,
                             uint8_t *buf, int buf_size)
 {
@@ -2404,39 +2404,44 @@ static int vp3_decode_frame(AVCodecContext *avctx,
     int i;
 
     init_get_bits(&gb, buf, buf_size * 8);
-    
+
     if (s->theora && get_bits1(&gb))
     {
-	int ptype = get_bits(&gb, 7);
-
-	skip_bits(&gb, 6*8); /* "theora" */
-	
-	switch(ptype)
-	{
-	    case 1:
-		theora_decode_comments(avctx, gb);
-		break;
-	    case 2:
-		theora_decode_tables(avctx, gb);
-    		init_dequantizer(s);
-		break;
-	    default:
-		av_log(avctx, AV_LOG_ERROR, "Unknown Theora config packet: %d\n", ptype);
-	}
-	return buf_size;
+#if 1
+        av_log(avctx, AV_LOG_ERROR, "Header packet passed to frame decoder, skipping\n");
+        return -1;
+#else
+        int ptype = get_bits(&gb, 7);
+
+        skip_bits(&gb, 6*8); /* "theora" */
+
+        switch(ptype)
+        {
+            case 1:
+                theora_decode_comments(avctx, gb);
+                break;
+            case 2:
+                theora_decode_tables(avctx, gb);
+                    init_dequantizer(s);
+                break;
+            default:
+                av_log(avctx, AV_LOG_ERROR, "Unknown Theora config packet: %d\n", ptype);
+        }
+        return buf_size;
+#endif
     }
 
     s->keyframe = !get_bits1(&gb);
     if (!s->theora)
-	skip_bits(&gb, 1);
+        skip_bits(&gb, 1);
     s->last_quality_index = s->quality_index;
     s->quality_index = get_bits(&gb, 6);
     if (s->theora >= 0x030200)
         skip_bits1(&gb);
 
     if (s->avctx->debug & FF_DEBUG_PICT_INFO)
-	av_log(s->avctx, AV_LOG_INFO, " VP3 %sframe #%d: Q index = %d\n",
-	    s->keyframe?"key":"", counter, s->quality_index);
+        av_log(s->avctx, AV_LOG_INFO, " VP3 %sframe #%d: Q index = %d\n",
+            s->keyframe?"key":"", counter, s->quality_index);
     counter++;
 
     if (s->quality_index != s->last_quality_index) {
@@ -2445,23 +2450,23 @@ static int vp3_decode_frame(AVCodecContext *avctx,
     }
 
     if (s->keyframe) {
-	if (!s->theora)
-	{
-	    skip_bits(&gb, 4); /* width code */
-	    skip_bits(&gb, 4); /* height code */
-	    if (s->version)
-	    {
-		s->version = get_bits(&gb, 5);
-		if (counter == 1)
-		    av_log(s->avctx, AV_LOG_DEBUG, "VP version: %d\n", s->version);
-	    }
-	}
-	if (s->version || s->theora)
-	{
-    	    if (get_bits1(&gb))
-    	        av_log(s->avctx, AV_LOG_ERROR, "Warning, unsupported keyframe coding type?!\n");
-	    skip_bits(&gb, 2); /* reserved? */
-	}
+        if (!s->theora)
+        {
+            skip_bits(&gb, 4); /* width code */
+            skip_bits(&gb, 4); /* height code */
+            if (s->version)
+            {
+                s->version = get_bits(&gb, 5);
+                if (counter == 1)
+                    av_log(s->avctx, AV_LOG_DEBUG, "VP version: %d\n", s->version);
+            }
+        }
+        if (s->version || s->theora)
+        {
+                if (get_bits1(&gb))
+                    av_log(s->avctx, AV_LOG_ERROR, "Warning, unsupported keyframe coding type?!\n");
+            skip_bits(&gb, 2); /* reserved? */
+        }
 
         if (s->last_frame.data[0] == s->golden_frame.data[0]) {
             if (s->golden_frame.data[0])
@@ -2485,12 +2490,12 @@ static int vp3_decode_frame(AVCodecContext *avctx,
 
         /* time to figure out pixel addresses? */
         if (!s->pixel_addresses_inited)
-	{
-	    if (!s->flipped_image)
-        	vp3_calculate_pixel_addresses(s);
-	    else
-		theora_calculate_pixel_addresses(s);
-	}
+        {
+            if (!s->flipped_image)
+                vp3_calculate_pixel_addresses(s);
+            else
+                theora_calculate_pixel_addresses(s);
+        }
     } else {
         /* allocate a new current frame */
         s->current_frame.reference = 3;
@@ -2597,7 +2602,7 @@ static int vp3_decode_end(AVCodecContext *avctx)
     av_free(s->superblock_macroblocks);
     av_free(s->macroblock_fragments);
     av_free(s->macroblock_coding);
-    
+
     /* release all frames */
     if (s->golden_frame.data[0] && s->golden_frame.data[0] != s->last_frame.data[0])
         avctx->release_buffer(avctx, &s->golden_frame);
@@ -2650,7 +2655,7 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb)
     minor = get_bits(&gb, 8); /* version minor */
     micro = get_bits(&gb, 8); /* version micro */
     av_log(avctx, AV_LOG_INFO, "Theora bitstream version %d.%d.%d\n",
-	major, minor, micro);
+        major, minor, micro);
 
     /* FIXME: endianess? */
     s->theora = (major << 16) | (minor << 8) | micro;
@@ -2659,20 +2664,35 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb)
     /* but previous versions have the image flipped relative to vp3 */
     if (s->theora < 0x030200)
     {
-	s->flipped_image = 1;
+        s->flipped_image = 1;
         av_log(avctx, AV_LOG_DEBUG, "Old (<alpha3) Theora bitstream, flipped image\n");
     }
 
     s->width = get_bits(&gb, 16) << 4;
     s->height = get_bits(&gb, 16) << 4;
-    
+
     if(avcodec_check_dimensions(avctx, s->width, s->height)){
+        av_log(avctx, AV_LOG_ERROR, "Invalid dimensions (%dx%d)\n", s->width, s->height);
         s->width= s->height= 0;
         return -1;
     }
-    
-    skip_bits(&gb, 24); /* frame width */
-    skip_bits(&gb, 24); /* frame height */
+
+    if (s->theora >= 0x030400)
+    {
+        skip_bits(&gb, 32); /* total number of superblocks in a frame */
+        // fixme, the next field is 36bits long
+        skip_bits(&gb, 32); /* total number of blocks in a frame */
+        skip_bits(&gb, 4); /* total number of blocks in a frame */
+        skip_bits(&gb, 32); /* total number of macroblocks in a frame */
+
+        skip_bits(&gb, 24); /* frame width */
+        skip_bits(&gb, 24); /* frame height */
+    }
+    else
+    {
+        skip_bits(&gb, 24); /* frame width */
+        skip_bits(&gb, 24); /* frame height */
+    }
 
     skip_bits(&gb, 8); /* offset x */
     skip_bits(&gb, 8); /* offset y */
@@ -2681,54 +2701,86 @@ static int theora_decode_header(AVCodecContext *avctx, GetBitContext gb)
     skip_bits(&gb, 32); /* fps denumerator */
     skip_bits(&gb, 24); /* aspect numerator */
     skip_bits(&gb, 24); /* aspect denumerator */
-    
+
     if (s->theora < 0x030200)
-	skip_bits(&gb, 5); /* keyframe frequency force */
+        skip_bits(&gb, 5); /* keyframe frequency force */
     skip_bits(&gb, 8); /* colorspace */
+    if (s->theora >= 0x030400)
+        skip_bits(&gb, 2); /* pixel format: 420,res,422,444 */
     skip_bits(&gb, 24); /* bitrate */
 
-    skip_bits(&gb, 6); /* last(?) quality index */
-    
+    skip_bits(&gb, 6); /* quality hint */
+
     if (s->theora >= 0x030200)
     {
-	skip_bits(&gb, 5); /* keyframe frequency force */
-	skip_bits(&gb, 5); /* spare bits */
+        skip_bits(&gb, 5); /* keyframe frequency force */
+
+        if (s->theora < 0x030400)
+            skip_bits(&gb, 5); /* spare bits */
     }
-    
+
 //    align_get_bits(&gb);
-    
+
     avctx->width = s->width;
     avctx->height = s->height;
 
     return 0;
 }
 
-static int theora_decode_comments(AVCodecContext *avctx, GetBitContext gb)
+static inline int theora_get_32bit(GetBitContext gb)
 {
-    int nb_comments, i, tmp;
+    int ret = get_bits(&gb, 8);
+    ret += get_bits(&gb, 8) << 8;
+    ret += get_bits(&gb, 8) << 16;
+    ret += get_bits(&gb, 8) << 24;
 
-    tmp = get_bits_long(&gb, 32);
-    tmp = be2me_32(tmp);
-    while(tmp--)
-	    skip_bits(&gb, 8);
+    return ret;
+}
 
-    nb_comments = get_bits_long(&gb, 32);
-    nb_comments = be2me_32(nb_comments);
-    for (i = 0; i < nb_comments; i++)
+static int theora_decode_comments(AVCodecContext *avctx, GetBitContext gb)
+{
+    Vp3DecodeContext *s = avctx->priv_data;
+    int len;
+
+    if (s->theora <= 0x030200)
     {
-	tmp = get_bits_long(&gb, 32);
-	tmp = be2me_32(tmp);
-	while(tmp--)
-	    skip_bits(&gb, 8);
+        int i, comments;
+
+        // vendor string
+        len = get_bits_long(&gb, 32);
+        len = le2me_32(len);
+        while(len--)
+            skip_bits(&gb, 8);
+
+        // user comments
+        comments = get_bits_long(&gb, 32);
+        comments = le2me_32(comments);
+        for (i = 0; i < comments; i++)
+        {
+            len = get_bits_long(&gb, 32);
+            len = be2me_32(len);
+            while(len--)
+                skip_bits(&gb, 8);
+        }
+    }
+    else
+    {
+        do {
+            len = get_bits_long(&gb, 32);
+            len = le2me_32(len);
+            if (len <= 0)
+                break;
+            while (len--)
+                skip_bits(&gb, 8);
+        } while (1);
     }
-    
     return 0;
 }
 
 static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb)
 {
     Vp3DecodeContext *s = avctx->priv_data;
-    int i, n;
+    int i, n, matrices;
 
     if (s->theora >= 0x030200) {
         n = get_bits(&gb, 3);
@@ -2736,14 +2788,14 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb)
         for (i = 0; i < 64; i++)
             s->filter_limit_values[i] = get_bits(&gb, n);
     }
-    
+
     if (s->theora >= 0x030200)
         n = get_bits(&gb, 4) + 1;
     else
         n = 16;
     /* quality threshold table */
     for (i = 0; i < 64; i++)
-	s->coded_ac_scale_factor[i] = get_bits(&gb, n);
+        s->coded_ac_scale_factor[i] = get_bits(&gb, n);
 
     if (s->theora >= 0x030200)
         n = get_bits(&gb, 4) + 1;
@@ -2751,29 +2803,34 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb)
         n = 16;
     /* dc scale factor table */
     for (i = 0; i < 64; i++)
-	s->coded_dc_scale_factor[i] = get_bits(&gb, n);
+        s->coded_dc_scale_factor[i] = get_bits(&gb, n);
 
     if (s->theora >= 0x030200)
-        n = get_bits(&gb, 9) + 1;
+        matrices = get_bits(&gb, 9) + 1;
     else
-        n = 3;
-    if (n != 3) {
-        av_log(NULL,AV_LOG_ERROR, "unsupported nbms : %d\n", n);
-        return -1;
+        matrices = 3;
+    if (matrices != 3) {
+        av_log(avctx,AV_LOG_ERROR, "unsupported matrices: %d\n", matrices);
+//        return -1;
     }
     /* y coeffs */
     for (i = 0; i < 64; i++)
-	s->coded_intra_y_dequant[i] = get_bits(&gb, 8);
+        s->coded_intra_y_dequant[i] = get_bits(&gb, 8);
 
     /* uv coeffs */
     for (i = 0; i < 64; i++)
-	s->coded_intra_c_dequant[i] = get_bits(&gb, 8);
+        s->coded_intra_c_dequant[i] = get_bits(&gb, 8);
 
     /* inter coeffs */
     for (i = 0; i < 64; i++)
-	s->coded_inter_dequant[i] = get_bits(&gb, 8);
+        s->coded_inter_dequant[i] = get_bits(&gb, 8);
+
+    /* skip unknown matrices */
+    n = matrices - 3;
+    while(n--)
+        for (i = 0; i < 64; i++)
+            skip_bits(&gb, 8);
 
-    /* Huffman tables */
     for (i = 0; i <= 1; i++) {
         for (n = 0; n <= 2; n++) {
             int newqr;
@@ -2787,17 +2844,20 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb)
             }
             else {
                 int qi = 0;
-                skip_bits(&gb, av_log2(2)+1);
+                skip_bits(&gb, av_log2(matrices-1)+1);
                 while (qi < 63) {
                     qi += get_bits(&gb, av_log2(63-qi)+1) + 1;
-                    skip_bits(&gb, av_log2(2)+1);
+                    skip_bits(&gb, av_log2(matrices-1)+1);
+                }
+                if (qi > 63) {
+                    av_log(avctx, AV_LOG_ERROR, "invalid qi %d > 63\n", qi);
+                    return -1;
                 }
-                if (qi > 63)
-                    av_log(NULL, AV_LOG_ERROR, "error...\n");
             }
         }
     }
 
+    /* Huffman tables */
     for (s->hti = 0; s->hti < 80; s->hti++) {
         s->entries = 0;
         s->huff_code_size = 1;
@@ -2808,9 +2868,9 @@ static int theora_decode_tables(AVCodecContext *avctx, GetBitContext gb)
             read_huffman_tree(avctx, &gb);
         }
     }
-    
+
     s->theora_tables = 1;
-    
+
     return 0;
 }
 
@@ -2821,11 +2881,14 @@ static int theora_decode_init(AVCodecContext *avctx)
     int ptype;
     uint8_t *p= avctx->extradata;
     int op_bytes, i;
-    
+
     s->theora = 1;
 
     if (!avctx->extradata_size)
-	return -1;
+    {
+        av_log(avctx, AV_LOG_ERROR, "Missing extradata!\n");
+        return -1;
+    }
 
   for(i=0;i<3;i++) {
     op_bytes = *(p++)<<8;
@@ -2836,23 +2899,31 @@ static int theora_decode_init(AVCodecContext *avctx)
 
     ptype = get_bits(&gb, 8);
     debug_vp3("Theora headerpacket type: %x\n", ptype);
-	    
-    if (!(ptype & 0x80))
-	return -1;
-	
+
+     if (!(ptype & 0x80))
+     {
+        av_log(avctx, AV_LOG_ERROR, "Invalid extradata!\n");
+        return -1;
+     }
+
+    // FIXME: check for this aswell
     skip_bits(&gb, 6*8); /* "theora" */
-	
+
     switch(ptype)
     {
         case 0x80:
             theora_decode_header(avctx, gb);
-    	    break;
-	case 0x81:
-	    theora_decode_comments(avctx, gb);
-	    break;
-	case 0x82:
-	    theora_decode_tables(avctx, gb);
-	    break;
+                break;
+        case 0x81:
+// FIXME: is this needed? it breaks sometimes
+//            theora_decode_comments(avctx, gb);
+            break;
+        case 0x82:
+            theora_decode_tables(avctx, gb);
+            break;
+        default:
+            av_log(avctx, AV_LOG_ERROR, "Unknown Theora config packet: %d\n", ptype&~0x80);
+            break;
     }
   }
 
diff --git a/src/libffmpeg/libavcodec/vp3data.h b/src/libffmpeg/libavcodec/vp3data.h
index 8bead2fc1..51cbae8db 100644
--- a/src/libffmpeg/libavcodec/vp3data.h
+++ b/src/libffmpeg/libavcodec/vp3data.h
@@ -14,7 +14,7 @@ static const int16_t vp31_intra_y_dequant[64] =
         72,  92,  95,  98, 112, 100, 103,  99
 };
 
-/* these coefficients dequantize intraframe C plane coefficients 
+/* these coefficients dequantize intraframe C plane coefficients
  * (note: same as JPEG) */
 static const int16_t vp31_intra_c_dequant[64] =
 {       17,  18,     24,     47,     99,     99,     99,     99,
@@ -61,7 +61,7 @@ static const uint32_t vp31_ac_scale_factor[64] =
    21,   19,   18,   17,   15,   13,  12,  10
 };
 
-static const uint32_t vp31_filter_limit_values[64] = 
+static const uint32_t vp31_filter_limit_values[64] =
 {  30, 25, 20, 20, 15, 15, 14, 14,
    13, 13, 12, 12, 11, 11, 10, 10,
     9,  9,  8,  8,  7,  7,  7,  7,
@@ -88,7 +88,7 @@ static const uint16_t superblock_run_length_vlc_table[34][2] = {
     { 0x3E4, 10 },    { 0x3E5, 10 },    { 0x3E6, 10 },    { 0x3E7, 10 },
     { 0x3E8, 10 },    { 0x3E9, 10 },    { 0x3EA, 10 },    { 0x3EB, 10 },
     { 0x3EC, 10 },    { 0x3ED, 10 },    { 0x3EE, 10 },    { 0x3EF, 10 },
-    
+
     { 0x3F, 6 }  /* this last VLC is a special case for reading 12 more
                     bits from stream and adding the value 34 */
 };
@@ -118,7 +118,7 @@ static const uint16_t fragment_run_length_vlc_table[30][2] = {
     { 0x1FC, 9 },  { 0x1FD, 9 },  { 0x1FE, 9 },  { 0x1FF, 9 }
 };
 
-static const uint8_t mode_code_vlc_table[30][2] = {
+static const uint8_t mode_code_vlc_table[8][2] = {
     { 0, 1 },      { 2, 2 },
     { 6, 3 },      { 14, 4 },
     { 30, 5 },     { 62, 6 },
@@ -129,7 +129,7 @@ static const uint8_t motion_vector_vlc_table[63][2] = {
     { 0, 3 },
     { 1, 3 },
     { 2, 3 },
-    
+
     { 6, 4 },    { 7, 4 },
 
     { 8, 4 },    { 9, 4 },
@@ -164,7 +164,7 @@ static const int motion_vector_table[63] = {
 };
 
 static const int8_t fixed_motion_vector_table[64] = {
-   0,   0,   1,  -1,   2,  -2,   3,  -3,   
+   0,   0,   1,  -1,   2,  -2,   3,  -3,
    4,  -4,   5,  -5,   6,  -6,   7,  -7,
    8,  -8,   9,  -9,  10, -10,  11, -11,
   12, -12,  13, -13,  14, -14,  15, -15,
@@ -203,7 +203,7 @@ static const int coeff_get_bits[32] = {
     1, 1, 1, 1,  /* 13..16 are constants but still need sign bit */
     2, 3, 4, 5, 6, 10,  /* 17..22, for reading large coeffs */
     1, 1, 1, 1, 1, 1, 1,  /* 23..29 are constants but still need sign bit */
-    2, 2  /* 30..31 */    
+    2, 2  /* 30..31 */
 };
 
 static const int16_t coeff_table_token_7_8[1] = { 0 };
@@ -234,151 +234,151 @@ static const int16_t coeff_table_token_19[16] = {
 };
 
 static const int16_t coeff_table_token_20[32] = {
-    21, 22, 23, 24, 25, 26, 27, 28, 
-    29, 30, 31, 32, 33, 34, 35, 36, 
-    -21, -22, -23, -24, -25, -26, -27, -28, 
+    21, 22, 23, 24, 25, 26, 27, 28,
+    29, 30, 31, 32, 33, 34, 35, 36,
+    -21, -22, -23, -24, -25, -26, -27, -28,
     -29, -30, -31, -32, -33, -34, -35, -36
 };
 
 static const int16_t coeff_table_token_21[64] = {
-    37, 38, 39, 40, 41, 42, 43, 44, 
-    45, 46, 47, 48, 49, 50, 51, 52, 
-    53, 54, 55, 56, 57, 58, 59, 60, 
-    61, 62, 63, 64, 65, 66, 67, 68, 
-    -37, -38, -39, -40, -41, -42, -43, -44, 
-    -45, -46, -47, -48, -49, -50, -51, -52, 
-    -53, -54, -55, -56, -57, -58, -59, -60, 
+    37, 38, 39, 40, 41, 42, 43, 44,
+    45, 46, 47, 48, 49, 50, 51, 52,
+    53, 54, 55, 56, 57, 58, 59, 60,
+    61, 62, 63, 64, 65, 66, 67, 68,
+    -37, -38, -39, -40, -41, -42, -43, -44,
+    -45, -46, -47, -48, -49, -50, -51, -52,
+    -53, -54, -55, -56, -57, -58, -59, -60,
     -61, -62, -63, -64, -65, -66, -67, -68
 };
 
 static const int16_t coeff_table_token_22[1024] = {
-    69, 70, 71, 72, 73, 74, 75, 76, 
-    77, 78, 79, 80, 81, 82, 83, 84, 
-    85, 86, 87, 88, 89, 90, 91, 92, 
-    93, 94, 95, 96, 97, 98, 99, 100, 
-    101, 102, 103, 104, 105, 106, 107, 108, 
-    109, 110, 111, 112, 113, 114, 115, 116, 
-    117, 118, 119, 120, 121, 122, 123, 124, 
-    125, 126, 127, 128, 129, 130, 131, 132, 
-    133, 134, 135, 136, 137, 138, 139, 140, 
-    141, 142, 143, 144, 145, 146, 147, 148, 
-    149, 150, 151, 152, 153, 154, 155, 156, 
-    157, 158, 159, 160, 161, 162, 163, 164, 
-    165, 166, 167, 168, 169, 170, 171, 172, 
-    173, 174, 175, 176, 177, 178, 179, 180, 
-    181, 182, 183, 184, 185, 186, 187, 188, 
-    189, 190, 191, 192, 193, 194, 195, 196, 
-    197, 198, 199, 200, 201, 202, 203, 204, 
-    205, 206, 207, 208, 209, 210, 211, 212, 
-    213, 214, 215, 216, 217, 218, 219, 220, 
-    221, 222, 223, 224, 225, 226, 227, 228, 
-    229, 230, 231, 232, 233, 234, 235, 236, 
-    237, 238, 239, 240, 241, 242, 243, 244, 
-    245, 246, 247, 248, 249, 250, 251, 252, 
-    253, 254, 255, 256, 257, 258, 259, 260, 
-    261, 262, 263, 264, 265, 266, 267, 268, 
-    269, 270, 271, 272, 273, 274, 275, 276, 
-    277, 278, 279, 280, 281, 282, 283, 284, 
-    285, 286, 287, 288, 289, 290, 291, 292, 
-    293, 294, 295, 296, 297, 298, 299, 300, 
-    301, 302, 303, 304, 305, 306, 307, 308, 
-    309, 310, 311, 312, 313, 314, 315, 316, 
-    317, 318, 319, 320, 321, 322, 323, 324, 
-    325, 326, 327, 328, 329, 330, 331, 332, 
-    333, 334, 335, 336, 337, 338, 339, 340, 
-    341, 342, 343, 344, 345, 346, 347, 348, 
-    349, 350, 351, 352, 353, 354, 355, 356, 
-    357, 358, 359, 360, 361, 362, 363, 364, 
-    365, 366, 367, 368, 369, 370, 371, 372, 
-    373, 374, 375, 376, 377, 378, 379, 380, 
-    381, 382, 383, 384, 385, 386, 387, 388, 
-    389, 390, 391, 392, 393, 394, 395, 396, 
-    397, 398, 399, 400, 401, 402, 403, 404, 
-    405, 406, 407, 408, 409, 410, 411, 412, 
-    413, 414, 415, 416, 417, 418, 419, 420, 
-    421, 422, 423, 424, 425, 426, 427, 428, 
-    429, 430, 431, 432, 433, 434, 435, 436, 
-    437, 438, 439, 440, 441, 442, 443, 444, 
-    445, 446, 447, 448, 449, 450, 451, 452, 
-    453, 454, 455, 456, 457, 458, 459, 460, 
-    461, 462, 463, 464, 465, 466, 467, 468, 
-    469, 470, 471, 472, 473, 474, 475, 476, 
-    477, 478, 479, 480, 481, 482, 483, 484, 
-    485, 486, 487, 488, 489, 490, 491, 492, 
-    493, 494, 495, 496, 497, 498, 499, 500, 
-    501, 502, 503, 504, 505, 506, 507, 508, 
-    509, 510, 511, 512, 513, 514, 515, 516, 
-    517, 518, 519, 520, 521, 522, 523, 524, 
-    525, 526, 527, 528, 529, 530, 531, 532, 
-    533, 534, 535, 536, 537, 538, 539, 540, 
-    541, 542, 543, 544, 545, 546, 547, 548, 
-    549, 550, 551, 552, 553, 554, 555, 556, 
-    557, 558, 559, 560, 561, 562, 563, 564, 
-    565, 566, 567, 568, 569, 570, 571, 572, 
+    69, 70, 71, 72, 73, 74, 75, 76,
+    77, 78, 79, 80, 81, 82, 83, 84,
+    85, 86, 87, 88, 89, 90, 91, 92,
+    93, 94, 95, 96, 97, 98, 99, 100,
+    101, 102, 103, 104, 105, 106, 107, 108,
+    109, 110, 111, 112, 113, 114, 115, 116,
+    117, 118, 119, 120, 121, 122, 123, 124,
+    125, 126, 127, 128, 129, 130, 131, 132,
+    133, 134, 135, 136, 137, 138, 139, 140,
+    141, 142, 143, 144, 145, 146, 147, 148,
+    149, 150, 151, 152, 153, 154, 155, 156,
+    157, 158, 159, 160, 161, 162, 163, 164,
+    165, 166, 167, 168, 169, 170, 171, 172,
+    173, 174, 175, 176, 177, 178, 179, 180,
+    181, 182, 183, 184, 185, 186, 187, 188,
+    189, 190, 191, 192, 193, 194, 195, 196,
+    197, 198, 199, 200, 201, 202, 203, 204,
+    205, 206, 207, 208, 209, 210, 211, 212,
+    213, 214, 215, 216, 217, 218, 219, 220,
+    221, 222, 223, 224, 225, 226, 227, 228,
+    229, 230, 231, 232, 233, 234, 235, 236,
+    237, 238, 239, 240, 241, 242, 243, 244,
+    245, 246, 247, 248, 249, 250, 251, 252,
+    253, 254, 255, 256, 257, 258, 259, 260,
+    261, 262, 263, 264, 265, 266, 267, 268,
+    269, 270, 271, 272, 273, 274, 275, 276,
+    277, 278, 279, 280, 281, 282, 283, 284,
+    285, 286, 287, 288, 289, 290, 291, 292,
+    293, 294, 295, 296, 297, 298, 299, 300,
+    301, 302, 303, 304, 305, 306, 307, 308,
+    309, 310, 311, 312, 313, 314, 315, 316,
+    317, 318, 319, 320, 321, 322, 323, 324,
+    325, 326, 327, 328, 329, 330, 331, 332,
+    333, 334, 335, 336, 337, 338, 339, 340,
+    341, 342, 343, 344, 345, 346, 347, 348,
+    349, 350, 351, 352, 353, 354, 355, 356,
+    357, 358, 359, 360, 361, 362, 363, 364,
+    365, 366, 367, 368, 369, 370, 371, 372,
+    373, 374, 375, 376, 377, 378, 379, 380,
+    381, 382, 383, 384, 385, 386, 387, 388,
+    389, 390, 391, 392, 393, 394, 395, 396,
+    397, 398, 399, 400, 401, 402, 403, 404,
+    405, 406, 407, 408, 409, 410, 411, 412,
+    413, 414, 415, 416, 417, 418, 419, 420,
+    421, 422, 423, 424, 425, 426, 427, 428,
+    429, 430, 431, 432, 433, 434, 435, 436,
+    437, 438, 439, 440, 441, 442, 443, 444,
+    445, 446, 447, 448, 449, 450, 451, 452,
+    453, 454, 455, 456, 457, 458, 459, 460,
+    461, 462, 463, 464, 465, 466, 467, 468,
+    469, 470, 471, 472, 473, 474, 475, 476,
+    477, 478, 479, 480, 481, 482, 483, 484,
+    485, 486, 487, 488, 489, 490, 491, 492,
+    493, 494, 495, 496, 497, 498, 499, 500,
+    501, 502, 503, 504, 505, 506, 507, 508,
+    509, 510, 511, 512, 513, 514, 515, 516,
+    517, 518, 519, 520, 521, 522, 523, 524,
+    525, 526, 527, 528, 529, 530, 531, 532,
+    533, 534, 535, 536, 537, 538, 539, 540,
+    541, 542, 543, 544, 545, 546, 547, 548,
+    549, 550, 551, 552, 553, 554, 555, 556,
+    557, 558, 559, 560, 561, 562, 563, 564,
+    565, 566, 567, 568, 569, 570, 571, 572,
     573, 574, 575, 576, 577, 578, 579, 580,
-    -69, -70, -71, -72, -73, -74, -75, -76, 
-    -77, -78, -79, -80, -81, -82, -83, -84, 
-    -85, -86, -87, -88, -89, -90, -91, -92, 
-    -93, -94, -95, -96, -97, -98, -99, -100, 
-    -101, -102, -103, -104, -105, -106, -107, -108, 
-    -109, -110, -111, -112, -113, -114, -115, -116, 
-    -117, -118, -119, -120, -121, -122, -123, -124, 
-    -125, -126, -127, -128, -129, -130, -131, -132, 
-    -133, -134, -135, -136, -137, -138, -139, -140, 
-    -141, -142, -143, -144, -145, -146, -147, -148, 
-    -149, -150, -151, -152, -153, -154, -155, -156, 
-    -157, -158, -159, -160, -161, -162, -163, -164, 
-    -165, -166, -167, -168, -169, -170, -171, -172, 
-    -173, -174, -175, -176, -177, -178, -179, -180, 
-    -181, -182, -183, -184, -185, -186, -187, -188, 
-    -189, -190, -191, -192, -193, -194, -195, -196, 
-    -197, -198, -199, -200, -201, -202, -203, -204, 
-    -205, -206, -207, -208, -209, -210, -211, -212, 
-    -213, -214, -215, -216, -217, -218, -219, -220, 
-    -221, -222, -223, -224, -225, -226, -227, -228, 
-    -229, -230, -231, -232, -233, -234, -235, -236, 
-    -237, -238, -239, -240, -241, -242, -243, -244, 
-    -245, -246, -247, -248, -249, -250, -251, -252, 
-    -253, -254, -255, -256, -257, -258, -259, -260, 
-    -261, -262, -263, -264, -265, -266, -267, -268, 
-    -269, -270, -271, -272, -273, -274, -275, -276, 
-    -277, -278, -279, -280, -281, -282, -283, -284, 
-    -285, -286, -287, -288, -289, -290, -291, -292, 
-    -293, -294, -295, -296, -297, -298, -299, -300, 
-    -301, -302, -303, -304, -305, -306, -307, -308, 
-    -309, -310, -311, -312, -313, -314, -315, -316, 
-    -317, -318, -319, -320, -321, -322, -323, -324, 
-    -325, -326, -327, -328, -329, -330, -331, -332, 
-    -333, -334, -335, -336, -337, -338, -339, -340, 
-    -341, -342, -343, -344, -345, -346, -347, -348, 
-    -349, -350, -351, -352, -353, -354, -355, -356, 
-    -357, -358, -359, -360, -361, -362, -363, -364, 
-    -365, -366, -367, -368, -369, -370, -371, -372, 
-    -373, -374, -375, -376, -377, -378, -379, -380, 
-    -381, -382, -383, -384, -385, -386, -387, -388, 
-    -389, -390, -391, -392, -393, -394, -395, -396, 
-    -397, -398, -399, -400, -401, -402, -403, -404, 
-    -405, -406, -407, -408, -409, -410, -411, -412, 
-    -413, -414, -415, -416, -417, -418, -419, -420, 
-    -421, -422, -423, -424, -425, -426, -427, -428, 
-    -429, -430, -431, -432, -433, -434, -435, -436, 
-    -437, -438, -439, -440, -441, -442, -443, -444, 
-    -445, -446, -447, -448, -449, -450, -451, -452, 
-    -453, -454, -455, -456, -457, -458, -459, -460, 
-    -461, -462, -463, -464, -465, -466, -467, -468, 
-    -469, -470, -471, -472, -473, -474, -475, -476, 
-    -477, -478, -479, -480, -481, -482, -483, -484, 
-    -485, -486, -487, -488, -489, -490, -491, -492, 
-    -493, -494, -495, -496, -497, -498, -499, -500, 
-    -501, -502, -503, -504, -505, -506, -507, -508, 
-    -509, -510, -511, -512, -513, -514, -515, -516, 
-    -517, -518, -519, -520, -521, -522, -523, -524, 
-    -525, -526, -527, -528, -529, -530, -531, -532, 
-    -533, -534, -535, -536, -537, -538, -539, -540, 
-    -541, -542, -543, -544, -545, -546, -547, -548, 
-    -549, -550, -551, -552, -553, -554, -555, -556, 
-    -557, -558, -559, -560, -561, -562, -563, -564, 
-    -565, -566, -567, -568, -569, -570, -571, -572, 
+    -69, -70, -71, -72, -73, -74, -75, -76,
+    -77, -78, -79, -80, -81, -82, -83, -84,
+    -85, -86, -87, -88, -89, -90, -91, -92,
+    -93, -94, -95, -96, -97, -98, -99, -100,
+    -101, -102, -103, -104, -105, -106, -107, -108,
+    -109, -110, -111, -112, -113, -114, -115, -116,
+    -117, -118, -119, -120, -121, -122, -123, -124,
+    -125, -126, -127, -128, -129, -130, -131, -132,
+    -133, -134, -135, -136, -137, -138, -139, -140,
+    -141, -142, -143, -144, -145, -146, -147, -148,
+    -149, -150, -151, -152, -153, -154, -155, -156,
+    -157, -158, -159, -160, -161, -162, -163, -164,
+    -165, -166, -167, -168, -169, -170, -171, -172,
+    -173, -174, -175, -176, -177, -178, -179, -180,
+    -181, -182, -183, -184, -185, -186, -187, -188,
+    -189, -190, -191, -192, -193, -194, -195, -196,
+    -197, -198, -199, -200, -201, -202, -203, -204,
+    -205, -206, -207, -208, -209, -210, -211, -212,
+    -213, -214, -215, -216, -217, -218, -219, -220,
+    -221, -222, -223, -224, -225, -226, -227, -228,
+    -229, -230, -231, -232, -233, -234, -235, -236,
+    -237, -238, -239, -240, -241, -242, -243, -244,
+    -245, -246, -247, -248, -249, -250, -251, -252,
+    -253, -254, -255, -256, -257, -258, -259, -260,
+    -261, -262, -263, -264, -265, -266, -267, -268,
+    -269, -270, -271, -272, -273, -274, -275, -276,
+    -277, -278, -279, -280, -281, -282, -283, -284,
+    -285, -286, -287, -288, -289, -290, -291, -292,
+    -293, -294, -295, -296, -297, -298, -299, -300,
+    -301, -302, -303, -304, -305, -306, -307, -308,
+    -309, -310, -311, -312, -313, -314, -315, -316,
+    -317, -318, -319, -320, -321, -322, -323, -324,
+    -325, -326, -327, -328, -329, -330, -331, -332,
+    -333, -334, -335, -336, -337, -338, -339, -340,
+    -341, -342, -343, -344, -345, -346, -347, -348,
+    -349, -350, -351, -352, -353, -354, -355, -356,
+    -357, -358, -359, -360, -361, -362, -363, -364,
+    -365, -366, -367, -368, -369, -370, -371, -372,
+    -373, -374, -375, -376, -377, -378, -379, -380,
+    -381, -382, -383, -384, -385, -386, -387, -388,
+    -389, -390, -391, -392, -393, -394, -395, -396,
+    -397, -398, -399, -400, -401, -402, -403, -404,
+    -405, -406, -407, -408, -409, -410, -411, -412,
+    -413, -414, -415, -416, -417, -418, -419, -420,
+    -421, -422, -423, -424, -425, -426, -427, -428,
+    -429, -430, -431, -432, -433, -434, -435, -436,
+    -437, -438, -439, -440, -441, -442, -443, -444,
+    -445, -446, -447, -448, -449, -450, -451, -452,
+    -453, -454, -455, -456, -457, -458, -459, -460,
+    -461, -462, -463, -464, -465, -466, -467, -468,
+    -469, -470, -471, -472, -473, -474, -475, -476,
+    -477, -478, -479, -480, -481, -482, -483, -484,
+    -485, -486, -487, -488, -489, -490, -491, -492,
+    -493, -494, -495, -496, -497, -498, -499, -500,
+    -501, -502, -503, -504, -505, -506, -507, -508,
+    -509, -510, -511, -512, -513, -514, -515, -516,
+    -517, -518, -519, -520, -521, -522, -523, -524,
+    -525, -526, -527, -528, -529, -530, -531, -532,
+    -533, -534, -535, -536, -537, -538, -539, -540,
+    -541, -542, -543, -544, -545, -546, -547, -548,
+    -549, -550, -551, -552, -553, -554, -555, -556,
+    -557, -558, -559, -560, -561, -562, -563, -564,
+    -565, -566, -567, -568, -569, -570, -571, -572,
     -573, -574, -575, -576, -577, -578, -579, -580
 };
 
diff --git a/src/libffmpeg/libavcodec/vp3dsp.c b/src/libffmpeg/libavcodec/vp3dsp.c
index 1fa6d094b..0cbe8d551 100644
--- a/src/libffmpeg/libavcodec/vp3dsp.c
+++ b/src/libffmpeg/libavcodec/vp3dsp.c
@@ -13,12 +13,12 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file vp3dsp.c
- * Standard C DSP-oriented functions cribbed from the original VP3 
+ * Standard C DSP-oriented functions cribbed from the original VP3
  * source code.
  */
 
@@ -45,7 +45,7 @@ static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int typ
     int t1, t2;
 
     int i;
-    
+
     /* Inverse DCT on the rows now */
     for (i = 0; i < 8; i++) {
         /* Check for non-zero values */
@@ -134,7 +134,7 @@ static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int typ
 
         ip += 8;            /* next row */
     }
-    
+
     ip = input;
 
     for ( i = 0; i < 8; i++) {
@@ -224,49 +224,49 @@ static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int typ
             if(type==0){
                 ip[0*8] = (_Gd + _Cd )  >> 4;
                 ip[7*8] = (_Gd - _Cd )  >> 4;
-    
+
                 ip[1*8] = (_Add + _Hd ) >> 4;
                 ip[2*8] = (_Add - _Hd ) >> 4;
-    
+
                 ip[3*8] = (_Ed + _Dd )  >> 4;
                 ip[4*8] = (_Ed - _Dd )  >> 4;
-    
+
                 ip[5*8] = (_Fd + _Bdd ) >> 4;
                 ip[6*8] = (_Fd - _Bdd ) >> 4;
             }else if(type==1){
                 dst[0*stride] = cm[(_Gd + _Cd )  >> 4];
                 dst[7*stride] = cm[(_Gd - _Cd )  >> 4];
-    
+
                 dst[1*stride] = cm[(_Add + _Hd ) >> 4];
                 dst[2*stride] = cm[(_Add - _Hd ) >> 4];
-    
+
                 dst[3*stride] = cm[(_Ed + _Dd )  >> 4];
                 dst[4*stride] = cm[(_Ed - _Dd )  >> 4];
-    
+
                 dst[5*stride] = cm[(_Fd + _Bdd ) >> 4];
                 dst[6*stride] = cm[(_Fd - _Bdd ) >> 4];
             }else{
                 dst[0*stride] = cm[dst[0*stride] + ((_Gd + _Cd )  >> 4)];
                 dst[7*stride] = cm[dst[7*stride] + ((_Gd - _Cd )  >> 4)];
-    
+
                 dst[1*stride] = cm[dst[1*stride] + ((_Add + _Hd ) >> 4)];
                 dst[2*stride] = cm[dst[2*stride] + ((_Add - _Hd ) >> 4)];
-    
+
                 dst[3*stride] = cm[dst[3*stride] + ((_Ed + _Dd )  >> 4)];
                 dst[4*stride] = cm[dst[4*stride] + ((_Ed - _Dd )  >> 4)];
-    
+
                 dst[5*stride] = cm[dst[5*stride] + ((_Fd + _Bdd ) >> 4)];
                 dst[6*stride] = cm[dst[6*stride] + ((_Fd - _Bdd ) >> 4)];
             }
 
         } else {
             if(type==0){
-                ip[0*8] = 
-                ip[1*8] = 
-                ip[2*8] = 
-                ip[3*8] = 
-                ip[4*8] = 
-                ip[5*8] = 
+                ip[0*8] =
+                ip[1*8] =
+                ip[2*8] =
+                ip[3*8] =
+                ip[4*8] =
+                ip[5*8] =
                 ip[6*8] =
                 ip[7*8] = ((xC4S4 * ip[0*8] + (IdctAdjustBeforeShift<<16))>>20);
             }else if(type==1){
@@ -301,7 +301,7 @@ static always_inline void idct(uint8_t *dst, int stride, int16_t *input, int typ
 void ff_vp3_idct_c(DCTELEM *block/* align 16*/){
     idct(NULL, 0, block, 0);
 }
-    
+
 void ff_vp3_idct_put_c(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/){
     idct(dest, line_size, block, 1);
 }
diff --git a/src/libffmpeg/libavcodec/vqavideo.c b/src/libffmpeg/libavcodec/vqavideo.c
index fb0871e18..7f0c95206 100644
--- a/src/libffmpeg/libavcodec/vqavideo.c
+++ b/src/libffmpeg/libavcodec/vqavideo.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -456,7 +456,7 @@ static void vqa_decode_chunk(VqaContext *s)
         index_shift = 4;
     else
         index_shift = 3;
-    for (y = 0; y < s->frame.linesize[0] * s->height; 
+    for (y = 0; y < s->frame.linesize[0] * s->height;
         y += s->frame.linesize[0] * s->vector_height) {
 
         for (x = y; x < y + s->width; x += 4, lobytes++, hibytes++) {
@@ -467,7 +467,7 @@ static void vqa_decode_chunk(VqaContext *s)
             switch (s->vqa_version) {
 
             case 1:
-/* still need sample media for this case (only one game, "Legend of 
+/* still need sample media for this case (only one game, "Legend of
  * Kyrandia III : Malcolm's Revenge", is known to use this version) */
                 lines = 0;
                 break;
@@ -517,7 +517,7 @@ static void vqa_decode_chunk(VqaContext *s)
         if (s->partial_countdown == 0) {
 
             /* time to replace codebook */
-            memcpy(s->codebook, s->next_codebook_buffer, 
+            memcpy(s->codebook, s->next_codebook_buffer,
                 s->next_codebook_buffer_index);
 
             /* reset accounting */
@@ -540,8 +540,8 @@ static void vqa_decode_chunk(VqaContext *s)
         if (s->partial_countdown == 0) {
 
             /* decompress codebook */
-            decode_format80(s->next_codebook_buffer, 
-                s->next_codebook_buffer_index, 
+            decode_format80(s->next_codebook_buffer,
+                s->next_codebook_buffer_index,
                 s->codebook, s->codebook_size, 0);
 
             /* reset accounting */
diff --git a/src/libffmpeg/libavcodec/wmadata.h b/src/libffmpeg/libavcodec/wmadata.h
index ee1720710..e12c4792e 100644
--- a/src/libffmpeg/libavcodec/wmadata.h
+++ b/src/libffmpeg/libavcodec/wmadata.h
@@ -1389,24 +1389,24 @@ static const uint16_t levels5[40] = {
   1,  1,  1,  1,  1,  1,  1,  1,
   1,  1,  1,  1,  1,  1,  1,  1,
 };
-    
+
 static const CoefVLCTable coef_vlcs[6] = {
-    { 
+    {
         sizeof(coef0_huffbits), coef0_huffcodes, coef0_huffbits, levels0,
     },
-    { 
+    {
         sizeof(coef1_huffbits), coef1_huffcodes, coef1_huffbits, levels1,
     },
-    { 
+    {
         sizeof(coef2_huffbits), coef2_huffcodes, coef2_huffbits, levels2,
     },
-    { 
+    {
         sizeof(coef3_huffbits), coef3_huffcodes, coef3_huffbits, levels3,
     },
-    { 
+    {
         sizeof(coef4_huffbits), coef4_huffcodes, coef4_huffbits, levels4,
     },
-    { 
+    {
         sizeof(coef5_huffbits), coef5_huffcodes, coef5_huffbits, levels5,
     },
 };
diff --git a/src/libffmpeg/libavcodec/wmadec.c b/src/libffmpeg/libavcodec/wmadec.c
index 9ea685af1..c557a2a7a 100644
--- a/src/libffmpeg/libavcodec/wmadec.c
+++ b/src/libffmpeg/libavcodec/wmadec.c
@@ -14,20 +14,20 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 
 /**
  * @file wmadec.c
  * WMA compatible decoder.
  * This decoder handles Microsoft Windows Media Audio data, versions 1 & 2.
- * WMA v1 is identified by audio format 0x160 in Microsoft media files 
+ * WMA v1 is identified by audio format 0x160 in Microsoft media files
  * (ASF/AVI/WAV). WMA v2 is identified by audio format 0x161.
  *
  * To use this decoder, a calling application must supply the extra data
  * bytes provided with the WMA data. These are the extra, codec-specific
- * bytes at the end of a WAVEFORMATEX data structure. Transmit these bytes 
- * to the decoder using the extradata[_size] fields in AVCodecContext. There 
+ * bytes at the end of a WAVEFORMATEX data structure. Transmit these bytes
+ * to the decoder using the extradata[_size] fields in AVCodecContext. There
  * should be 4 extra bytes for v1 data and 6 extra bytes for v2 data.
  */
 
@@ -56,6 +56,8 @@
 
 #define LSP_POW_BITS 7
 
+#define VLCBITS 9
+
 typedef struct WMADecodeContext {
     GetBitContext gb;
     int sample_rate;
@@ -75,9 +77,9 @@ typedef struct WMADecodeContext {
     int coefs_start;               /* first coded coef */
     int coefs_end[BLOCK_NB_SIZES]; /* max number of coded coefficients */
     int exponent_high_sizes[BLOCK_NB_SIZES];
-    int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE]; 
+    int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE];
     VLC hgain_vlc;
-    
+
     /* coded values in high bands */
     int high_band_coded[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
     int high_band_values[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
@@ -100,15 +102,15 @@ typedef struct WMADecodeContext {
     int block_pos; /* current position in frame */
     uint8_t ms_stereo; /* true if mid/side stereo mode */
     uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */
-    float exponents[MAX_CHANNELS][BLOCK_MAX_SIZE] __attribute__((aligned(16)));
+    DECLARE_ALIGNED_16(float, exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]);
     float max_exponent[MAX_CHANNELS];
     int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
-    float coefs[MAX_CHANNELS][BLOCK_MAX_SIZE] __attribute__((aligned(16)));
+    DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]);
     MDCTContext mdct_ctx[BLOCK_NB_SIZES];
     float *windows[BLOCK_NB_SIZES];
-    FFTSample mdct_tmp[BLOCK_MAX_SIZE] __attribute__((aligned(16))); /* temporary storage for imdct */
+    DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */
     /* output buffer for one frame and the last for IMDCT windowing */
-    float frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2] __attribute__((aligned(16)));
+    DECLARE_ALIGNED_16(float, frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]);
     /* last frame info */
     uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
     int last_bitoffset;
@@ -171,7 +173,7 @@ static void dump_floats(const char *name, int prec, const float *tab, int n)
 #endif
 
 /* XXX: use same run/length optimization as mpeg decoders */
-static void init_coef_vlc(VLC *vlc, 
+static void init_coef_vlc(VLC *vlc,
                           uint16_t **prun_table, uint16_t **plevel_table,
                           const CoefVLCTable *vlc_table)
 {
@@ -213,7 +215,7 @@ static int wma_decode_init(AVCodecContext * avctx)
     volatile float bps;
     int sample_rate1;
     int coef_vlc_table;
-    
+
     s->sample_rate = avctx->sample_rate;
     s->nb_channels = avctx->channels;
     s->bit_rate = avctx->bit_rate;
@@ -224,7 +226,7 @@ static int wma_decode_init(AVCodecContext * avctx)
     } else {
         s->version = 2;
     }
-    
+
     /* extract flag infos */
     flags1 = 0;
     flags2 = 0;
@@ -233,7 +235,7 @@ static int wma_decode_init(AVCodecContext * avctx)
         flags1 = extradata[0] | (extradata[1] << 8);
         flags2 = extradata[2] | (extradata[3] << 8);
     } else if (s->version == 2 && avctx->extradata_size >= 6) {
-        flags1 = extradata[0] | (extradata[1] << 8) | 
+        flags1 = extradata[0] | (extradata[1] << 8) |
             (extradata[2] << 16) | (extradata[3] << 24);
         flags2 = extradata[4] | (extradata[5] << 8);
     }
@@ -244,7 +246,7 @@ static int wma_decode_init(AVCodecContext * avctx)
     /* compute MDCT block size */
     if (s->sample_rate <= 16000) {
         s->frame_len_bits = 9;
-    } else if (s->sample_rate <= 22050 || 
+    } else if (s->sample_rate <= 22050 ||
                (s->sample_rate <= 32000 && s->version == 1)) {
         s->frame_len_bits = 10;
     } else {
@@ -271,20 +273,20 @@ static int wma_decode_init(AVCodecContext * avctx)
     /* if version 2, then the rates are normalized */
     sample_rate1 = s->sample_rate;
     if (s->version == 2) {
-        if (sample_rate1 >= 44100) 
+        if (sample_rate1 >= 44100)
             sample_rate1 = 44100;
-        else if (sample_rate1 >= 22050) 
+        else if (sample_rate1 >= 22050)
             sample_rate1 = 22050;
-        else if (sample_rate1 >= 16000) 
+        else if (sample_rate1 >= 16000)
             sample_rate1 = 16000;
-        else if (sample_rate1 >= 11025) 
+        else if (sample_rate1 >= 11025)
             sample_rate1 = 11025;
-        else if (sample_rate1 >= 8000) 
+        else if (sample_rate1 >= 8000)
             sample_rate1 = 8000;
     }
 
     bps = (float)s->bit_rate / (float)(s->nb_channels * s->sample_rate);
-    s->byte_offset_bits = av_log2((int)(bps * s->frame_len / 8.0)) + 2;
+    s->byte_offset_bits = av_log2((int)(bps * s->frame_len / 8.0 + 0.5)) + 2;
 
     /* compute high frequency value and choose if noise coding should
        be activated */
@@ -299,7 +301,7 @@ static int wma_decode_init(AVCodecContext * avctx)
     } else if (sample_rate1 == 22050) {
         if (bps1 >= 1.16)
             s->use_noise_coding = 0;
-        else if (bps1 >= 0.72) 
+        else if (bps1 >= 0.72)
             high_freq = high_freq * 0.7;
         else
             high_freq = high_freq * 0.6;
@@ -329,9 +331,9 @@ static int wma_decode_init(AVCodecContext * avctx)
     }
     dprintf("flags1=0x%x flags2=0x%x\n", flags1, flags2);
     dprintf("version=%d channels=%d sample_rate=%d bitrate=%d block_align=%d\n",
-           s->version, s->nb_channels, s->sample_rate, s->bit_rate, 
+           s->version, s->nb_channels, s->sample_rate, s->bit_rate,
            s->block_align);
-    dprintf("bps=%f bps1=%f high_freq=%f bitoffset=%d\n", 
+    dprintf("bps=%f bps1=%f high_freq=%f bitoffset=%d\n",
            bps, bps1, high_freq, s->byte_offset_bits);
     dprintf("use_noise_coding=%d use_exp_vlc=%d nb_block_sizes=%d\n",
            s->use_noise_coding, s->use_exp_vlc, s->nb_block_sizes);
@@ -340,7 +342,7 @@ static int wma_decode_init(AVCodecContext * avctx)
     {
         int a, b, pos, lpos, k, block_len, i, j, n;
         const uint8_t *table;
-        
+
         if (s->version == 1) {
             s->coefs_start = 3;
         } else {
@@ -355,7 +357,7 @@ static int wma_decode_init(AVCodecContext * avctx)
                     a = wma_critical_freqs[i];
                     b = s->sample_rate;
                     pos = ((block_len * 2 * a)  + (b >> 1)) / b;
-                    if (pos > block_len) 
+                    if (pos > block_len)
                         pos = block_len;
                     s->exponent_bands[0][i] = pos - lpos;
                     if (pos >= block_len) {
@@ -390,7 +392,7 @@ static int wma_decode_init(AVCodecContext * avctx)
                         b = s->sample_rate;
                         pos = ((block_len * 2 * a)  + (b << 1)) / (4 * b);
                         pos <<= 2;
-                        if (pos > block_len) 
+                        if (pos > block_len)
                             pos = block_len;
                         if (pos > lpos)
                             s->exponent_bands[k][j++] = pos - lpos;
@@ -405,7 +407,7 @@ static int wma_decode_init(AVCodecContext * avctx)
             /* max number of coefs */
             s->coefs_end[k] = (s->frame_len - ((s->frame_len * 9) / 100)) >> k;
             /* high freq computation */
-            s->high_band_start[k] = (int)((block_len * 2 * high_freq) / 
+            s->high_band_start[k] = (int)((block_len * 2 * high_freq) /
                                           s->sample_rate + 0.5);
             n = s->exponent_sizes[k];
             j = 0;
@@ -425,7 +427,7 @@ static int wma_decode_init(AVCodecContext * avctx)
             s->exponent_high_sizes[k] = j;
 #if 0
             tprintf("%5d: coefs_end=%d high_band_start=%d nb_high_bands=%d: ",
-                  s->frame_len >> k, 
+                  s->frame_len >> k,
                   s->coefs_end[k],
                   s->high_band_start[k],
                   s->exponent_high_sizes[k]);
@@ -440,8 +442,8 @@ static int wma_decode_init(AVCodecContext * avctx)
     {
         int i, j;
         for(i = 0; i < s->nb_block_sizes; i++) {
-            tprintf("%5d: n=%2d:", 
-                   s->frame_len >> i, 
+            tprintf("%5d: n=%2d:",
+                   s->frame_len >> i,
                    s->exponent_sizes[i]);
             for(j=0;j<s->exponent_sizes[i];j++)
                 tprintf(" %d", s->exponent_bands[i][j]);
@@ -453,7 +455,7 @@ static int wma_decode_init(AVCodecContext * avctx)
     /* init MDCT */
     for(i = 0; i < s->nb_block_sizes; i++)
         ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1);
-    
+
     /* init MDCT windows : simple sinus window */
     for(i = 0; i < s->nb_block_sizes; i++) {
         int n, j;
@@ -468,7 +470,7 @@ static int wma_decode_init(AVCodecContext * avctx)
     }
 
     s->reset_block_lengths = 1;
-    
+
     if (s->use_noise_coding) {
 
         /* init the noise generator */
@@ -476,7 +478,7 @@ static int wma_decode_init(AVCodecContext * avctx)
             s->noise_mult = 0.02;
         else
             s->noise_mult = 0.04;
-               
+
 #ifdef TRACE
         for(i=0;i<NOISE_TAB_SIZE;i++)
             s->noise_table[i] = 1.0 * s->noise_mult;
@@ -492,13 +494,13 @@ static int wma_decode_init(AVCodecContext * avctx)
             }
         }
 #endif
-        init_vlc(&s->hgain_vlc, 9, sizeof(hgain_huffbits), 
+        init_vlc(&s->hgain_vlc, 9, sizeof(hgain_huffbits),
                  hgain_huffbits, 1, 1,
                  hgain_huffcodes, 2, 2, 0);
     }
 
     if (s->use_exp_vlc) {
-        init_vlc(&s->exp_vlc, 9, sizeof(scale_huffbits), 
+        init_vlc(&s->exp_vlc, 9, sizeof(scale_huffbits),
                  scale_huffbits, 1, 1,
                  scale_huffcodes, 4, 4, 0);
     } else {
@@ -572,7 +574,7 @@ static inline float pow_m1_4(WMADecodeContext *s, float x)
 }
 
 static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len)
-{  
+{
     float wdel, a, b;
     int i, e, m;
 
@@ -610,8 +612,8 @@ static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len)
 
 /* NOTE: We use the same code as Vorbis here */
 /* XXX: optimize it further with SSE/3Dnow */
-static void wma_lsp_to_curve(WMADecodeContext *s, 
-                             float *out, float *val_max_ptr, 
+static void wma_lsp_to_curve(WMADecodeContext *s,
+                             float *out, float *val_max_ptr,
                              int n, float *lsp)
 {
     int i, j;
@@ -661,7 +663,7 @@ static int decode_exp_vlc(WMADecodeContext *s, int ch)
     int last_exp, n, code;
     const uint16_t *ptr, *band_ptr;
     float v, *q, max_scale, *q_end;
-    
+
     band_ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits];
     ptr = band_ptr;
     q = s->exponents[ch];
@@ -679,7 +681,7 @@ static int decode_exp_vlc(WMADecodeContext *s, int ch)
     }
     last_exp = 36;
     while (q < q_end) {
-        code = get_vlc(&s->gb, &s->exp_vlc);
+        code = get_vlc2(&s->gb, s->exp_vlc.table, VLCBITS, 2);
         if (code < 0)
             return -1;
         /* NOTE: this offset is the same as MPEG4 AAC ! */
@@ -719,7 +721,7 @@ static int wma_decode_block(WMADecodeContext *s)
     /* compute current block length */
     if (s->use_variable_block_len) {
         n = av_log2(s->nb_block_sizes - 1) + 1;
-    
+
         if (s->reset_block_lengths) {
             s->reset_block_lengths = 0;
             v = get_bits(&s->gb, n);
@@ -776,7 +778,7 @@ static int wma_decode_block(WMADecodeContext *s)
         if (a != 127)
             break;
     }
-    
+
     if (total_gain < 15)
         coef_nb_bits = 13;
     else if (total_gain < 32)
@@ -820,7 +822,7 @@ static int wma_decode_block(WMADecodeContext *s)
                         if (val == (int)0x80000000) {
                             val = get_bits(&s->gb, 7) - 19;
                         } else {
-                            code = get_vlc(&s->gb, &s->hgain_vlc);
+                            code = get_vlc2(&s->gb, s->hgain_vlc.table, VLCBITS, 2);
                             if (code < 0)
                                 return -1;
                             val += code - 18;
@@ -831,13 +833,13 @@ static int wma_decode_block(WMADecodeContext *s)
             }
         }
     }
-           
+
     /* exposant can be interpolated in short blocks. */
     parse_exponents = 1;
     if (s->block_len_bits != s->frame_len_bits) {
         parse_exponents = get_bits(&s->gb, 1);
     }
-    
+
     if (parse_exponents) {
         for(ch = 0; ch < s->nb_channels; ch++) {
             if (s->channel_coded[ch]) {
@@ -852,7 +854,7 @@ static int wma_decode_block(WMADecodeContext *s)
     } else {
         for(ch = 0; ch < s->nb_channels; ch++) {
             if (s->channel_coded[ch]) {
-                interpolate_array(s->exponents[ch], 1 << s->prev_block_len_bits, 
+                interpolate_array(s->exponents[ch], 1 << s->prev_block_len_bits,
                                   s->block_len);
             }
         }
@@ -877,7 +879,7 @@ static int wma_decode_block(WMADecodeContext *s)
             eptr = ptr + nb_coefs[ch];
             memset(ptr, 0, s->block_len * sizeof(int16_t));
             for(;;) {
-                code = get_vlc(&s->gb, coef_vlc);
+                code = get_vlc2(&s->gb, coef_vlc->table, VLCBITS, 3);
                 if (code < 0)
                     return -1;
                 if (code == 1) {
@@ -910,7 +912,7 @@ static int wma_decode_block(WMADecodeContext *s)
             align_get_bits(&s->gb);
         }
     }
-     
+
     /* normalize */
     {
         int n4 = s->block_len / 2;
@@ -940,16 +942,16 @@ static int wma_decode_block(WMADecodeContext *s)
                     *coefs++ = s->noise_table[s->noise_index] * (*exponents++) * mult1;
                     s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1);
                 }
-                
+
                 n1 = s->exponent_high_sizes[bsize];
 
                 /* compute power of high bands */
-                exp_ptr = exponents + 
-                    s->high_band_start[bsize] - 
+                exp_ptr = exponents +
+                    s->high_band_start[bsize] -
                     s->coefs_start;
                 last_high_band = 0; /* avoid warning */
                 for(j=0;j<n1;j++) {
-                    n = s->exponent_high_bands[s->frame_len_bits - 
+                    n = s->exponent_high_bands[s->frame_len_bits -
                                               s->block_len_bits][j];
                     if (s->high_band_coded[ch][j]) {
                         float e2, v;
@@ -968,10 +970,10 @@ static int wma_decode_block(WMADecodeContext *s)
                 /* main freqs and high freqs */
                 for(j=-1;j<n1;j++) {
                     if (j < 0) {
-                        n = s->high_band_start[bsize] - 
+                        n = s->high_band_start[bsize] -
                             s->coefs_start;
                     } else {
-                        n = s->exponent_high_bands[s->frame_len_bits - 
+                        n = s->exponent_high_bands[s->frame_len_bits -
                                                   s->block_len_bits][j];
                     }
                     if (j >= 0 && s->high_band_coded[ch][j]) {
@@ -1026,7 +1028,7 @@ static int wma_decode_block(WMADecodeContext *s)
         }
     }
 #endif
-    
+
     if (s->ms_stereo && s->channel_coded[1]) {
         float a, b;
         int i;
@@ -1039,7 +1041,7 @@ static int wma_decode_block(WMADecodeContext *s)
             memset(s->coefs[0], 0, sizeof(float) * s->block_len);
             s->channel_coded[0] = 1;
         }
-        
+
         for(i = 0; i < s->block_len; i++) {
             a = s->coefs[0][i];
             b = s->coefs[1][i];
@@ -1092,16 +1094,16 @@ static int wma_decode_block(WMADecodeContext *s)
         }
     }
 
-    
+
     for(ch = 0; ch < s->nb_channels; ch++) {
         if (s->channel_coded[ch]) {
-            FFTSample output[BLOCK_MAX_SIZE * 2] __attribute__((aligned(16)));
+            DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
             float *ptr;
             int i, n4, index, n;
 
             n = s->block_len;
             n4 = s->block_len / 2;
-            ff_imdct_calc(&s->mdct_ctx[bsize], 
+            ff_imdct_calc(&s->mdct_ctx[bsize],
                           output, s->coefs[ch], s->mdct_tmp);
 
             /* XXX: optimize all that by build the window and
@@ -1156,7 +1158,7 @@ static int wma_decode_frame(WMADecodeContext *s, int16_t *samples)
     s->block_pos = 0;
     for(;;) {
         ret = wma_decode_block(s);
-        if (ret < 0) 
+        if (ret < 0)
             return -1;
         if (ret)
             break;
@@ -1182,7 +1184,7 @@ static int wma_decode_frame(WMADecodeContext *s, int16_t *samples)
         memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len],
                 s->frame_len * sizeof(float));
         /* XXX: suppress this */
-        memset(&s->frame_out[ch][s->frame_len], 0, 
+        memset(&s->frame_out[ch][s->frame_len], 0,
                s->frame_len * sizeof(float));
     }
 
@@ -1192,7 +1194,7 @@ static int wma_decode_frame(WMADecodeContext *s, int16_t *samples)
     return 0;
 }
 
-static int wma_decode_superframe(AVCodecContext *avctx, 
+static int wma_decode_superframe(AVCodecContext *avctx,
                                  void *data, int *data_size,
                                  uint8_t *buf, int buf_size)
 {
@@ -1200,18 +1202,18 @@ static int wma_decode_superframe(AVCodecContext *avctx,
     int nb_frames, bit_offset, i, pos, len;
     uint8_t *q;
     int16_t *samples;
-    
+
     tprintf("***decode_superframe:\n");
 
     if(buf_size==0){
         s->last_superframe_len = 0;
         return 0;
     }
-    
+
     samples = data;
 
     init_get_bits(&s->gb, buf, buf_size*8);
-    
+
     if (s->use_bit_reservoir) {
         /* read super frame header */
         get_bits(&s->gb, 4); /* super frame index */
@@ -1222,7 +1224,7 @@ static int wma_decode_superframe(AVCodecContext *avctx,
         if (s->last_superframe_len > 0) {
             //        printf("skip=%d\n", s->last_bitoffset);
             /* add bit_offset bits to last frame */
-            if ((s->last_superframe_len + ((bit_offset + 7) >> 3)) > 
+            if ((s->last_superframe_len + ((bit_offset + 7) >> 3)) >
                 MAX_CODED_SUPERFRAME_SIZE)
                 goto fail;
             q = s->last_superframe + s->last_superframe_len;
@@ -1234,7 +1236,7 @@ static int wma_decode_superframe(AVCodecContext *avctx,
             if (len > 0) {
                 *q++ = (get_bits)(&s->gb, len) << (8 - len);
             }
-            
+
             /* XXX: bit_offset bits into last frame */
             init_get_bits(&s->gb, s->last_superframe, MAX_CODED_SUPERFRAME_SIZE*8);
             /* skip unused bits */
@@ -1253,7 +1255,7 @@ static int wma_decode_superframe(AVCodecContext *avctx,
         len = pos & 7;
         if (len > 0)
             skip_bits(&s->gb, len);
-    
+
         s->reset_block_lengths = 1;
         for(i=0;i<nb_frames;i++) {
             if (wma_decode_frame(s, samples) < 0)
@@ -1306,7 +1308,7 @@ static int wma_decode_end(AVCodecContext *avctx)
         av_free(s->run_table[i]);
         av_free(s->level_table[i]);
     }
-    
+
     return 0;
 }
 
diff --git a/src/libffmpeg/libavcodec/wmv2.c b/src/libffmpeg/libavcodec/wmv2.c
index 75e924cb6..dd88b7d28 100644
--- a/src/libffmpeg/libavcodec/wmv2.c
+++ b/src/libffmpeg/libavcodec/wmv2.c
@@ -13,7 +13,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -21,9 +21,9 @@
  * @file wmv2.c
  * wmv2 codec.
  */
- 
+
 #include "simple_idct.h"
- 
+
 #define SKIP_TYPE_NONE 0
 #define SKIP_TYPE_MPEG 1
 #define SKIP_TYPE_ROW  2
@@ -47,14 +47,14 @@ typedef struct Wmv2Context{
     int per_mb_rl_bit;
     int skip_type;
     int hshift;
-    
+
     ScanTable abt_scantable[2];
-    DCTELEM abt_block2[6][64] __align8;
+    DECLARE_ALIGNED_8(DCTELEM, abt_block2[6][64]);
 }Wmv2Context;
 
 static void wmv2_common_init(Wmv2Context * w){
     MpegEncContext * const s= &w->s;
-        
+
     ff_init_scantable(s->dsp.idct_permutation, &w->abt_scantable[0], wmv2_scantableA);
     ff_init_scantable(s->dsp.idct_permutation, &w->abt_scantable[1], wmv2_scantableB);
 }
@@ -65,12 +65,12 @@ static int encode_ext_header(Wmv2Context *w){
     MpegEncContext * const s= &w->s;
     PutBitContext pb;
     int code;
-        
+
     init_put_bits(&pb, s->avctx->extradata, s->avctx->extradata_size);
 
     put_bits(&pb, 5, s->avctx->time_base.den / s->avctx->time_base.num); //yes 29.97 -> 29
     put_bits(&pb, 11, FFMIN(s->bit_rate/1024, 2047));
-    
+
     put_bits(&pb, 1, w->mspel_bit=1);
     put_bits(&pb, 1, w->flag3=1);
     put_bits(&pb, 1, w->abt_flag=1);
@@ -78,38 +78,38 @@ static int encode_ext_header(Wmv2Context *w){
     put_bits(&pb, 1, w->top_left_mv_flag=0);
     put_bits(&pb, 1, w->per_mb_rl_bit=1);
     put_bits(&pb, 3, code=1);
-    
+
     flush_put_bits(&pb);
 
     s->slice_height = s->mb_height / code;
-    
+
     return 0;
 }
 
 static int wmv2_encode_init(AVCodecContext *avctx){
     Wmv2Context * const w= avctx->priv_data;
-    
+
     if(MPV_encode_init(avctx) < 0)
         return -1;
-    
+
     wmv2_common_init(w);
 
     avctx->extradata_size= 4;
     avctx->extradata= av_mallocz(avctx->extradata_size + 10);
     encode_ext_header(w);
-    
+
     return 0;
 }
 
 #if 0 /* unused, remove? */
 static int wmv2_encode_end(AVCodecContext *avctx){
-    
+
     if(MPV_encode_end(avctx) < 0)
         return -1;
-    
+
     avctx->extradata_size= 0;
     av_freep(&avctx->extradata);
-    
+
     return 0;
 }
 #endif
@@ -138,9 +138,9 @@ int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number)
     if (s->pict_type == I_TYPE) {
         assert(s->no_rounding==1);
         if(w->j_type_bit) put_bits(&s->pb, 1, w->j_type);
-        
+
         if(w->per_mb_rl_bit) put_bits(&s->pb, 1, s->per_mb_rl_table);
-        
+
         if(!s->per_mb_rl_table){
             code012(&s->pb, s->rl_chroma_table_index);
             code012(&s->pb, s->rl_table_index);
@@ -153,7 +153,7 @@ int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number)
         int cbp_index;
 
         put_bits(&s->pb, 2, SKIP_TYPE_NONE);
-        
+
         code012(&s->pb, cbp_index=0);
         if(s->qscale <= 10){
             int map[3]= {0,2,1};
@@ -167,7 +167,7 @@ int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number)
         }
 
         if(w->mspel_bit) put_bits(&s->pb, 1, s->mspel);
-    
+
         if(w->abt_flag){
             put_bits(&s->pb, 1, w->per_mb_abt^1);
             if(!w->per_mb_abt){
@@ -176,14 +176,14 @@ int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number)
         }
 
         if(w->per_mb_rl_bit) put_bits(&s->pb, 1, s->per_mb_rl_table);
-        
+
         if(!s->per_mb_rl_table){
             code012(&s->pb, s->rl_table_index);
             s->rl_chroma_table_index = s->rl_table_index;
         }
         put_bits(&s->pb, 1, s->dc_table_index);
         put_bits(&s->pb, 1, s->mv_table_index);
-    
+
         s->inter_intra_pred= 0;//(s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
     }
     s->esc3_level_length= 0;
@@ -194,7 +194,7 @@ int ff_wmv2_encode_picture_header(MpegEncContext * s, int picture_number)
 
 // nearly idential to wmv1 but thats just because we dont use the useless M$ crap features
 // its duplicated here in case someone wants to add support for these carp features
-void ff_wmv2_encode_mb(MpegEncContext * s, 
+void ff_wmv2_encode_mb(MpegEncContext * s,
                        DCTELEM block[6][64],
                        int motion_x, int motion_y)
 {
@@ -204,29 +204,29 @@ void ff_wmv2_encode_mb(MpegEncContext * s,
     uint8_t *coded_block;
 
     handle_slices(s);
-    
+
     if (!s->mb_intra) {
-	/* compute cbp */
+        /* compute cbp */
         set_stat(ST_INTER_MB);
-	cbp = 0;
-	for (i = 0; i < 6; i++) {
-	    if (s->block_last_index[i] >= 0)
-		cbp |= 1 << (5 - i);
-	}
-        
-        put_bits(&s->pb, 
-                 wmv2_inter_table[w->cbp_table_index][cbp + 64][1], 
+        cbp = 0;
+        for (i = 0; i < 6; i++) {
+            if (s->block_last_index[i] >= 0)
+                cbp |= 1 << (5 - i);
+        }
+
+        put_bits(&s->pb,
+                 wmv2_inter_table[w->cbp_table_index][cbp + 64][1],
                  wmv2_inter_table[w->cbp_table_index][cbp + 64][0]);
 
         /* motion vector */
         h263_pred_motion(s, 0, 0, &pred_x, &pred_y);
-        msmpeg4_encode_motion(s, motion_x - pred_x, 
+        msmpeg4_encode_motion(s, motion_x - pred_x,
                               motion_y - pred_y);
     } else {
-	/* compute cbp */
-	cbp = 0;
+        /* compute cbp */
+        cbp = 0;
         coded_cbp = 0;
-	for (i = 0; i < 6; i++) {
+        for (i = 0; i < 6; i++) {
             int val, pred;
             val = (s->block_last_index[i] >= 1);
             cbp |= val << (5 - i);
@@ -237,7 +237,7 @@ void ff_wmv2_encode_mb(MpegEncContext * s,
                 val = val ^ pred;
             }
             coded_cbp |= val << (5 - i);
-	}
+        }
 #if 0
         if (coded_cbp)
             printf("cbp=%x %x\n", cbp, coded_cbp);
@@ -245,15 +245,15 @@ void ff_wmv2_encode_mb(MpegEncContext * s,
 
         if (s->pict_type == I_TYPE) {
             set_stat(ST_INTRA_MB);
-            put_bits(&s->pb, 
+            put_bits(&s->pb,
                      ff_msmp4_mb_i_table[coded_cbp][1], ff_msmp4_mb_i_table[coded_cbp][0]);
         } else {
-            put_bits(&s->pb, 
-                     wmv2_inter_table[w->cbp_table_index][cbp][1], 
+            put_bits(&s->pb,
+                     wmv2_inter_table[w->cbp_table_index][cbp][1],
                      wmv2_inter_table[w->cbp_table_index][cbp][0]);
         }
         set_stat(ST_INTRA_MB);
-        put_bits(&s->pb, 1, 0);	/* no AC prediction yet */
+        put_bits(&s->pb, 1, 0);         /* no AC prediction yet */
         if(s->inter_intra_pred){
             s->h263_aic_dir=0;
             put_bits(&s->pb, table_inter_intra[s->h263_aic_dir][1], table_inter_intra[s->h263_aic_dir][0]);
@@ -323,7 +323,7 @@ static int decode_ext_header(Wmv2Context *w){
     int code;
 
     if(s->avctx->extradata_size<4) return -1;
-    
+
     init_get_bits(&gb, s->avctx->extradata, s->avctx->extradata_size*8);
 
     fps                = get_bits(&gb, 5);
@@ -335,14 +335,14 @@ static int decode_ext_header(Wmv2Context *w){
     w->top_left_mv_flag= get_bits1(&gb);
     w->per_mb_rl_bit   = get_bits1(&gb);
     code               = get_bits(&gb, 3);
-    
+
     if(code==0) return -1;
 
     s->slice_height = s->mb_height / code;
 
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-        av_log(s->avctx, AV_LOG_DEBUG, "fps:%d, br:%d, qpbit:%d, abt_flag:%d, j_type_bit:%d, tl_mv_flag:%d, mbrl_bit:%d, code:%d, flag3:%d, slices:%d\n", 
-        fps, s->bit_rate, w->mspel_bit, w->abt_flag, w->j_type_bit, w->top_left_mv_flag, w->per_mb_rl_bit, code, w->flag3, 
+        av_log(s->avctx, AV_LOG_DEBUG, "fps:%d, br:%d, qpbit:%d, abt_flag:%d, j_type_bit:%d, tl_mv_flag:%d, mbrl_bit:%d, code:%d, flag3:%d, slices:%d\n",
+        fps, s->bit_rate, w->mspel_bit, w->abt_flag, w->j_type_bit, w->top_left_mv_flag, w->per_mb_rl_bit, code, w->flag3,
         code);
     }
     return 0;
@@ -374,7 +374,7 @@ return -1;
     s->chroma_qscale= s->qscale = get_bits(&s->gb, 5);
     if(s->qscale < 0)
        return -1;
-       
+
     return 0;
 }
 
@@ -385,11 +385,11 @@ int ff_wmv2_decode_secondary_picture_header(MpegEncContext * s)
     if (s->pict_type == I_TYPE) {
         if(w->j_type_bit) w->j_type= get_bits1(&s->gb);
         else              w->j_type= 0; //FIXME check
-        
+
         if(!w->j_type){
             if(w->per_mb_rl_bit) s->per_mb_rl_table= get_bits1(&s->gb);
             else                 s->per_mb_rl_table= 0;
-        
+
             if(!s->per_mb_rl_table){
                 s->rl_chroma_table_index = decode012(&s->gb);
                 s->rl_table_index = decode012(&s->gb);
@@ -400,11 +400,11 @@ int ff_wmv2_decode_secondary_picture_header(MpegEncContext * s)
         s->inter_intra_pred= 0;
         s->no_rounding = 1;
         if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-	    av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d j_type:%d \n", 
-		s->qscale,
-		s->rl_chroma_table_index,
-		s->rl_table_index, 
-		s->dc_table_index,
+            av_log(s->avctx, AV_LOG_DEBUG, "qscale:%d rlc:%d rl:%d dc:%d mbrl:%d j_type:%d \n",
+                s->qscale,
+                s->rl_chroma_table_index,
+                s->rl_table_index,
+                s->dc_table_index,
                 s->per_mb_rl_table,
                 w->j_type);
         }
@@ -427,7 +427,7 @@ int ff_wmv2_decode_secondary_picture_header(MpegEncContext * s)
 
         if(w->mspel_bit) s->mspel= get_bits1(&s->gb);
         else             s->mspel= 0; //FIXME check
-    
+
         if(w->abt_flag){
             w->per_mb_abt= get_bits1(&s->gb)^1;
             if(!w->per_mb_abt){
@@ -437,7 +437,7 @@ int ff_wmv2_decode_secondary_picture_header(MpegEncContext * s)
 
         if(w->per_mb_rl_bit) s->per_mb_rl_table= get_bits1(&s->gb);
         else                 s->per_mb_rl_table= 0;
-        
+
         if(!s->per_mb_rl_table){
             s->rl_table_index = decode012(&s->gb);
             s->rl_chroma_table_index = s->rl_table_index;
@@ -445,16 +445,16 @@ int ff_wmv2_decode_secondary_picture_header(MpegEncContext * s)
 
         s->dc_table_index = get_bits1(&s->gb);
         s->mv_table_index = get_bits1(&s->gb);
-    
+
         s->inter_intra_pred= 0;//(s->width*s->height < 320*240 && s->bit_rate<=II_BITRATE);
         s->no_rounding ^= 1;
-        
+
         if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-            av_log(s->avctx, AV_LOG_DEBUG, "rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d mspel:%d per_mb_abt:%d abt_type:%d cbp:%d ii:%d\n", 
-		s->rl_table_index, 
-		s->rl_chroma_table_index, 
-		s->dc_table_index,
-		s->mv_table_index,
+            av_log(s->avctx, AV_LOG_DEBUG, "rl:%d rlc:%d dc:%d mv:%d mbrl:%d qp:%d mspel:%d per_mb_abt:%d abt_type:%d cbp:%d ii:%d\n",
+                s->rl_table_index,
+                s->rl_chroma_table_index,
+                s->dc_table_index,
+                s->mv_table_index,
                 s->per_mb_rl_table,
                 s->qscale,
                 s->mspel,
@@ -466,7 +466,7 @@ int ff_wmv2_decode_secondary_picture_header(MpegEncContext * s)
     }
     s->esc3_level_length= 0;
     s->esc3_run_length= 0;
-    
+
 s->picture_number++; //FIXME ?
 
 
@@ -484,18 +484,18 @@ s->picture_number++; //FIXME ?
 static inline int wmv2_decode_motion(Wmv2Context *w, int *mx_ptr, int *my_ptr){
     MpegEncContext * const s= &w->s;
     int ret;
-   
+
     ret= msmpeg4_decode_motion(s, mx_ptr, my_ptr);
-   
+
     if(ret<0) return -1;
-   
+
     if((((*mx_ptr)|(*my_ptr)) & 1) && s->mspel)
         w->hshift= get_bits1(&s->gb);
-    else 
+    else
         w->hshift= 0;
 
 //printf("%d %d  ", *mx_ptr, *my_ptr);
-   
+
     return 0;
 }
 
@@ -512,17 +512,17 @@ static int16_t *wmv2_pred_motion(Wmv2Context *w, int *px, int *py){
     A = s->current_picture.motion_val[0][xy - 1];
     B = s->current_picture.motion_val[0][xy - wrap];
     C = s->current_picture.motion_val[0][xy + 2 - wrap];
-    
+
     if(s->mb_x && !s->first_slice_line && !s->mspel && w->top_left_mv_flag)
         diff= FFMAX(ABS(A[0] - B[0]), ABS(A[1] - B[1]));
-    else 
+    else
         diff=0;
-    
+
     if(diff >= 8)
         type= get_bits1(&s->gb);
     else
         type= 2;
-    
+
     if(type == 0){
         *px= A[0];
         *py= A[1];
@@ -548,12 +548,12 @@ static inline int wmv2_decode_inter_block(Wmv2Context *w, DCTELEM *block, int n,
     static const int sub_cbp_table[3]= {2,3,1};
     int sub_cbp;
 
-    if(!cbp){ 
+    if(!cbp){
         s->block_last_index[n] = -1;
 
         return 0;
     }
-    
+
     if(w->per_block_abt)
         w->abt_type= decode012(&s->gb);
 #if 0
@@ -574,7 +574,7 @@ static inline int wmv2_decode_inter_block(Wmv2Context *w, DCTELEM *block, int n,
             if (msmpeg4_decode_block(s, block, n, 1, scantable) < 0)
                 return -1;
         }
-        
+
         if(sub_cbp&2){
             if (msmpeg4_decode_block(s, w->abt_block2[n], n, 1, scantable) < 0)
                 return -1;
@@ -618,9 +618,9 @@ void ff_wmv2_add_mb(MpegEncContext *s, DCTELEM block1[6][64], uint8_t *dest_y, u
     wmv2_add_block(w, block1[1], dest_y + 8                , s->linesize, 1);
     wmv2_add_block(w, block1[2], dest_y +     8*s->linesize, s->linesize, 2);
     wmv2_add_block(w, block1[3], dest_y + 8 + 8*s->linesize, s->linesize, 3);
-    
+
     if(s->flags&CODEC_FLAG_GRAY) return;
-    
+
     wmv2_add_block(w, block1[4], dest_cb                   , s->uvlinesize, 4);
     wmv2_add_block(w, block1[5], dest_cr                   , s->uvlinesize, 5);
 }
@@ -634,12 +634,12 @@ void ff_mspel_motion(MpegEncContext *s,
     uint8_t *ptr;
     int dxy, offset, mx, my, src_x, src_y, v_edge_pos, linesize, uvlinesize;
     int emu=0;
-    
+
     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
     dxy = 2*dxy + w->hshift;
     src_x = s->mb_x * 16 + (motion_x >> 1);
     src_y = s->mb_y * 16 + (motion_y >> 1);
-                
+
     /* WARNING: do no forget half pels */
     v_edge_pos = s->v_edge_pos;
     src_x = clip(src_x, -16, s->width);
@@ -651,7 +651,7 @@ void ff_mspel_motion(MpegEncContext *s,
     if(s->flags&CODEC_FLAG_EMU_EDGE){
         if(src_x<1 || src_y<1 || src_x + 17  >= s->h_edge_pos
                               || src_y + h+1 >= v_edge_pos){
-            ff_emulated_edge_mc(s->edge_emu_buffer, ptr - 1 - s->linesize, s->linesize, 19, 19, 
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr - 1 - s->linesize, s->linesize, 19, 19,
                              src_x-1, src_y-1, s->h_edge_pos, s->v_edge_pos);
             ptr= s->edge_emu_buffer + 1 + s->linesize;
             emu=1;
@@ -680,7 +680,7 @@ void ff_mspel_motion(MpegEncContext *s,
         mx >>= 1;
         my >>= 1;
     }
-    
+
     src_x = s->mb_x * 8 + mx;
     src_y = s->mb_y * 8 + my;
     src_x = clip(src_x, -8, s->width >> 1);
@@ -692,7 +692,7 @@ void ff_mspel_motion(MpegEncContext *s,
     offset = (src_y * uvlinesize) + src_x;
     ptr = ref_picture[1] + offset;
     if(emu){
-        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, 
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9,
                          src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
         ptr= s->edge_emu_buffer;
     }
@@ -700,7 +700,7 @@ void ff_mspel_motion(MpegEncContext *s,
 
     ptr = ref_picture[2] + offset;
     if(emu){
-        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, 
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9,
                          src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
         ptr= s->edge_emu_buffer;
     }
@@ -715,7 +715,7 @@ static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
     uint8_t *coded_val;
 
     if(w->j_type) return 0;
-    
+
     if (s->pict_type == P_TYPE) {
         if(IS_SKIP(s->current_picture.mb_type[s->mb_y * s->mb_stride + s->mb_x])){
             /* skip mb */
@@ -734,8 +734,8 @@ static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
         code = get_vlc2(&s->gb, mb_non_intra_vlc[w->cbp_table_index].table, MB_NON_INTRA_VLC_BITS, 3);
         if (code < 0)
             return -1;
-	s->mb_intra = (~code & 0x40) >> 6;
-            
+        s->mb_intra = (~code & 0x40) >> 6;
+
         cbp = code & 0x3f;
     } else {
         s->mb_intra = 1;
@@ -761,7 +761,7 @@ static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
         int mx, my;
 //printf("P at %d %d\n", s->mb_x, s->mb_y);
         wmv2_pred_motion(w, &mx, &my);
-        
+
         if(cbp){
             s->dsp.clear_blocks(s->block[0]);
             if(s->per_mb_rl_table){
@@ -776,7 +776,7 @@ static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
             }else
                 w->per_block_abt=0;
         }
-        
+
         if (wmv2_decode_motion(w, &mx, &my) < 0)
             return -1;
 
@@ -787,11 +787,11 @@ static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
 
         for (i = 0; i < 6; i++) {
             if (wmv2_decode_inter_block(w, block[i], i, (cbp >> (5 - i)) & 1) < 0)
-	    {
-	        av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding inter block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
-	        return -1;
-	    }
-        }    
+            {
+                av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding inter block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
+                return -1;
+            }
+        }
     } else {
 //if(s->pict_type==P_TYPE)
 //   printf("%d%d ", s->inter_intra_pred, cbp);
@@ -805,15 +805,15 @@ static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
             s->rl_table_index = decode012(&s->gb);
             s->rl_chroma_table_index = s->rl_table_index;
         }
-    
+
         s->dsp.clear_blocks(s->block[0]);
         for (i = 0; i < 6; i++) {
             if (msmpeg4_decode_block(s, block[i], i, (cbp >> (5 - i)) & 1, NULL) < 0)
-	    {
-	        av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding intra block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
-	        return -1;
-	    }
-        }    
+            {
+                av_log(s->avctx, AV_LOG_ERROR, "\nerror while decoding intra block: %d x %d (%d)\n", s->mb_x, s->mb_y, i);
+                return -1;
+            }
+        }
     }
 
     return 0;
@@ -821,12 +821,12 @@ static int wmv2_decode_mb(MpegEncContext *s, DCTELEM block[6][64])
 
 static int wmv2_decode_init(AVCodecContext *avctx){
     Wmv2Context * const w= avctx->priv_data;
-    
+
     if(ff_h263_decode_init(avctx) < 0)
         return -1;
-    
+
     wmv2_common_init(w);
-    
+
     return 0;
 }
 
diff --git a/src/libffmpeg/libavcodec/wnv1.c b/src/libffmpeg/libavcodec/wnv1.c
index 292c7e042..335a04f35 100644
--- a/src/libffmpeg/libavcodec/wnv1.c
+++ b/src/libffmpeg/libavcodec/wnv1.c
@@ -14,15 +14,15 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file wnv1.c
  * Winnov WNV1 codec.
  */
- 
+
 #include "avcodec.h"
 #include "common.h"
 #include "bitstream.h"
@@ -38,7 +38,7 @@ typedef struct WNV1Context{
 
 static uint16_t code_tab[16][2]={
 {0x1FD,9}, {0xFD,8}, {0x7D,7}, {0x3D,6}, {0x1D,5}, {0x0D,4}, {0x005,3},
-{0x000,1}, 
+{0x000,1},
 {0x004,3}, {0x0C,4}, {0x1C,5}, {0x3C,6}, {0x7C,7}, {0xFC,8}, {0x1FC,9}, {0xFF,8}
 };
 
@@ -56,7 +56,7 @@ static inline int wnv1_get_code(WNV1Context *w, int base_value)
         return base_value + ((v - 7)<<w->shift);
 }
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -93,7 +93,7 @@ static int decode_frame(AVCodecContext *avctx,
             l->shift = 1;
         }
     }
-    
+
     Y = p->data[0];
     U = p->data[1];
     V = p->data[2];
@@ -109,10 +109,10 @@ static int decode_frame(AVCodecContext *avctx,
         V += p->linesize[2];
     }
 
-    
+
     *data_size = sizeof(AVFrame);
     *(AVFrame*)data = l->pic;
-    
+
     return buf_size;
 }
 
diff --git a/src/libffmpeg/libavcodec/ws-snd1.c b/src/libffmpeg/libavcodec/ws-snd1.c
index 5ac4c61bd..aa85b4526 100644
--- a/src/libffmpeg/libavcodec/ws-snd1.c
+++ b/src/libffmpeg/libavcodec/ws-snd1.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  */
 #include "avcodec.h"
 
@@ -40,7 +40,7 @@ static const char ws_adpcm_4bit[] = {
 static int ws_snd_decode_init(AVCodecContext * avctx)
 {
 //    WSSNDContext *c = avctx->priv_data;
-    
+
     return 0;
 }
 
@@ -49,12 +49,12 @@ static int ws_snd_decode_frame(AVCodecContext *avctx,
                 uint8_t *buf, int buf_size)
 {
 //    WSSNDContext *c = avctx->priv_data;
-    
+
     int in_size, out_size;
     int sample = 0;
     int i;
     short *samples = data;
-    
+
     if (!buf_size)
         return 0;
 
@@ -62,13 +62,13 @@ static int ws_snd_decode_frame(AVCodecContext *avctx,
     *data_size = out_size * 2;
     in_size = LE_16(&buf[2]);
     buf += 4;
-    
+
     if (in_size == out_size) {
         for (i = 0; i < out_size; i++)
             *samples++ = (*buf++ - 0x80) << 8;
         return buf_size;
     }
-    
+
     while (out_size > 0) {
         int code;
         uint8_t count;
@@ -129,7 +129,7 @@ static int ws_snd_decode_frame(AVCodecContext *avctx,
             }
         }
     }
-    
+
     return buf_size;
 }
 
diff --git a/src/libffmpeg/libavcodec/xan.c b/src/libffmpeg/libavcodec/xan.c
index 96cc19329..7ccc65c00 100644
--- a/src/libffmpeg/libavcodec/xan.c
+++ b/src/libffmpeg/libavcodec/xan.c
@@ -14,7 +14,7 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
 
@@ -61,7 +61,7 @@ static int xan_decode_init(AVCodecContext *avctx)
     s->avctx = avctx;
     s->frame_size = 0;
 
-    if ((avctx->codec->id == CODEC_ID_XAN_WC3) && 
+    if ((avctx->codec->id == CODEC_ID_XAN_WC3) &&
         (s->avctx->palctrl == NULL)) {
         av_log(avctx, AV_LOG_ERROR, " WC3 Xan video: palette expected.\n");
         return -1;
@@ -72,7 +72,7 @@ static int xan_decode_init(AVCodecContext *avctx)
 
     if(avcodec_check_dimensions(avctx, avctx->width, avctx->height))
         return -1;
-    
+
     s->buffer1_size = avctx->width * avctx->height;
     s->buffer1 = av_malloc(s->buffer1_size);
     s->buffer2_size = avctx->width * avctx->height;
@@ -83,7 +83,7 @@ static int xan_decode_init(AVCodecContext *avctx)
     return 0;
 }
 
-/* This function is used in lieu of memcpy(). This decoder can not use 
+/* This function is used in lieu of memcpy(). This decoder can not use
  * memcpy because the memory locations often overlap and
  * memcpy doesn't like that; it's not uncommon, for example, for
  * dest = src+1, to turn byte A into  pattern AAAAAAAA.
@@ -96,7 +96,7 @@ static inline void bytecopy(unsigned char *dest, unsigned char *src, int count)
         dest[i] = src[i];
 }
 
-static int xan_huffman_decode(unsigned char *dest, unsigned char *src, 
+static int xan_huffman_decode(unsigned char *dest, unsigned char *src,
     int dest_len)
 {
     unsigned char byte = *src++;
@@ -206,7 +206,7 @@ static void xan_unpack(unsigned char *dest, unsigned char *src, int dest_len)
     bytecopy(dest, src, size);  dest += size;  src += size;
 }
 
-static void inline xan_wc3_output_pixel_run(XanContext *s, 
+static void inline xan_wc3_output_pixel_run(XanContext *s,
     unsigned char *pixel_buffer, int x, int y, int pixel_count)
 {
     int stride;
@@ -235,7 +235,7 @@ static void inline xan_wc3_output_pixel_run(XanContext *s,
     }
 }
 
-static void inline xan_wc3_copy_pixel_run(XanContext *s, 
+static void inline xan_wc3_copy_pixel_run(XanContext *s,
     int x, int y, int pixel_count, int motion_x, int motion_y)
 {
     int stride;
@@ -255,7 +255,7 @@ static void inline xan_wc3_copy_pixel_run(XanContext *s,
     prevframe_x = x + motion_x;
     while((pixel_count--) && (curframe_index < s->frame_size)) {
 
-        palette_plane[curframe_index++] = 
+        palette_plane[curframe_index++] =
             prev_palette_plane[prevframe_index++];
 
         curframe_x++;
@@ -302,7 +302,7 @@ static void xan_wc3_decode_frame(XanContext *s) {
     xan_huffman_decode(opcode_buffer, huffman_segment, opcode_buffer_size);
 
     if (imagedata_segment[0] == 2)
-        xan_unpack(imagedata_buffer, &imagedata_segment[1], 
+        xan_unpack(imagedata_buffer, &imagedata_segment[1],
             imagedata_buffer_size);
     else
         imagedata_buffer = &imagedata_segment[1];
@@ -423,7 +423,7 @@ static int xan_decode_frame(AVCodecContext *avctx,
         s->frame_size = s->current_frame.linesize[0] * s->avctx->height;
 
     palette_control->palette_changed = 0;
-    memcpy(s->current_frame.data[1], palette_control->palette, 
+    memcpy(s->current_frame.data[1], palette_control->palette,
         AVPALETTE_SIZE);
     s->current_frame.palette_has_changed = 1;
 
diff --git a/src/libffmpeg/libavcodec/xl.c b/src/libffmpeg/libavcodec/xl.c
index 2ba48eb27..d626ff12a 100644
--- a/src/libffmpeg/libavcodec/xl.c
+++ b/src/libffmpeg/libavcodec/xl.c
@@ -14,15 +14,15 @@
  *
  * You should have received a copy of the GNU Lesser General Public
  * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  *
  */
- 
+
 /**
  * @file xl.c
  * Miro VideoXL codec.
  */
- 
+
 #include "avcodec.h"
 #include "mpegvideo.h"
 
@@ -37,7 +37,7 @@ const int xl_table[32] = {
   64,  82,  94, 103, 108, 113, 116, 119,
  120, 121, 122, 123, 124, 125, 126, 127};
 
-static int decode_frame(AVCodecContext *avctx, 
+static int decode_frame(AVCodecContext *avctx,
                         void *data, int *data_size,
                         uint8_t *buf, int buf_size)
 {
@@ -63,18 +63,18 @@ static int decode_frame(AVCodecContext *avctx,
     Y = a->pic.data[0];
     U = a->pic.data[1];
     V = a->pic.data[2];
-    
+
     stride = avctx->width - 4;
     for (i = 0; i < avctx->height; i++) {
         /* lines are stored in reversed order */
         buf += stride;
-        
+
         for (j = 0; j < avctx->width; j += 4) {
             /* value is stored in LE dword with word swapped */
             val = LE_32(buf);
             buf -= 4;
             val = ((val >> 16) & 0xFFFF) | ((val & 0xFFFF) << 16);
-    
+
             if(!j)
                 y0 = (val & 0x1F) << 2;
             else
@@ -95,16 +95,16 @@ static int decode_frame(AVCodecContext *avctx,
                 c1 = (val & 0x1F) << 2;
             else
                 c1 += xl_table[val & 0x1F];
-            
+
             Y[j + 0] = y0 << 1;
             Y[j + 1] = y1 << 1;
             Y[j + 2] = y2 << 1;
             Y[j + 3] = y3 << 1;
-            
+
             U[j >> 2] = c0 << 1;
             V[j >> 2] = c1 << 1;
         }
-        
+
         buf += avctx->width + 4;
         Y += a->pic.linesize[0];
         U += a->pic.linesize[1];
@@ -113,7 +113,7 @@ static int decode_frame(AVCodecContext *avctx,
 
     *data_size = sizeof(AVFrame);
     *(AVFrame*)data = a->pic;
-    
+
     return buf_size;
 }