diff options
31 files changed, 9591 insertions, 122 deletions
diff --git a/README-VDPAU b/README-VDPAU new file mode 100644 index 000000000..51e087f09 --- /dev/null +++ b/README-VDPAU @@ -0,0 +1,70 @@ +xine-vdpau README: +------------------------------------------------------------------------------ + +So, you want to give it a try, but wonder which steps are required. +Ok, so here it is: + +0) you need nvidia's driver 180.22 or later. + +1) get the sources: +svn co svn://jusst.de/xine-vdpau + +2) compile the sources: +cd xine-vdpau +./autogen.sh +./configure +make +make install (as root) + **(make sure that no other xine-lib installation will conflict with this one) + +3) edit your xine configuration +nano $HOME/.xine/config (if it does not exist, first run "xine --no-logo" then quit.) +search for "engine.buffers.video_num_frames" and set it to 22 + +4) running the beast: +xine -verbose /path/to/a/working/sample + ** --verbose will print some useful things in your console (in case of problems, + the developers will ask you to give this output, at least) + +5) update your svn copy quite often + +6) don't blame us if it crashes, burns your gpu (unlikely:) or anything else. + + +------------------------------------------------------------------------------ + +FAQ: + +Q: + Why does my file play fine with mplayer-vdpau but not with xine-vdpau? +A: + We are not using nvidia's libavcodec patch. + We are writing decoders from scratch. + So don't expect them to be as mature as ffmpeg ones. Not yet. + +Q: + Why doesn't mpeg2 use less cpu than the software decoder? +A: + Because at the moment it does a lot of memcpy. This will be fixed soon, but that's not + a priority. Stability is our focus. + +Q: + Is deinterlacing working? +A: + Yes. It's already quite good (doing 50i->50p), but could even be better in the future. + +Q: + How do I get it working with VDR, Kaffeine, whatever? +A: + Ask the VDR, Kaffeine, whatever developers. + (Note: for kaffeine you are lucky, I'm going to tell you the tip. 
+ Build kaffeine like that: ./configure --without-xcb && make && make install) + +Q: + How can i contact you? +A: + IRC: #xine-vdpau on freenode + MAIL: http://lists.kafic.ba/mailman/listinfo/xine-vdpau + Eventually, nvnews. + +---------------------------------------------------------------------------- diff --git a/configure.ac b/configure.ac index c9a8557df..88a019f44 100644 --- a/configure.ac +++ b/configure.ac @@ -890,6 +890,20 @@ AC_SUBST(XVMC_LIBS) dnl --------------------------------------------- +dnl Check for VDPAU +dnl --------------------------------------------- +AC_ARG_WITH([vdpau], AS_HELP_STRING([--without-vdpau], [Doesn't build VDPAU plugins])) +if test "x$with_vdpau" != "xno"; then + AC_CHECK_HEADERS([vdpau/vdpau_x11.h], [have_vdpau=yes], [have_vdpau=no]) + if test "x$have_vdpau" = "xyes"; then + AC_CHECK_LIB(vdpau, vdp_device_create_x11, have_vdpau="yes", [have_vdpau="no"], [$X_LIBS $X_PRE_LIBS -lXext $X_EXTRA_LIBS]) + fi +fi + +AM_CONDITIONAL(HAVE_VDPAU, test "x$have_vdpau" = "xyes" ) + + +dnl --------------------------------------------- dnl Check for xcb dnl --------------------------------------------- AC_ARG_WITH([xcb], AS_HELP_STRING([--without-xcb], [Doesn't build XCB video out plugins])) @@ -2779,6 +2793,7 @@ src/libmpeg2new/libmpeg2/Makefile src/libmpeg2new/include/Makefile src/libmusepack/Makefile src/libmusepack/musepack/Makefile +src/libvdpau/Makefile src/libspudec/Makefile src/libspucc/Makefile src/libspucmml/Makefile @@ -3118,6 +3133,9 @@ if test "x$have_xcb" = "xyes"; then echo " - xcb-xv (XVideo using XCB)" fi fi +if test "x$have_vdpau" = "xyes"; then + echo " - vdpau (X11 VDPAU)" +fi if test "x$no_aalib" != "xyes"; then echo " - aa (Ascii ART)" fi diff --git a/include/xine.h.in b/include/xine.h.in index 1b4ddbd64..01fd7fbc5 100644 --- a/include/xine.h.in +++ b/include/xine.h.in @@ -375,12 +375,13 @@ int xine_get_param (xine_stream_t *stream, int param) XINE_PROTECTED; #define XINE_PARAM_VO_TVMODE 0x0100000a /* ??? 
*/ #define XINE_PARAM_VO_WINDOW_WIDTH 0x0100000f /* readonly */ #define XINE_PARAM_VO_WINDOW_HEIGHT 0x01000010 /* readonly */ +#define XINE_PARAM_VO_SHARPNESS 0x01000018 /* 0..65535 */ +#define XINE_PARAM_VO_NOISE_REDUCTION 0x01000019 /* 0..65535 */ #define XINE_PARAM_VO_CROP_LEFT 0x01000020 /* crop frame pixels */ #define XINE_PARAM_VO_CROP_RIGHT 0x01000021 /* crop frame pixels */ #define XINE_PARAM_VO_CROP_TOP 0x01000022 /* crop frame pixels */ #define XINE_PARAM_VO_CROP_BOTTOM 0x01000023 /* crop frame pixels */ - #define XINE_VO_ZOOM_STEP 100 #define XINE_VO_ZOOM_MAX 400 #define XINE_VO_ZOOM_MIN -85 @@ -482,6 +483,7 @@ int xine_get_current_frame_data (xine_stream_t *stream, #define XINE_IMGFMT_YUY2 (('2'<<24)|('Y'<<16)|('U'<<8)|'Y') #define XINE_IMGFMT_XVMC (('C'<<24)|('M'<<16)|('v'<<8)|'X') #define XINE_IMGFMT_XXMC (('C'<<24)|('M'<<16)|('x'<<8)|'X') +#define XINE_IMGFMT_VDPAU (('A'<<24)|('P'<<16)|('D'<<8)|'V') /* get current xine's virtual presentation timestamp (1/90000 sec) * note: this is mostly internal data. @@ -2134,8 +2136,11 @@ void xine_event_send (xine_stream_t *stream, const xine_event_t *event) XINE_PRO /* yellow text, black border, transparent background */ #define XINE_TEXTPALETTE_YELLOW_BLACK_TRANSPARENT 3 -#define XINE_OSD_CAP_FREETYPE2 0x0001 /* freetype2 support compiled in */ -#define XINE_OSD_CAP_UNSCALED 0x0002 /* unscaled overlays supp. by vo drv */ +#define XINE_OSD_CAP_FREETYPE2 0x0001 /* freetype2 support compiled in */ +#define XINE_OSD_CAP_UNSCALED 0x0002 /* unscaled overlays supp. 
by vo drv */ +#define XINE_OSD_CAP_CUSTOM_EXTENT 0x0004 /* hardware scaled to match video output window */ +#define XINE_OSD_CAP_ARGB_LAYER 0x0008 /* supports separate true color layer */ +#define XINE_OSD_CAP_VIDEO_WINDOW 0x0010 /* can scale video to an area within osd extent */ typedef struct xine_osd_s xine_osd_t; @@ -2200,6 +2205,35 @@ void xine_osd_get_palette (xine_osd_t *self, uint32_t *color, void xine_osd_set_palette (xine_osd_t *self, const uint32_t *const color, const uint8_t *const trans ) XINE_PROTECTED; + +/* + * set an argb buffer to be blended into video + * the buffer must exactly match the osd dimensions + * and stay valid while the osd is on screen. pass + * a NULL pointer to safely remove the buffer from + * the osd layer. only the dirty area will be + * updated on screen. for convinience the whole + * osd object will be considered dirty when setting + * a different buffer pointer. + * see also XINE_OSD_CAP_ARGB_LAYER + */ +void xine_osd_set_argb_buffer(xine_osd_t *self, uint32_t *argb_buffer, + int dirty_x, int dirty_y, int dirty_width, int dirty_height) XINE_PROTECTED; + +/* + * define extent of reference coordinate system + * for video resolution independent osds. 
+ * see also XINE_OSD_CAP_CUSTOM_EXTENT + */ +void xine_osd_set_extent(xine_osd_t *self, int extent_width, int extent_height) XINE_PROTECTED; + +/* + * define area within osd extent to output + * video to while osd is on screen + * see also XINE_OSD_CAP_VIDEO_WINDOW + */ +void xine_osd_set_video_window(xine_osd_t *self, int window_x, int window_y, int window_width, int window_height) XINE_PROTECTED; + /* * close osd rendering engine * loaded fonts are unloaded diff --git a/src/Makefile.am b/src/Makefile.am index 5d21a97eb..ec4925407 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -27,5 +27,6 @@ SUBDIRS = \ libreal \ libfaad \ libmusepack \ + libvdpau \ post \ combined diff --git a/src/demuxers/Makefile.am b/src/demuxers/Makefile.am index 4c2aac019..fe10947c5 100644 --- a/src/demuxers/Makefile.am +++ b/src/demuxers/Makefile.am @@ -49,7 +49,8 @@ xineplug_LTLIBRARIES = $(ogg_module) $(asf_module) $(mng_module) $(image_module) xineplug_dmx_nsv.la \ xineplug_dmx_matroska.la \ xineplug_dmx_iff.la \ - xineplug_dmx_flv.la + xineplug_dmx_flv.la \ + xineplug_dmx_vc1_es.la xineplug_dmx_ogg_la_SOURCES = demux_ogg.c xineplug_dmx_ogg_la_LIBADD = $(XINE_LIB) $(VORBIS_LIBS) $(SPEEX_LIBS) $(THEORA_LIBS) $(OGG_LIBS) $(LTLIBINTL) @@ -67,6 +68,9 @@ xineplug_dmx_mpeg_la_LIBADD = $(XINE_LIB) xineplug_dmx_mpeg_elem_la_SOURCES = demux_elem.c xineplug_dmx_mpeg_elem_la_LIBADD = $(XINE_LIB) +xineplug_dmx_vc1_es_la_SOURCES = demux_vc1es.c +xineplug_dmx_vc1_es_la_LIBADD = $(XINE_LIB) + xineplug_dmx_mpeg_pes_la_SOURCES = demux_mpeg_pes.c xineplug_dmx_mpeg_pes_la_LIBADD = $(XINE_LIB) $(LTLIBINTL) diff --git a/src/demuxers/demux_mpeg.c b/src/demuxers/demux_mpeg.c index 85b62f48e..ae0a50ae6 100644 --- a/src/demuxers/demux_mpeg.c +++ b/src/demuxers/demux_mpeg.c @@ -246,6 +246,8 @@ static void parse_mpeg2_packet (demux_mpeg_t *this, int stream_id, int64_t scr) len = read_bytes(this, 2); + //printf( "parse_mpeg2_packet: stream_id=%X\n", stream_id); + if (stream_id==0xbd) { int track; @@ -483,7 
+485,7 @@ static void parse_mpeg2_packet (demux_mpeg_t *this, int stream_id, int64_t scr) } - } else if ((stream_id >= 0xbc) && ((stream_id & 0xf0) == 0xe0)) { + } else if ( ((stream_id >= 0xbc) && ((stream_id & 0xf0) == 0xe0)) || stream_id==0xfd ) { w = read_bytes(this, 1); flags = read_bytes(this, 1); @@ -532,7 +534,7 @@ static void parse_mpeg2_packet (demux_mpeg_t *this, int stream_id, int64_t scr) return; } - buf->type = BUF_VIDEO_MPEG; + buf->type = (stream_id==0xfd) ? BUF_VIDEO_VC1 : BUF_VIDEO_MPEG; buf->pts = pts; buf->decoder_info[0] = pts - dts; check_newpts( this, pts, PTS_VIDEO ); diff --git a/src/demuxers/demux_vc1es.c b/src/demuxers/demux_vc1es.c new file mode 100644 index 000000000..820993609 --- /dev/null +++ b/src/demuxers/demux_vc1es.c @@ -0,0 +1,453 @@ +/* + * Copyright (C) 2008 the xine project + * Copyright (C) 2008 Christophe Thommeret <hftom@free.fr> + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + */ + +/* + * demultiplexer for wmv9/vc1 elementary streams + * + * + * SMP (.rcv) format: + * + * ** header *** + * le24 number of frames + * C5 04 00 00 00 + * 4 bytes sequence header + * le32 height + * le32 width + * 0C 00 00 00 + * 8 bytes unknown + * le32 fps + * ************ + * le24 frame_size + * 80 + * le32 pts (ms) + * frame_size bytes of picture data + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <fcntl.h> +#include <unistd.h> +#include <string.h> + +/* #define LOG */ +#define LOG_MODULE "demux_vc1es" +#define LOG_VERBOSE + +#include "xine_internal.h" +#include "xineutils.h" +#include "compat.h" +#include "bswap.h" +#include "demux.h" + +#define SCRATCH_SIZE 36 +#define PRIVATE_SIZE 44 + +#define MODE_SMP 1 +#define MODE_AP 2 + + + +typedef struct { + demux_plugin_t demux_plugin; + + xine_stream_t *stream; + fifo_buffer_t *video_fifo; + fifo_buffer_t *audio_fifo; + input_plugin_t *input; + int status; + int mode; + int first_chunk; + uint8_t private[PRIVATE_SIZE]; + uint32_t video_step; + + uint32_t blocksize; +} demux_vc1_es_t ; + + + +typedef struct { + demux_class_t demux_class; +} demux_vc1_es_class_t; + + + +static int demux_vc1_es_next_smp( demux_vc1_es_t *this ) +{ + buf_element_t *buf; + uint32_t pts=0, frame_size=0; + off_t done; + uint8_t head[SCRATCH_SIZE]; + int start_flag = 1; + + if ( this->first_chunk ) { + this->input->read( this->input, head, SCRATCH_SIZE ); + this->first_chunk = 0; + } + + done = this->input->read( this->input, head, 8 ); + frame_size = _X_LE_24( head ); + pts = _X_LE_32( head+4 ); + + done = 0; + while ( frame_size>0 ) { + buf = this->video_fifo->buffer_pool_alloc(this->video_fifo); + off_t read = (frame_size>buf->max_size) ? 
buf->max_size : frame_size; + done = this->input->read( this->input, buf->mem, read ); + if ( done<=0 ) { + buf->free_buffer( buf ); + this->status = DEMUX_FINISHED; + return 0; + } + buf->size = done; + buf->content = buf->mem; + buf->type = BUF_VIDEO_WMV9; + buf->pts = pts*90; + frame_size -= done; + if ( start_flag ) { + buf->decoder_flags = BUF_FLAG_FRAME_START; + start_flag = 0; + } + if ( !(frame_size>0) ) + buf->decoder_flags = BUF_FLAG_FRAME_END; + this->video_fifo->put(this->video_fifo, buf); + } + + return 1; +} + + + +static int demux_vc1_es_next_ap( demux_vc1_es_t *this ) +{ + buf_element_t *buf; + uint32_t blocksize; + off_t done; + + buf = this->video_fifo->buffer_pool_alloc(this->video_fifo); + blocksize = (this->blocksize ? this->blocksize : buf->max_size); + done = this->input->read(this->input, buf->mem, blocksize); + + if (done <= 0) { + buf->free_buffer (buf); + this->status = DEMUX_FINISHED; + return 0; + } + + buf->size = done; + buf->content = buf->mem; + buf->pts = 0; + buf->type = BUF_VIDEO_VC1; + + if( this->input->get_length (this->input) ) + buf->extra_info->input_normpos = (int)( (double)this->input->get_current_pos( this->input )*65535/this->input->get_length( this->input ) ); + + this->video_fifo->put(this->video_fifo, buf); + + return 1; +} + + + +static int demux_vc1_es_send_chunk( demux_plugin_t *this_gen ) +{ + demux_vc1_es_t *this = (demux_vc1_es_t *) this_gen; + + if ( this->mode==MODE_SMP ) { + if (!demux_vc1_es_next_smp(this)) + this->status = DEMUX_FINISHED; + return this->status; + } + + if (!demux_vc1_es_next_ap(this)) + this->status = DEMUX_FINISHED; + return this->status; +} + + + +static int demux_vc1_es_get_status( demux_plugin_t *this_gen ) +{ + demux_vc1_es_t *this = (demux_vc1_es_t *) this_gen; + + return this->status; +} + + + +static void demux_vc1_es_send_headers( demux_plugin_t *this_gen ) +{ + demux_vc1_es_t *this = (demux_vc1_es_t *) this_gen; + + this->video_fifo = this->stream->video_fifo; + this->audio_fifo 
= this->stream->audio_fifo; + _x_stream_info_set(this->stream, XINE_STREAM_INFO_HAS_VIDEO, 1); + _x_stream_info_set(this->stream, XINE_STREAM_INFO_HAS_AUDIO, 0); + _x_demux_control_start(this->stream); + this->blocksize = this->input->get_blocksize(this->input); + this->status = DEMUX_OK; + + if ( this->mode==MODE_SMP ) { + buf_element_t *buf; + buf = this->video_fifo->buffer_pool_alloc(this->video_fifo); + xine_fast_memcpy( buf->mem, this->private, PRIVATE_SIZE ); + buf->size = PRIVATE_SIZE; + buf->content = buf->mem; + buf->decoder_flags = BUF_FLAG_HEADER|BUF_FLAG_STDHEADER|BUF_FLAG_FRAME_END; + if ( this->video_step ) { + buf->decoder_flags |= BUF_FLAG_FRAMERATE; + buf->decoder_info[0] = 90000/this->video_step; + } + buf->type = BUF_VIDEO_WMV9; + this->video_fifo->put(this->video_fifo, buf); + } +} + + + +static int demux_vc1_es_seek( demux_plugin_t *this_gen, off_t start_pos, int start_time, int playing ) +{ + demux_vc1_es_t *this = (demux_vc1_es_t *) this_gen; + + if ( this->mode==MODE_SMP ) { + this->status = DEMUX_OK; + return this->status; + } + + start_pos = (off_t) ( (double) start_pos / 65535 * + this->input->get_length (this->input) ); + + this->status = DEMUX_OK; + + if (playing) + _x_demux_flush_engine(this->stream); + + if (INPUT_IS_SEEKABLE(this->input)) { + + /* FIXME: implement time seek */ + + if (start_pos != this->input->seek (this->input, start_pos, SEEK_SET)) { + this->status = DEMUX_FINISHED; + return this->status; + } + lprintf ("seeking to %"PRId64"\n", start_pos); + } + + /* + * now start demuxing + */ + this->status = DEMUX_OK; + + return this->status; +} + + + +static void demux_vc1_es_dispose( demux_plugin_t *this ) +{ + free (this); +} + + + +static int demux_vc1_es_get_stream_length( demux_plugin_t *this_gen ) +{ + return 0 ; /*FIXME: implement */ +} + + + +static uint32_t demux_vc1_es_get_capabilities( demux_plugin_t *this_gen ) +{ + return DEMUX_CAP_NOCAP; +} + + + +static int demux_vc1_es_get_optional_data( demux_plugin_t 
*this_gen, void *data, int data_type ) +{ + return DEMUX_OPTIONAL_UNSUPPORTED; +} + + + +static demux_plugin_t *open_plugin( demux_class_t *class_gen, xine_stream_t *stream, input_plugin_t *input ) +{ + + demux_vc1_es_t *this; + uint8_t scratch[SCRATCH_SIZE]; + int i, read, found=0; + + switch (stream->content_detection_method) { + + case METHOD_BY_CONTENT: { + read = _x_demux_read_header(input, scratch, SCRATCH_SIZE); + if (!read) + return NULL; + lprintf("read size =%d\n",read); + + /* simple and main profiles */ + if ( read>=SCRATCH_SIZE ) { + lprintf("searching for rcv format..\n"); + if ( scratch[3]==0xc5 && scratch[4]==4 && scratch[5]==0 && scratch[6]==0 && scratch[7]==0 && scratch[20]==0x0c && scratch[21]==0 && scratch[22]==0 && scratch[23]==0 ) { + lprintf("rcv format found\n"); + found = MODE_SMP; + } + } + + if ( found==0 ) { + /* advanced profile */ + for (i = 0; i < read-4; i++) { + lprintf ("%02x %02x %02x %02x\n", scratch[i], scratch[i+1], scratch[i+2], scratch[i+3]); + if ((scratch[i] == 0x00) && (scratch[i+1] == 0x00) && (scratch[i+2] == 0x01)) { + if (scratch[i+3] == 0x0f) { + found = MODE_AP; + lprintf ("found header at offset 0x%x\n", i); + break; + } + } + } + } + + if (found == 0) + return NULL; + lprintf ("input accepted.\n"); + } + break; + + case METHOD_BY_EXTENSION: { + const char *extensions, *mrl; + + mrl = input->get_mrl (input); + extensions = class_gen->get_extensions (class_gen); + + if (!_x_demux_check_extension (mrl, extensions)) + return NULL; + } + break; + + case METHOD_EXPLICIT: + break; + + default: + return NULL; + } + + this = calloc(1, sizeof(demux_vc1_es_t)); + this->mode = found; + this->first_chunk = 1; + if ( found==MODE_SMP ) { + xine_fast_memcpy( this->private+8, scratch+12, 4 ); /* height */ + xine_fast_memcpy( this->private+4, scratch+16, 4 ); /* width */ + xine_fast_memcpy( this->private+40, scratch+8, 4 ); /* sequence header */ + this->video_step = _X_LE_32( scratch+32 ); + } + this->stream = stream; + this->input 
= input; + + this->demux_plugin.send_headers = demux_vc1_es_send_headers; + this->demux_plugin.send_chunk = demux_vc1_es_send_chunk; + this->demux_plugin.seek = demux_vc1_es_seek; + this->demux_plugin.dispose = demux_vc1_es_dispose; + this->demux_plugin.get_status = demux_vc1_es_get_status; + this->demux_plugin.get_stream_length = demux_vc1_es_get_stream_length; + this->demux_plugin.get_capabilities = demux_vc1_es_get_capabilities; + this->demux_plugin.get_optional_data = demux_vc1_es_get_optional_data; + this->demux_plugin.demux_class = class_gen; + + this->status = DEMUX_FINISHED; + + return &this->demux_plugin; +} + + + +static const char *get_description( demux_class_t *this_gen ) +{ + return "VC1 elementary stream demux plugin"; +} + + + +static const char *get_identifier( demux_class_t *this_gen ) +{ + return "VC1_ES"; +} + + + +static const char *get_extensions( demux_class_t *this_gen ) +{ + return ""; +} + + + +static const char *get_mimetypes( demux_class_t *this_gen ) +{ + return NULL; +} + + + +static void class_dispose( demux_class_t *this_gen ) +{ + demux_vc1_es_class_t *this = (demux_vc1_es_class_t *) this_gen; + + free (this); +} + + + +static void *init_plugin( xine_t *xine, void *data ) +{ + demux_vc1_es_class_t *this; + + this = calloc(1, sizeof(demux_vc1_es_class_t)); + + this->demux_class.open_plugin = open_plugin; + this->demux_class.get_description = get_description; + this->demux_class.get_identifier = get_identifier; + this->demux_class.get_mimetypes = get_mimetypes; + this->demux_class.get_extensions = get_extensions; + this->demux_class.dispose = class_dispose; + + return this; +} + + +/* + * exported plugin catalog entry + */ +static const demuxer_info_t demux_info_vc1es = { + 0 /* priority */ +}; + + + +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* type, API, "name", version, special_info, init_function */ + { PLUGIN_DEMUX, 26, "vc1es", XINE_VERSION_CODE, &demux_info_vc1es, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL 
} +}; diff --git a/src/libspudvb/xine_spudvb_decoder.c b/src/libspudvb/xine_spudvb_decoder.c index bcb0cbf96..62dced2d5 100644 --- a/src/libspudvb/xine_spudvb_decoder.c +++ b/src/libspudvb/xine_spudvb_decoder.c @@ -32,7 +32,11 @@ #include "osd.h" #define MAX_REGIONS 7 -/*#define LOG 1*/ +#define SPU_MAX_WIDTH 720 +#define SPU_MAX_HEIGHT 576 + +/*#define LOG*/ +#define LOG_MODULE "spudvb" typedef struct { int x, y; @@ -144,19 +148,15 @@ static void update_region (dvb_spu_decoder_t * this, int region_id, int region_w region_t *reg = &dvbsub->regions[region_id]; /* reject invalid sizes and set some limits ! */ - if ( region_width<=0 || region_height<=0 || region_width>720 || region_height>576 ) { + if ( region_width<=0 || region_height<=0 || region_width>SPU_MAX_WIDTH || region_height>SPU_MAX_HEIGHT ) { free( reg->img ); reg->img = NULL; -#ifdef LOG - printf("SPUDVB: rejected region %d = %dx%d\n", region_id, region_width, region_height ); -#endif + lprintf("rejected region %d = %dx%d\n", region_id, region_width, region_height ); return; } if ( (reg->width*reg->height) < (region_width*region_height) ) { -#ifdef LOG - printf("SPUDVB: update size of region %d = %dx%d\n", region_id, region_width, region_height); -#endif + lprintf("update size of region %d = %dx%d\n", region_id, region_width, region_height); free( reg->img ); reg->img = NULL; } @@ -172,9 +172,7 @@ static void update_region (dvb_spu_decoder_t * this, int region_id, int region_w if ( fill ) { memset( reg->img, fill_color, region_width*region_height ); reg->empty = 1; -#ifdef LOG - printf("SPUDVB : FILL REGION %d\n", region_id); -#endif + lprintf("FILL REGION %d\n", region_id); } reg->width = region_width; reg->height = region_height; @@ -614,13 +612,11 @@ static void* dvbsub_timer_func(void *this_gen) if(this && this->stream && this->stream->osd_renderer) { int i; for ( i=0; i<MAX_REGIONS; i++ ) { - if ( !this->dvbsub->regions[i].osd ) - continue; + if ( !this->dvbsub->regions[i].osd ) + continue; - 
this->stream->osd_renderer->hide( this->dvbsub->regions[i].osd, 0 ); -#ifdef LOG - printf("SPUDVB: thread hiding = %d\n",i); -#endif + this->stream->osd_renderer->hide( this->dvbsub->regions[i].osd, 0 ); + lprintf("thread hiding = %d\n",i); } } pthread_cond_wait(&this->dvbsub_restart_timeout, &this->dvbsub_osd_mutex); @@ -647,7 +643,7 @@ static void draw_subtitles (dvb_spu_decoder_t * this) int dest_width=0, dest_height; this->stream->video_out->status(this->stream->video_out, NULL, &dest_width, &dest_height, &dum); - if ( !dest_width ) + if ( !dest_width || !dest_height ) return; /* render all regions onto the page */ @@ -657,8 +653,8 @@ static void draw_subtitles (dvb_spu_decoder_t * this) int display = 0; for ( r=0; r<MAX_REGIONS; r++ ) { if ( this->dvbsub->page.regions[r].is_visible ) { - display = 1; - break; + display = 1; + break; } } if ( !display ) @@ -678,7 +674,7 @@ static void draw_subtitles (dvb_spu_decoder_t * this) uint8_t *reg; int reg_width; uint8_t tmp[dest_width*576]; - if (this->dvbsub->regions[r].width>dest_width) { + if ( this->dvbsub->regions[r].width>dest_width && !(this->stream->video_driver->get_capabilities(this->stream->video_driver) & VO_CAP_CUSTOM_EXTENT_OVERLAY)) { downscale_region_image(&this->dvbsub->regions[r], tmp, dest_width); reg = tmp; reg_width = dest_width; @@ -694,34 +690,25 @@ static void draw_subtitles (dvb_spu_decoder_t * this) } pthread_mutex_lock(&this->dvbsub_osd_mutex); -#ifdef LOG - printf("SPUDVB: this->vpts=%llu\n",this->vpts); -#endif + lprintf("this->vpts=%llu\n",this->vpts); for ( r=0; r<MAX_REGIONS; r++ ) { -#ifdef LOG - printf("SPUDVB : region=%d, visible=%d, osd=%d, empty=%d\n", r, this->dvbsub->page.regions[r].is_visible, this->dvbsub->regions[r].osd?1:0, this->dvbsub->regions[r].empty ); -#endif + lprintf("region=%d, visible=%d, osd=%d, empty=%d\n", r, this->dvbsub->page.regions[r].is_visible, this->dvbsub->regions[r].osd?1:0, this->dvbsub->regions[r].empty ); if ( this->dvbsub->page.regions[r].is_visible 
&& this->dvbsub->regions[r].osd && !this->dvbsub->regions[r].empty ) { + this->stream->osd_renderer->set_extent(this->dvbsub->regions[r].osd, SPU_MAX_WIDTH, SPU_MAX_HEIGHT); this->stream->osd_renderer->set_position( this->dvbsub->regions[r].osd, this->dvbsub->page.regions[r].x, this->dvbsub->page.regions[r].y ); this->stream->osd_renderer->show( this->dvbsub->regions[r].osd, this->vpts ); -#ifdef LOG - printf("SPUDVB: show region = %d\n",r); -#endif + lprintf("show region = %d\n",r); } else { if ( this->dvbsub->regions[r].osd ) { this->stream->osd_renderer->hide( this->dvbsub->regions[r].osd, this->vpts ); -#ifdef LOG - printf("SPUDVB: hide region = %d\n",r); -#endif + lprintf("hide region = %d\n",r); } } } this->dvbsub_hide_timeout.tv_nsec = 0; this->dvbsub_hide_timeout.tv_sec = time(NULL) + this->dvbsub->page.page_time_out; -#ifdef LOG - printf("SPUDVB: page_time_out %d\n",this->dvbsub->page.page_time_out); -#endif + lprintf("page_time_out %d\n",this->dvbsub->page.page_time_out); pthread_cond_signal(&this->dvbsub_restart_timeout); pthread_mutex_unlock(&this->dvbsub_osd_mutex); } @@ -780,9 +767,7 @@ static void spudec_decode_data (spu_decoder_t * this_gen, buf_element_t * buf) metronom_clock_t *const clock = this->stream->xine->clock; const int64_t curvpts = clock->get_current_time( clock ); /* if buf->pts is unreliable, show page asap (better than nothing) */ -#ifdef LOG - printf("SPUDVB: spu_vpts=%lld - current_vpts=%lld\n", vpts, curvpts); -#endif + lprintf("spu_vpts=%lld - current_vpts=%lld\n", vpts, curvpts); if ( vpts<=curvpts || (vpts-curvpts)>(5*90000) ) this->vpts = 0; else diff --git a/src/libvdpau/Makefile.am b/src/libvdpau/Makefile.am new file mode 100644 index 000000000..140168483 --- /dev/null +++ b/src/libvdpau/Makefile.am @@ -0,0 +1,27 @@ +include $(top_srcdir)/misc/Makefile.common + +AM_CFLAGS = $(VISIBILITY_FLAG) +AM_LDFLAGS = $(xineplug_ldflags) + +if HAVE_VDPAU +vdpau_h264_module = xineplug_decode_vdpau_h264.la +VDPAU_CFLAGS = -D_ISOC99_SOURCE 
+ +vdpau_mpeg12_module = xineplug_decode_vdpau_mpeg12.la + +vdpau_vc1_module = xineplug_decode_vdpau_vc1.la +endif + +xineplug_LTLIBRARIES = $(vdpau_h264_module) $(vdpau_mpeg12_module) $(vdpau_vc1_module) + +xineplug_decode_vdpau_h264_la_SOURCES = nal.c dpb.c h264_parser.c vdpau_h264.c +xineplug_decode_vdpau_h264_la_CFLAGS = $(AM_CFLAGS) $(VDPAU_CFLAGS) +xineplug_decode_vdpau_h264_la_LIBADD = $(XINE_LIB) $(DYNAMIC_LD_LIBS) -lm + +xineplug_decode_vdpau_mpeg12_la_SOURCES = vdpau_mpeg12.c +xineplug_decode_vdpau_mpeg12_la_CFLAGS = $(AM_CFLAGS) +xineplug_decode_vdpau_mpeg12_la_LIBADD = $(XINE_LIB) $(DYNAMIC_LD_LIBS) + +xineplug_decode_vdpau_vc1_la_SOURCES = vdpau_vc1.c +xineplug_decode_vdpau_vc1_la_CFLAGS = $(AM_CFLAGS) +xineplug_decode_vdpau_vc1_la_LIBADD = $(XINE_LIB) $(DYNAMIC_LD_LIBS) diff --git a/src/libvdpau/bits_reader.h b/src/libvdpau/bits_reader.h new file mode 100644 index 000000000..9563c9d3b --- /dev/null +++ b/src/libvdpau/bits_reader.h @@ -0,0 +1,36 @@ +#include <sys/types.h> + + + +typedef struct { + uint8_t *buffer; + int offbits; +} bits_reader_t; + +static void bits_reader_set( bits_reader_t *br, uint8_t *buf ) +{ + br->buffer = buf; + br->offbits = 0; +} + +static uint32_t read_bits( bits_reader_t *br, int nbits ) +{ + int i, nbytes; + uint32_t ret = 0; + uint8_t *buf; + + buf = br->buffer; + nbytes = (br->offbits + nbits)/8; + if ( ((br->offbits + nbits) %8 ) > 0 ) + nbytes++; + for ( i=0; i<nbytes; i++ ) + ret += buf[i]<<((nbytes-i-1)*8); + i = (4-nbytes)*8+br->offbits; + ret = ((ret<<i)>>i)>>((nbytes*8)-nbits-br->offbits); + + br->offbits += nbits; + br->buffer += br->offbits / 8; + br->offbits %= 8; + + return ret; +} diff --git a/src/libvdpau/dpb.c b/src/libvdpau/dpb.c new file mode 100644 index 000000000..31677e51c --- /dev/null +++ b/src/libvdpau/dpb.c @@ -0,0 +1,409 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. 
+ * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * dpb.c: Implementing Decoded Picture Buffer + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "dpb.h" +#include "nal.h" +#include "video_out.h" + +struct decoded_picture* init_decoded_picture(struct nal_unit *src_nal, + VdpVideoSurface surface, vo_frame_t *img) +{ + struct decoded_picture *pic = calloc(1, sizeof(struct decoded_picture)); + pic->nal = init_nal_unit(); + copy_nal_unit(pic->nal, src_nal); + pic->top_is_reference = pic->nal->slc->field_pic_flag + ? (pic->nal->slc->bottom_field_flag ? 0 : 1) : 1; + pic->bottom_is_reference = pic->nal->slc->field_pic_flag + ? (pic->nal->slc->bottom_field_flag ? 
1 : 0) : 1; + pic->surface = surface; + pic->img = img; + + return pic; +} + +void free_decoded_picture(struct decoded_picture *pic) +{ + pic->img->free(pic->img); + free_nal_unit(pic->nal); + free(pic); +} + +struct decoded_picture* dpb_get_next_out_picture(struct dpb *dpb, int do_flush) +{ + struct decoded_picture *pic = dpb->pictures; + struct decoded_picture *outpic = NULL; + + if(!do_flush && dpb->used < MAX_DPB_SIZE) + return NULL; + + if (pic != NULL) + do { + if (pic->delayed_output && + (outpic == NULL || + (pic->nal->top_field_order_cnt <= outpic->nal->top_field_order_cnt && + pic->nal->bottom_field_order_cnt <= outpic->nal->bottom_field_order_cnt)|| + (outpic->nal->top_field_order_cnt < 0 && pic->nal->top_field_order_cnt > 0 && + outpic->nal->bottom_field_order_cnt < 0 && pic->nal->bottom_field_order_cnt > 0)|| + outpic->nal->nal_unit_type == NAL_SLICE_IDR)) + outpic = pic; + } while ((pic = pic->next) != NULL); + + return outpic; +} + +struct decoded_picture* dpb_get_picture(struct dpb *dpb, uint32_t picnum) +{ + struct decoded_picture *pic = dpb->pictures; + + if (pic != NULL) + do { + if (pic->nal->curr_pic_num == picnum) + return pic; + } while ((pic = pic->next) != NULL); + + return NULL; +} + +struct decoded_picture* dpb_get_picture_by_ltpn(struct dpb *dpb, + uint32_t longterm_picnum) +{ + struct decoded_picture *pic = dpb->pictures; + + if (pic != NULL) + do { + if (pic->nal->long_term_pic_num == longterm_picnum) + return pic; + } while ((pic = pic->next) != NULL); + + return NULL; +} + +struct decoded_picture* dpb_get_picture_by_ltidx(struct dpb *dpb, + uint32_t longterm_idx) +{ + struct decoded_picture *pic = dpb->pictures; + + if (pic != NULL) + do { + if (pic->nal->long_term_frame_idx == longterm_idx) + return pic; + } while ((pic = pic->next) != NULL); + + return NULL; +} + +int dpb_set_unused_ref_picture_a(struct dpb *dpb, struct decoded_picture *refpic) +{ + struct decoded_picture *pic = dpb->pictures; + if (pic != NULL) + do { + if (pic == 
refpic) { + pic->used_for_reference = 0; + if(!pic->delayed_output) + dpb_remove_picture(dpb, pic); + return 0; + } + } while ((pic = pic->next) != NULL); + + return -1; +} + +int dpb_set_unused_ref_picture(struct dpb *dpb, uint32_t picnum) +{ + struct decoded_picture *pic = dpb->pictures; + if (pic != NULL) + do { + if (pic->nal->curr_pic_num == picnum) { + pic->used_for_reference = 0; + if(!pic->delayed_output) + dpb_remove_picture(dpb, pic); + return 0; + } + } while ((pic = pic->next) != NULL); + + return -1; +} + +int dpb_set_unused_ref_picture_byltpn(struct dpb *dpb, uint32_t longterm_picnum) +{ + struct decoded_picture *pic = dpb->pictures; + if (pic != NULL) + do { + if (pic->nal->long_term_pic_num == longterm_picnum) { + pic->used_for_reference = 0; + if(!pic->delayed_output) + dpb_remove_picture(dpb, pic); + return 0; + } + } while ((pic = pic->next) != NULL); + + return -1; +} + +int dpb_set_unused_ref_picture_bylidx(struct dpb *dpb, uint32_t longterm_idx) +{ + struct decoded_picture *pic = dpb->pictures; + if (pic != NULL) + do { + if (pic->nal->long_term_frame_idx == longterm_idx) { + pic->nal->used_for_long_term_ref = 0; + pic->used_for_reference = 0; + if(!pic->delayed_output) + dpb_remove_picture(dpb, pic); + return 0; + } + } while ((pic = pic->next) != NULL); + + return -1; +} + +int dpb_set_unused_ref_picture_lidx_gt(struct dpb *dpb, uint32_t longterm_idx) +{ + struct decoded_picture *pic = dpb->pictures; + if (pic != NULL) + do { + if (pic->nal->long_term_frame_idx >= longterm_idx) { + pic->used_for_reference = 0; + if(!pic->delayed_output) { + struct decoded_picture *next_pic = pic->next; + dpb_remove_picture(dpb, pic); + pic = next_pic; + continue; + } + } + } while ((pic = pic->next) != NULL); + + return -1; +} + + +int dpb_set_output_picture(struct dpb *dpb, struct decoded_picture *outpic) +{ + struct decoded_picture *pic = dpb->pictures; + if (pic != NULL) + do { + if (pic == outpic) { + pic->delayed_output = 0; + 
if(!pic->used_for_reference) + dpb_remove_picture(dpb, pic); + return 0; + } + } while ((pic = pic->next) != NULL); + + return -1; +} + +int dpb_remove_picture(struct dpb *dpb, struct decoded_picture *rempic) +{ + struct decoded_picture *pic = dpb->pictures; + struct decoded_picture *last_pic = NULL; + + if (pic != NULL) + do { + if (pic == rempic) { + // FIXME: free the picture.... + + if (last_pic != NULL) + last_pic->next = pic->next; + else + dpb->pictures = pic->next; + free_decoded_picture(pic); + dpb->used--; + return 0; + } + + last_pic = pic; + } while ((pic = pic->next) != NULL); + + return -1; +} + +static int dpb_remove_picture_by_img(struct dpb *dpb, vo_frame_t *remimg) +{ + struct decoded_picture *pic = dpb->pictures; + struct decoded_picture *last_pic = NULL; + + if (pic != NULL) + do { + if (pic->img == remimg) { + // FIXME: free the picture.... + + if (last_pic != NULL) + last_pic->next = pic->next; + else + dpb->pictures = pic->next; + free_decoded_picture(pic); + dpb->used--; + return 0; + } + + last_pic = pic; + } while ((pic = pic->next) != NULL); + + return -1; +} + +int dpb_remove_picture_by_picnum(struct dpb *dpb, uint32_t picnum) +{ + struct decoded_picture *pic = dpb->pictures; + struct decoded_picture *last_pic = NULL; + + if (pic != NULL) + do { + if (pic->nal->curr_pic_num == picnum) { + dpb_remove_picture(dpb, pic); + } + + last_pic = pic; + } while ((pic = pic->next) != NULL); + + return -1; +} + +int dpb_add_picture(struct dpb *dpb, struct decoded_picture *pic, uint32_t num_ref_frames) +{ + pic->img->lock(pic->img); + if (0 == dpb_remove_picture_by_img(dpb, pic->img)) + fprintf(stderr, "broken stream: current img was already in dpb -- freed it\n"); + else + pic->img->free(pic->img); + + int i = 0; + struct decoded_picture *last_pic = dpb->pictures; + + pic->next = dpb->pictures; + dpb->pictures = pic; + dpb->num_ref_frames = num_ref_frames; + dpb->used++; + + if(pic != NULL && dpb->used > num_ref_frames) { + do { + 
if(pic->used_for_reference) { + i++; + if(i>num_ref_frames) { + pic->used_for_reference = 0; + if(pic == dpb->pictures) + last_pic = pic->next; + + if(!pic->delayed_output) { + dpb_remove_picture(dpb, pic); + } + pic = last_pic; + if(pic == dpb->pictures) + continue; + } + last_pic = pic; + } + } while (pic != NULL && (pic = pic->next) != NULL); + } + + return 0; +} + +int dpb_flush(struct dpb *dpb) +{ + struct decoded_picture *pic = dpb->pictures; + + if (pic != NULL) + do { + struct decoded_picture *next_pic = pic->next; + dpb_set_unused_ref_picture_a(dpb, pic); + pic = next_pic; + } while (pic != NULL); + + //printf("Flushed, used: %d\n", dpb->used); + + return 0; +} + +void dpb_free_all( struct dpb *dpb ) +{ + struct decoded_picture *pic = dpb->pictures; + + if (pic != NULL) + do { + struct decoded_picture *next_pic = pic->next; + free_decoded_picture(pic); + --dpb->used; + pic = next_pic; + } while (pic != NULL); + + printf("dpb_free_all, used: %d\n", dpb->used); + dpb->pictures = NULL; +} + +void dpb_clear_all_pts( struct dpb *dpb ) +{ + struct decoded_picture *pic = dpb->pictures; + + while (pic != NULL) { + pic->img->pts = 0; + pic = pic->next; + } +} + +int fill_vdpau_reference_list(struct dpb *dpb, VdpReferenceFrameH264 *reflist) +{ + struct decoded_picture *pic = dpb->pictures; + struct decoded_picture *last_pic = NULL; + + int i = 0; + int used_refframes = 0; + + if (pic != NULL) + do { + if (pic->used_for_reference) { + reflist[i].surface = pic->surface; + reflist[i].is_long_term = pic->nal->used_for_long_term_ref; + if(reflist[i].is_long_term) + reflist[i].frame_idx = pic->nal->slc->frame_num; + else + reflist[i].frame_idx = pic->nal->slc->frame_num; + reflist[i].top_is_reference = pic->top_is_reference; + reflist[i].bottom_is_reference = pic->bottom_is_reference; + reflist[i].field_order_cnt[0] = pic->nal->top_field_order_cnt; + reflist[i].field_order_cnt[1] = pic->nal->bottom_field_order_cnt; + i++; + } + last_pic = pic; + } while ((pic = pic->next) 
!= NULL && i < 16); + + used_refframes = i; + + // fill all other frames with invalid handles + while(i < 16) { + reflist[i].bottom_is_reference = VDP_FALSE; + reflist[i].top_is_reference = VDP_FALSE; + reflist[i].frame_idx = 0; + reflist[i].is_long_term = VDP_FALSE; + reflist[i].surface = VDP_INVALID_HANDLE; + reflist[i].field_order_cnt[0] = 0; + reflist[i].field_order_cnt[1] = 0; + i++; + } + + return used_refframes; +} diff --git a/src/libvdpau/dpb.h b/src/libvdpau/dpb.h new file mode 100644 index 000000000..6be2acec8 --- /dev/null +++ b/src/libvdpau/dpb.h @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * dpb.h: Decoder Picture Buffer + */ + +#ifndef DPB_H_ +#define DPB_H_ + +#define MAX_DPB_SIZE 16 + +#include "nal.h" +#include "video_out.h" + +struct decoded_picture { + VdpVideoSurface surface; + vo_frame_t *img; /* this is the image we block, to make sure + * the surface is not double-used */ + struct nal_unit *nal; + + uint8_t used_for_reference; + uint8_t top_is_reference; + uint8_t bottom_is_reference; + + uint8_t delayed_output; + + struct decoded_picture *next; +}; + +/* Decoded Picture Buffer */ +struct dpb { + struct decoded_picture *pictures; + + uint32_t num_ref_frames; + uint32_t used; +}; + +struct decoded_picture* init_decoded_picture(struct nal_unit *src_nal, + VdpVideoSurface surface, vo_frame_t *img); +void free_decoded_picture(struct decoded_picture *pic); + +struct decoded_picture* dpb_get_next_out_picture(struct dpb *dpb, int do_flush); + +struct decoded_picture* dpb_get_picture(struct dpb *dpb, uint32_t picnum); +struct decoded_picture* dpb_get_picture_by_ltpn(struct dpb *dpb, uint32_t longterm_picnum); +struct decoded_picture* dpb_get_picture_by_ltidx(struct dpb *dpb, uint32_t longterm_idx); + +int dpb_set_unused_ref_picture(struct dpb *dpb, uint32_t picnum); +int dpb_set_unused_ref_picture_a(struct dpb *dpb, struct decoded_picture *refpic); +int dpb_set_unused_ref_picture_byltpn(struct dpb *dpb, uint32_t longterm_picnum); +int dpb_set_unused_ref_picture_bylidx(struct dpb *dpb, uint32_t longterm_idx); +int dpb_set_unused_ref_picture_lidx_gt(struct dpb *dpb, uint32_t longterm_idx); + +int dpb_set_output_picture(struct dpb *dpb, struct decoded_picture *outpic); + +int dpb_remove_picture(struct dpb *dpb, struct decoded_picture *rempic); +int dpb_add_picture(struct dpb *dpb, struct decoded_picture *pic, uint32_t num_ref_frames); 
+int dpb_flush(struct dpb *dpb); +void dpb_free_all( struct dpb *dpb ); +void dpb_clear_all_pts( struct dpb *dpb ); + +int fill_vdpau_reference_list(struct dpb *dpb, VdpReferenceFrameH264 *reflist); + +#endif /* DPB_H_ */ diff --git a/src/libvdpau/h264_parser.c b/src/libvdpau/h264_parser.c new file mode 100644 index 000000000..fffa1389e --- /dev/null +++ b/src/libvdpau/h264_parser.c @@ -0,0 +1,1594 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * h264_parser.c: Almost full-features H264 NAL-Parser + */ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> + +#include "h264_parser.h" +#include "nal.h" + +/* default scaling_lists according to Table 7-2 */ +uint8_t default_4x4_intra[16] = { 6, 13, 13, 20, 20, 20, 28, 28, 28, 28, 32, + 32, 32, 37, 37, 42 }; + +uint8_t default_4x4_inter[16] = { 10, 14, 14, 20, 20, 20, 24, 24, 24, 24, 27, + 27, 27, 30, 30, 34 }; + +uint8_t default_8x8_intra[64] = { 6, 10, 10, 13, 11, 13, 16, 16, 16, 16, 18, + 18, 18, 18, 18, 23, 23, 23, 23, 23, 23, 25, 25, 25, 25, 25, 25, 25, 27, 27, + 27, 27, 27, 27, 27, 27, 29, 29, 29, 29, 29, 29, 29, 31, 31, 31, 31, 31, 31, + 33, 33, 33, 33, 33, 36, 36, 36, 36, 38, 38, 38, 40, 40, 42 }; + +uint8_t default_8x8_inter[64] = { 9, 13, 13, 15, 
13, 15, 17, 17, 17, 17, 19, + 19, 19, 19, 19, 21, 21, 21, 21, 21, 21, 22, 22, 22, 22, 22, 22, 22, 24, 24, + 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 27, 27, 27, 27, 27, 27, + 28, 28, 28, 28, 28, 30, 30, 30, 30, 32, 32, 32, 33, 33, 35 }; + +struct buf_reader +{ + uint8_t *buf; + uint8_t *cur_pos; + int len; + int cur_offset; +}; + +static inline uint32_t read_bits(struct buf_reader *buf, int len); +uint32_t read_exp_golomb(struct buf_reader *buf); +int32_t read_exp_golomb_s(struct buf_reader *buf); + +void calculate_pic_order(struct nal_parser *parser); +void skip_scaling_list(struct buf_reader *buf, int size); +void parse_scaling_list(struct buf_reader *buf, uint8_t *scaling_list, + int length, int index); +int parse_nal_header(struct buf_reader *buf, struct nal_parser *parser); +static void sps_scaling_list_fallback(struct seq_parameter_set_rbsp *sps, int i); +static void pps_scaling_list_fallback(struct seq_parameter_set_rbsp *sps, struct pic_parameter_set_rbsp *pps, int i); +uint8_t parse_sps(struct buf_reader *buf, struct nal_parser *parser); +void parse_vui_parameters(struct buf_reader *buf, + struct seq_parameter_set_rbsp *sps); +void parse_hrd_parameters(struct buf_reader *buf, struct hrd_parameters *hrd); +uint8_t parse_pps(struct buf_reader *buf, struct pic_parameter_set_rbsp *pps, + struct seq_parameter_set_rbsp *sps); +void parse_sei(struct buf_reader *buf, struct nal_parser *parser); +uint8_t parse_slice_header(struct buf_reader *buf, struct nal_parser *parser); +void + parse_ref_pic_list_reordering(struct buf_reader *buf, struct nal_unit *nal, + struct nal_parser *parser); +void decode_ref_pic_marking(struct nal_unit *nal, + uint32_t memory_management_control_operation, + uint32_t marking_nr, + struct nal_parser *parser); +void parse_pred_weight_table(struct buf_reader *buf, struct nal_unit *nal); +void parse_dec_ref_pic_marking(struct buf_reader *buf, + struct nal_parser *parser); + +/* here goes the parser implementation */ + +static void 
decode_nal(uint8_t **ret, int *len_ret, uint8_t *buf, int buf_len) +{ + uint8_t *end = &buf[buf_len]; + uint8_t *pos = malloc(buf_len); + + *ret = pos; + while (buf < end) { + if (buf < end - 3 && buf[0] == 0x00 && buf[1] == 0x00 && buf[2] == 0x03) { + + *pos++ = 0x00; + *pos++ = 0x00; + + buf += 3; + continue; + } + *pos++ = *buf++; + } + + *len_ret = pos - *ret; +} + +#if 0 +static inline void dump_bits(const char *label, const struct buf_reader *buf, int bits) +{ + struct buf_reader lbuf; + memcpy(&lbuf, buf, sizeof(struct buf_reader)); + + int i; + printf("%s: 0b", label); + for(i=0; i < bits; i++) + printf("%d", read_bits(&lbuf, 1)); + printf("\n"); +} +#endif + +static inline uint32_t bits_read(struct buf_reader *buf) +{ + int bits_read = 0; + bits_read = (buf->cur_pos - buf->buf)*8; + bits_read += (8-buf->cur_offset); + + return bits_read; +} + +static inline uint32_t read_bits(struct buf_reader *buf, int len) +{ + static uint32_t i_mask[33] = { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, + 0x7f, 0xff, 0x1ff, 0x3ff, 0x7ff, 0xfff, 0x1fff, 0x3fff, 0x7fff, 0xffff, + 0x1ffff, 0x3ffff, 0x7ffff, 0xfffff, 0x1fffff, 0x3fffff, 0x7fffff, + 0xffffff, 0x1ffffff, 0x3ffffff, 0x7ffffff, 0xfffffff, 0x1fffffff, + 0x3fffffff, 0x7fffffff, 0xffffffff }; + + int i_shr; + uint32_t bits = 0; + + while (len > 0 && (buf->cur_pos - buf->buf) < buf->len) { + if ((i_shr = buf->cur_offset - len) >= 0) { + bits |= (*buf->cur_pos >> i_shr) & i_mask[len]; + buf->cur_offset -= len; + if (buf->cur_offset == 0) { + buf->cur_pos++; + buf->cur_offset = 8; + } + return bits; + } + else { + bits |= (*buf->cur_pos & i_mask[buf->cur_offset]) << -i_shr; + len -= buf->cur_offset; + buf->cur_pos++; + buf->cur_offset = 8; + } + } + return bits; +} + +/* determines if following bits are rtsb_trailing_bits */ +static inline int rbsp_trailing_bits(uint8_t *buf, int buf_len) +{ + uint8_t *cur_buf = buf+(buf_len-1); + uint8_t cur_val; + int parsed_bits = 0; + int i; + + while(buf_len > 0) { + cur_val = 
*cur_buf; + for(i = 0; i < 9; i++) { + if (cur_val&1) + return parsed_bits+i; + cur_val>>=1; + } + parsed_bits += 8; + cur_buf--; + } + + printf("rbsp trailing bits could not be found\n"); + return 0; +} + +uint32_t read_exp_golomb(struct buf_reader *buf) +{ + int leading_zero_bits = 0; + + while (read_bits(buf, 1) == 0 && leading_zero_bits < 32) + leading_zero_bits++; + + uint32_t code = (1 << leading_zero_bits) - 1 + read_bits(buf, + leading_zero_bits); + return code; +} + +int32_t read_exp_golomb_s(struct buf_reader *buf) +{ + uint32_t ue = read_exp_golomb(buf); + int32_t code = ue & 0x01 ? (ue + 1) / 2 : -(ue / 2); + return code; +} + +int parse_nal_header(struct buf_reader *buf, struct nal_parser *parser) +{ + if (buf->len < 1) + return -1; + + int ret = -1; + + struct nal_unit *nal = parser->current_nal; + + memset(nal, 0x00, sizeof(struct nal_unit) - sizeof(struct seq_parameter_set_rbsp*) - sizeof(struct pic_parameter_set_rbsp*) - sizeof(struct slice_header*)); + nal->nal_ref_idc = (buf->buf[0] >> 5) & 0x03; + nal->nal_unit_type = buf->buf[0] & 0x1f; + + buf->cur_pos = buf->buf + 1; + //printf("NAL: %d\n", nal->nal_unit_type); + + struct buf_reader ibuf; + ibuf.cur_offset = 8; + + switch (nal->nal_unit_type) { + case NAL_SPS: + decode_nal(&ibuf.buf, &ibuf.len, buf->cur_pos, buf->len - 1); + ibuf.cur_pos = ibuf.buf; + + if (!nal->sps) + nal->sps = calloc(1, sizeof(struct seq_parameter_set_rbsp)); + else + memset(nal->sps, 0x00, sizeof(struct seq_parameter_set_rbsp)); + + parse_sps(&ibuf, parser); + free(ibuf.buf); + ret = NAL_SPS; + break; + case NAL_PPS: + if (!nal->pps) + nal->pps = calloc(1, sizeof(struct pic_parameter_set_rbsp)); + else + memset(nal->pps, 0x00, sizeof(struct pic_parameter_set_rbsp)); + + parse_pps(buf, nal->pps, nal->sps); + ret = NAL_PPS; + break; + case NAL_SLICE: + case NAL_PART_A: + case NAL_PART_B: + case NAL_PART_C: + case NAL_SLICE_IDR: + if (nal->sps && nal->pps) { + if (!nal->slc) + nal->slc = calloc(1, sizeof(struct 
slice_header)); + else + memset(nal->slc, 0x00, sizeof(struct slice_header)); + + parse_slice_header(buf, parser); + ret = nal->nal_unit_type; + } + break; + case NAL_SEI: + memset(&(nal->sei), 0x00, sizeof(struct sei_message)); + parse_sei(buf, parser); + ret = nal->nal_unit_type; + break; + default: + ret = nal->nal_unit_type; + break; + } + + return ret; +} + +void calculate_pic_order(struct nal_parser *parser) +{ + struct nal_unit *nal = parser->current_nal; + + struct seq_parameter_set_rbsp *sps = nal->sps; + struct slice_header *slc = nal->slc; + if (!sps || !slc) + return; + + if (nal->nal_unit_type == NAL_SLICE_IDR) { + parser->prev_pic_order_cnt_lsb = 0; + parser->prev_pic_order_cnt_msb = 0; + parser->frame_num_offset = 0; + } + + if (sps->pic_order_cnt_type == 0) { + + const int max_poc_lsb = 1 << (sps->log2_max_pic_order_cnt_lsb_minus4 + 4); + + if (slc->pic_order_cnt_lsb < parser->prev_pic_order_cnt_lsb + && parser->prev_pic_order_cnt_lsb - slc->pic_order_cnt_lsb + >= max_poc_lsb / 2) + parser->pic_order_cnt_msb = parser->prev_pic_order_cnt_msb + max_poc_lsb; + else if (slc->pic_order_cnt_lsb > parser->prev_pic_order_cnt_lsb + && parser->prev_pic_order_cnt_lsb - slc->pic_order_cnt_lsb + < -max_poc_lsb / 2) + parser->pic_order_cnt_msb = parser->prev_pic_order_cnt_msb - max_poc_lsb; + else + parser->pic_order_cnt_msb = parser->prev_pic_order_cnt_msb; + + if(!slc->field_pic_flag || !slc->bottom_field_flag) + nal->top_field_order_cnt = parser->pic_order_cnt_msb + slc->pic_order_cnt_lsb; + + nal->bottom_field_order_cnt = 0; + + if(!slc->field_pic_flag) + nal->bottom_field_order_cnt = nal->top_field_order_cnt + slc->delta_pic_order_cnt_bottom; + else //if(slc->bottom_field_flag) TODO: this is not spec compliant, but works... 
+ nal->bottom_field_order_cnt = parser->pic_order_cnt_msb + slc->pic_order_cnt_lsb; + + /*if(slc->bottom_field_flag) + nal->top_field_order_cnt = parser->last_nal->top_field_order_cnt;*/ + + } else if (sps->pic_order_cnt_type == 2) { + uint32_t prev_frame_num = parser->last_nal->slc->frame_num; + uint32_t prev_frame_num_offset = parser->frame_num_offset; + uint32_t max_frame_num = 1 << (sps->log2_max_frame_num_minus4+4); + uint32_t temp_pic_order_cnt = 0; + + if (parser->is_idr) + parser->frame_num_offset = 0; + else if (prev_frame_num > slc->frame_num) + parser->frame_num_offset = prev_frame_num_offset + max_frame_num; + else + parser->frame_num_offset = prev_frame_num_offset; + + if(parser->is_idr) + temp_pic_order_cnt = 0; + else if(nal->nal_ref_idc == 0) + temp_pic_order_cnt = 2 * (parser->frame_num_offset + slc->frame_num)-1; + else + temp_pic_order_cnt = 2 * (parser->frame_num_offset + slc->frame_num); + + if(!slc->field_pic_flag) + nal->top_field_order_cnt = nal->bottom_field_order_cnt = temp_pic_order_cnt; + else if(slc->bottom_field_flag) + nal->bottom_field_order_cnt = temp_pic_order_cnt; + else + nal->top_field_order_cnt = temp_pic_order_cnt; + + } else { + printf("FIXME: Unsupported poc_type: %d\n", sps->pic_order_cnt_type); + } + +} + +void skip_scaling_list(struct buf_reader *buf, int size) +{ + int i; + for (i = 0; i < size; i++) { + read_exp_golomb_s(buf); + } +} + +void parse_scaling_list(struct buf_reader *buf, uint8_t *scaling_list, + int length, int index) +{ + int last_scale = 8; + int next_scale = 8; + int32_t delta_scale; + uint8_t use_default_scaling_matrix_flag = 0; + int i; + + uint8_t *zigzag = (length==64) ? zigzag_8x8 : zigzag_4x4; + + for (i = 0; i < length; i++) { + if (next_scale != 0) { + delta_scale = read_exp_golomb_s(buf); + next_scale = (last_scale + delta_scale + 256) % 256; + if (i == 0 && next_scale == 0) { + use_default_scaling_matrix_flag = 1; + break; + } + } + scaling_list[zigzag[i]] = last_scale = (next_scale == 0) ? 
last_scale : next_scale; + } + + if (use_default_scaling_matrix_flag) { + switch (index) { + case 0: + case 1: + case 2: { + for(i = 0; i < sizeof(default_4x4_intra); i++) { + scaling_list[zigzag_4x4[i]] = default_4x4_intra[i]; + } + //memcpy(scaling_list, default_4x4_intra, sizeof(default_4x4_intra)); + break; + } + case 3: + case 4: + case 5: { + for(i = 0; i < sizeof(default_4x4_inter); i++) { + scaling_list[zigzag_4x4[i]] = default_4x4_inter[i]; + } + //memcpy(scaling_list, default_4x4_inter, sizeof(default_4x4_inter)); + break; + } + case 6: { + for(i = 0; i < sizeof(default_8x8_intra); i++) { + scaling_list[zigzag_8x8[i]] = default_8x8_intra[i]; + } + //memcpy(scaling_list, default_8x8_intra, sizeof(default_8x8_intra)); + break; + } + case 7: { + for(i = 0; i < sizeof(default_8x8_inter); i++) { + scaling_list[zigzag_8x8[i]] = default_8x8_inter[i]; + } + //memcpy(scaling_list, default_8x8_inter, sizeof(default_8x8_inter)); + break; + } + } + } +} + +static void sps_scaling_list_fallback(struct seq_parameter_set_rbsp *sps, int i) +{ + int j; + switch (i) { + case 0: { + for(j = 0; j < sizeof(default_4x4_intra); j++) { + sps->scaling_lists_4x4[i][zigzag_4x4[j]] = default_4x4_intra[j]; + } + //memcpy(sps->scaling_lists_4x4[i], default_4x4_intra, sizeof(sps->scaling_lists_4x4[i])); + break; + } + case 3: { + for(j = 0; j < sizeof(default_4x4_inter); j++) { + sps->scaling_lists_4x4[i][zigzag_4x4[j]] = default_4x4_inter[j]; + } + //memcpy(sps->scaling_lists_4x4[i], default_4x4_inter, sizeof(sps->scaling_lists_4x4[i])); + break; + } + case 1: + case 2: + case 4: + case 5: + memcpy(sps->scaling_lists_4x4[i], sps->scaling_lists_4x4[i-1], sizeof(sps->scaling_lists_4x4[i])); + break; + case 6: { + for(j = 0; j < sizeof(default_8x8_intra); j++) { + sps->scaling_lists_8x8[i-6][zigzag_8x8[j]] = default_8x8_intra[j]; + } + //memcpy(sps->scaling_lists_8x8[i-6], default_8x8_intra, sizeof(sps->scaling_lists_8x8[i-6])); + break; + } + case 7: { + for(j = 0; j < 
sizeof(default_8x8_inter); j++) { + sps->scaling_lists_8x8[i-6][zigzag_8x8[j]] = default_8x8_inter[j]; + } + //memcpy(sps->scaling_lists_8x8[i-6], default_8x8_inter, sizeof(sps->scaling_lists_8x8[i-6])); + break; + } + + } +} + +static void pps_scaling_list_fallback(struct seq_parameter_set_rbsp *sps, struct pic_parameter_set_rbsp *pps, int i) +{ + switch (i) { + case 0: + case 3: + memcpy(pps->scaling_lists_4x4[i], sps->scaling_lists_4x4[i], sizeof(pps->scaling_lists_4x4[i])); + break; + case 1: + case 2: + case 4: + case 5: + memcpy(pps->scaling_lists_4x4[i], pps->scaling_lists_4x4[i-1], sizeof(pps->scaling_lists_4x4[i])); + break; + case 6: + case 7: + memcpy(pps->scaling_lists_8x8[i-6], sps->scaling_lists_8x8[i-6], sizeof(pps->scaling_lists_8x8[i-6])); + break; + + } +} + + +uint8_t parse_sps(struct buf_reader *buf, struct nal_parser *parser) +{ + struct seq_parameter_set_rbsp *sps = parser->current_nal->sps; + sps->profile_idc = read_bits(buf, 8); + sps->constraint_setN_flag = read_bits(buf, 4); + read_bits(buf, 4); + sps->level_idc = read_bits(buf, 8); + + sps->seq_parameter_set_id = read_exp_golomb(buf); + + memset(sps->scaling_lists_4x4, 16, sizeof(sps->scaling_lists_4x4)); + memset(sps->scaling_lists_8x8, 16, sizeof(sps->scaling_lists_8x8)); + if (sps->profile_idc == 100 || sps->profile_idc == 110 || sps->profile_idc + == 122 || sps->profile_idc == 244 || sps->profile_idc == 44 || + sps->profile_idc == 83 || sps->profile_idc == 86) { + sps->chroma_format_idc = read_exp_golomb(buf); + if (sps->chroma_format_idc == 3) { + sps->separate_colour_plane_flag = read_bits(buf, 1); + } + + sps->bit_depth_luma_minus8 = read_exp_golomb(buf); + sps->bit_depth_chroma_minus8 = read_exp_golomb(buf); + sps->qpprime_y_zero_transform_bypass_flag = read_bits(buf, 1); + sps->seq_scaling_matrix_present_flag = read_bits(buf, 1); + if (sps->seq_scaling_matrix_present_flag) { + int i; + for (i = 0; i < 8; i++) { + sps->seq_scaling_list_present_flag[i] = read_bits(buf, 1); + + if 
(sps->seq_scaling_list_present_flag[i]) { + if (i < 6) + parse_scaling_list(buf, sps->scaling_lists_4x4[i], 16, i); + else + parse_scaling_list(buf, sps->scaling_lists_8x8[i - 6], 64, i); + } else { + sps_scaling_list_fallback(sps, i); + } + } + } + } else + sps->chroma_format_idc = 1; + + sps->log2_max_frame_num_minus4 = read_exp_golomb(buf); + + sps->pic_order_cnt_type = read_exp_golomb(buf); + if (!sps->pic_order_cnt_type) + sps->log2_max_pic_order_cnt_lsb_minus4 = read_exp_golomb(buf); + else if(sps->pic_order_cnt_type == 1) { + sps->delta_pic_order_always_zero_flag = read_bits(buf, 1); + sps->offset_for_non_ref_pic = read_exp_golomb_s(buf); + sps->offset_for_top_to_bottom_field = read_exp_golomb_s(buf); + sps->num_ref_frames_in_pic_order_cnt_cycle = read_exp_golomb(buf); + int i; + for (i = 0; i < sps->num_ref_frames_in_pic_order_cnt_cycle; i++) { + sps->offset_for_ref_frame[i] = read_exp_golomb_s(buf); + } + } + + sps->num_ref_frames = read_exp_golomb(buf); + sps->gaps_in_frame_num_value_allowed_flag = read_bits(buf, 1); + + /*sps->pic_width_in_mbs_minus1 = read_exp_golomb(buf); + sps->pic_height_in_map_units_minus1 = read_exp_golomb(buf);*/ + sps->pic_width = 16 * (read_exp_golomb(buf) + 1); + sps->pic_height = 16 * (read_exp_golomb(buf) + 1); + + sps->frame_mbs_only_flag = read_bits(buf, 1); + + /* compute the height correctly even for interlaced material */ + sps->pic_height = (2 - sps->frame_mbs_only_flag) * sps->pic_height; + if (sps->pic_height == 1088) + sps->pic_height = 1080; + + if (!sps->frame_mbs_only_flag) + sps->mb_adaptive_frame_field_flag = read_bits(buf, 1); + + sps->direct_8x8_inference_flag = read_bits(buf, 1); + sps->frame_cropping_flag = read_bits(buf, 1); + if (sps->frame_cropping_flag) { + sps->frame_crop_left_offset = read_exp_golomb(buf); + sps->frame_crop_right_offset = read_exp_golomb(buf); + sps->frame_crop_top_offset = read_exp_golomb(buf); + sps->frame_crop_bottom_offset = read_exp_golomb(buf); + } + 
sps->vui_parameters_present_flag = read_bits(buf, 1); + if (sps->vui_parameters_present_flag) { + parse_vui_parameters(buf, sps); + if(sps->vui_parameters.nal_hrd_parameters_present_flag || + sps->vui_parameters.vc1_hrd_parameters_present_flag) { + parser->cpb_dpb_delays_present_flag = 1; + } else + parser->cpb_dpb_delays_present_flag = 0; + } else + parser->cpb_dpb_delays_present_flag = 0; + + return 0; +} + +void parse_sei(struct buf_reader *buf, struct nal_parser *parser) +{ + struct sei_message *sei = &(parser->current_nal->sei); + struct seq_parameter_set_rbsp *sps = parser->current_nal->sps; + uint8_t tmp; + + sei->payload_type = 0; + while((tmp = read_bits(buf, 8)) == 0xff) { + sei->payload_type += 255; + } + sei->last_payload_type_byte = tmp; + sei->payload_type += sei->last_payload_type_byte; + + sei->payload_size = 0; + while((tmp = read_bits(buf, 8)) == 0xff) { + sei->payload_size += 255; + } + sei->last_payload_size_byte = tmp; + sei->payload_size += sei->last_payload_size_byte; + + /* pic_timing */ + if(sei->payload_type == 1) { + if(parser->cpb_dpb_delays_present_flag) { + sei->pic_timing.cpb_removal_delay = read_bits(buf, 5); + sei->pic_timing.dpb_output_delay = read_bits(buf, 5); + } + + if(sps && sps->vui_parameters_present_flag && + sps->vui_parameters.pic_struct_present_flag) { + sei->pic_timing.pic_struct = read_bits(buf, 4); + switch(sei->pic_timing.pic_struct) { + case DISP_FRAME: + parser->current_nal->interlaced = 0; + parser->current_nal->repeat_pic = 0; + break; + case DISP_TOP: + case DISP_BOTTOM: + case DISP_TOP_BOTTOM: + case DISP_BOTTOM_TOP: + parser->current_nal->interlaced = 1; + break; + case DISP_TOP_BOTTOM_TOP: + case DISP_BOTTOM_TOP_BOTTOM: + parser->current_nal->interlaced = 1; + parser->current_nal->repeat_pic = 1; + break; + case DISP_FRAME_DOUBLING: + parser->current_nal->interlaced = 0; + parser->current_nal->repeat_pic = 2; + break; + case DISP_FRAME_TRIPLING: + parser->current_nal->interlaced = 0; + 
parser->current_nal->repeat_pic = 3; + } + } + } +} + +void parse_vui_parameters(struct buf_reader *buf, + struct seq_parameter_set_rbsp *sps) +{ + sps->vui_parameters.aspect_ration_info_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.aspect_ration_info_present_flag == 1) { + sps->vui_parameters.aspect_ratio_idc = read_bits(buf, 8); + if (sps->vui_parameters.aspect_ratio_idc == ASPECT_EXTENDED_SAR) { + sps->vui_parameters.sar_width = read_bits(buf, 16); + sps->vui_parameters.sar_height = read_bits(buf, 16); + } + } + + sps->vui_parameters.overscan_info_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.overscan_info_present_flag) { + sps->vui_parameters.overscan_appropriate_flag = read_bits(buf, 1); + } + + sps->vui_parameters.video_signal_type_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.video_signal_type_present_flag) { + sps->vui_parameters.video_format = read_bits(buf, 3); + sps->vui_parameters.video_full_range_flag = read_bits(buf, 1); + sps->vui_parameters.colour_description_present = read_bits(buf, 1); + if (sps->vui_parameters.colour_description_present) { + sps->vui_parameters.colour_primaries = read_bits(buf, 8); + sps->vui_parameters.transfer_characteristics = read_bits(buf, 8); + sps->vui_parameters.matrix_coefficients = read_bits(buf, 8); + } + } + + sps->vui_parameters.chroma_loc_info_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.chroma_loc_info_present_flag) { + sps->vui_parameters.chroma_sample_loc_type_top_field = read_exp_golomb(buf); + sps->vui_parameters.chroma_sample_loc_type_bottom_field = read_exp_golomb( + buf); + } + + sps->vui_parameters.timing_info_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.timing_info_present_flag) { + uint32_t num_units_in_tick = read_bits(buf, 32); + uint32_t time_scale = read_bits(buf, 32); + sps->vui_parameters.num_units_in_tick = num_units_in_tick; + sps->vui_parameters.time_scale = time_scale; + sps->vui_parameters.fixed_frame_rate_flag = 
read_bits(buf, 1); + } + + sps->vui_parameters.nal_hrd_parameters_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.nal_hrd_parameters_present_flag) + parse_hrd_parameters(buf, &sps->vui_parameters.nal_hrd_parameters); + + sps->vui_parameters.vc1_hrd_parameters_present_flag = read_bits(buf, 1); + if (sps->vui_parameters.vc1_hrd_parameters_present_flag) + parse_hrd_parameters(buf, &sps->vui_parameters.vc1_hrd_parameters); + + if (sps->vui_parameters.nal_hrd_parameters_present_flag + || sps->vui_parameters.vc1_hrd_parameters_present_flag) + sps->vui_parameters.low_delay_hrd_flag = read_bits(buf, 1); + + sps->vui_parameters.pic_struct_present_flag = read_bits(buf, 1); + sps->vui_parameters.bitstream_restriction_flag = read_bits(buf, 1); + + if (sps->vui_parameters.bitstream_restriction_flag) { + sps->vui_parameters.motion_vectors_over_pic_boundaries = read_bits(buf, 1); + sps->vui_parameters.max_bytes_per_pic_denom = read_exp_golomb(buf); + sps->vui_parameters.max_bits_per_mb_denom = read_exp_golomb(buf); + sps->vui_parameters.log2_max_mv_length_horizontal = read_exp_golomb(buf); + sps->vui_parameters.log2_max_mv_length_vertical = read_exp_golomb(buf); + sps->vui_parameters.num_reorder_frames = read_exp_golomb(buf); + sps->vui_parameters.max_dec_frame_buffering = read_exp_golomb(buf); + } +} + +void parse_hrd_parameters(struct buf_reader *buf, struct hrd_parameters *hrd) +{ + hrd->cpb_cnt_minus1 = read_exp_golomb(buf); + hrd->bit_rate_scale = read_bits(buf, 4); + hrd->cpb_size_scale = read_bits(buf, 4); + + int i; + for (i = 0; i <= hrd->cpb_cnt_minus1; i++) { + hrd->bit_rate_value_minus1[i] = read_exp_golomb(buf); + hrd->cpb_size_value_minus1[i] = read_exp_golomb(buf); + hrd->cbr_flag[i] = read_bits(buf, 1); + } + + hrd->initial_cpb_removal_delay_length_minus1 = read_bits(buf, 5); + hrd->cpb_removal_delay_length_minus1 = read_bits(buf, 5); + hrd->dpb_output_delay_length_minus1 = read_bits(buf, 5); + hrd->time_offset_length = read_bits(buf, 5); +} + +uint8_t 
parse_pps(struct buf_reader *buf, struct pic_parameter_set_rbsp *pps, + struct seq_parameter_set_rbsp *sps) +{ + pps->pic_parameter_set_id = read_exp_golomb(buf); + pps->seq_parameter_set_id = read_exp_golomb(buf); + pps->entropy_coding_mode_flag = read_bits(buf, 1); + pps->pic_order_present_flag = read_bits(buf, 1); + + pps->num_slice_groups_minus1 = read_exp_golomb(buf); + if (pps->num_slice_groups_minus1 > 0) { + pps->slice_group_map_type = read_exp_golomb(buf); + if (pps->slice_group_map_type == 0) { + int i_group; + for (i_group = 0; i_group <= pps->num_slice_groups_minus1; i_group++) { + if (i_group < 64) + pps->run_length_minus1[i_group] = read_exp_golomb(buf); + else { // FIXME: skips if more than 64 groups exist + fprintf(stderr, "Error: Only 64 slice_groups are supported\n"); + read_exp_golomb(buf); + } + } + } + else if (pps->slice_group_map_type == 3 || pps->slice_group_map_type == 4 + || pps->slice_group_map_type == 5) { + pps->slice_group_change_direction_flag = read_bits(buf, 1); + pps->slice_group_change_rate_minus1 = read_exp_golomb(buf); + } + else if (pps->slice_group_map_type == 6) { + pps->pic_size_in_map_units_minus1 = read_exp_golomb(buf); + int i_group; + for (i_group = 0; i_group <= pps->num_slice_groups_minus1; i_group++) { + pps->slice_group_id[i_group] = read_bits(buf, ceil(log( + pps->num_slice_groups_minus1 + 1))); + } + } + } + + pps->num_ref_idx_l0_active_minus1 = read_exp_golomb(buf); + pps->num_ref_idx_l1_active_minus1 = read_exp_golomb(buf); + pps->weighted_pred_flag = read_bits(buf, 1); + pps->weighted_bipred_idc = read_bits(buf, 2); + pps->pic_init_qp_minus26 = read_exp_golomb_s(buf); + pps->pic_init_qs_minus26 = read_exp_golomb_s(buf); + pps->chroma_qp_index_offset = read_exp_golomb_s(buf); + pps->deblocking_filter_control_present_flag = read_bits(buf, 1); + pps->constrained_intra_pred_flag = read_bits(buf, 1); + pps->redundant_pic_cnt_present_flag = read_bits(buf, 1); + + int bit_length = 
(buf->len*8)-rbsp_trailing_bits(buf->buf, buf->len); + int bit_read = bits_read(buf); + + memset(pps->scaling_lists_4x4, 16, sizeof(pps->scaling_lists_4x4)); + memset(pps->scaling_lists_8x8, 16, sizeof(pps->scaling_lists_8x8)); + if (bit_length-bit_read > 1) { + pps->transform_8x8_mode_flag = read_bits(buf, 1); + pps->pic_scaling_matrix_present_flag = read_bits(buf, 1); + if (pps->pic_scaling_matrix_present_flag) { + int i; + for (i = 0; i < 8; i++) { + if(i < 6 || pps->transform_8x8_mode_flag) + pps->pic_scaling_list_present_flag[i] = read_bits(buf, 1); + else + pps->pic_scaling_list_present_flag[i] = 0; + + if (pps->pic_scaling_list_present_flag[i]) { + if (i < 6) + parse_scaling_list(buf, pps->scaling_lists_4x4[i], 16, i); + else + parse_scaling_list(buf, pps->scaling_lists_8x8[i - 6], 64, i); + } else { + pps_scaling_list_fallback(sps, pps, i); + } + } + } + + pps->second_chroma_qp_index_offset = read_exp_golomb_s(buf); + } else + pps->second_chroma_qp_index_offset = pps->chroma_qp_index_offset; + + if (!pps->pic_scaling_matrix_present_flag && sps != NULL) { + memcpy(pps->scaling_lists_4x4, sps->scaling_lists_4x4, + sizeof(pps->scaling_lists_4x4)); + memcpy(pps->scaling_lists_8x8, sps->scaling_lists_8x8, + sizeof(pps->scaling_lists_8x8)); + } + + return 0; +} + +uint8_t parse_slice_header(struct buf_reader *buf, struct nal_parser *parser) +{ + struct nal_unit *nal = parser->current_nal; + + struct seq_parameter_set_rbsp *sps = nal->sps; + struct pic_parameter_set_rbsp *pps = nal->pps; + struct slice_header *slc = nal->slc; + memset(slc, 0x00, sizeof(struct slice_header)); + if (!sps || !pps) + return -1; + + slc->first_mb_in_slice = read_exp_golomb(buf); + /* we do some parsing on the slice type, because the list is doubled */ + slc->slice_type = slice_type(read_exp_golomb(buf)); + + //print_slice_type(slc->slice_type); + slc->pic_parameter_set_id = read_exp_golomb(buf); + if(sps->separate_colour_plane_flag) + slc->colour_plane_id = read_bits(buf, 2); + + 
slc->frame_num = read_bits(buf, sps->log2_max_frame_num_minus4 + 4); + if (!sps->frame_mbs_only_flag) { + slc->field_pic_flag = read_bits(buf, 1); + if (slc->field_pic_flag) + slc->bottom_field_flag = read_bits(buf, 1); + else + slc->bottom_field_flag = 0; + } + else { + slc->field_pic_flag = 0; + slc->bottom_field_flag = 0; + } + + if (slc->field_pic_flag == 0) { + nal->max_pic_num = 1 << (sps->log2_max_frame_num_minus4+4); + nal->curr_pic_num = slc->frame_num; + } else { + nal->curr_pic_num = 2 * slc->frame_num + 1; + nal->max_pic_num = 2 * (1 << (sps->log2_max_frame_num_minus4+4)); + } + + if (nal->nal_unit_type == NAL_SLICE_IDR) + slc->idr_pic_id = read_exp_golomb(buf); + + if (!sps->pic_order_cnt_type) { + slc->pic_order_cnt_lsb = read_bits(buf, + sps->log2_max_pic_order_cnt_lsb_minus4 + 4); + if (pps->pic_order_present_flag && !slc->field_pic_flag) + slc->delta_pic_order_cnt_bottom = read_exp_golomb_s(buf); + } + + if (sps->pic_order_cnt_type == 1 && !sps->delta_pic_order_always_zero_flag) { + slc->delta_pic_order_cnt[0] = read_exp_golomb_s(buf); + if (pps->pic_order_present_flag && !slc->field_pic_flag) + slc->delta_pic_order_cnt[1] = read_exp_golomb_s(buf); + } + + if (pps->redundant_pic_cnt_present_flag == 1) { + slc->redundant_pic_cnt = read_exp_golomb(buf); + } + + if (slc->slice_type == SLICE_B) + slc->direct_spatial_mv_pred_flag = read_bits(buf, 1); + + /* take default values in case they are not set here */ + slc->num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_active_minus1; + slc->num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_active_minus1; + + if (slc->slice_type == SLICE_P || slc->slice_type == SLICE_SP + || slc->slice_type == SLICE_B) { + slc->num_ref_idx_active_override_flag = read_bits(buf, 1); + + if (slc->num_ref_idx_active_override_flag == 1) { + slc->num_ref_idx_l0_active_minus1 = read_exp_golomb(buf); + + if (slc->slice_type == SLICE_B) { + slc->num_ref_idx_l1_active_minus1 = read_exp_golomb(buf); + } + } + } + + /* --- 
ref_pic_list_reordering --- */ + parse_ref_pic_list_reordering(buf, nal, parser); + + /* --- pred_weight_table --- */ + if ((pps->weighted_pred_flag && (slc->slice_type == SLICE_P + || slc->slice_type == SLICE_SP)) || (pps->weighted_bipred_idc == 1 + && slc->slice_type == SLICE_B)) { + parse_pred_weight_table(buf, nal); + } + + /* --- dec_ref_pic_marking --- */ + if (nal->nal_ref_idc != 0) + parse_dec_ref_pic_marking(buf, parser); + else + slc->dec_ref_pic_marking_count = 0; + + return 0; +} + +void parse_ref_pic_list_reordering(struct buf_reader *buf, struct nal_unit *nal, struct nal_parser *parser) +{ + struct slice_header *slc = nal->slc; + + if (slc->slice_type != SLICE_I && slc->slice_type != SLICE_SI) { + slc->ref_pic_list_reordering.ref_pic_list_reordering_flag_l0 = read_bits( + buf, 1); + + if (slc->ref_pic_list_reordering.ref_pic_list_reordering_flag_l0 == 1) { + do { + slc->ref_pic_list_reordering.reordering_of_pic_nums_idc + = read_exp_golomb(buf); + + if (slc->ref_pic_list_reordering.reordering_of_pic_nums_idc == 0 + || slc->ref_pic_list_reordering.reordering_of_pic_nums_idc == 1) { + slc->ref_pic_list_reordering.abs_diff_pic_num_minus1 + = read_exp_golomb(buf); + } + else if (slc->ref_pic_list_reordering.reordering_of_pic_nums_idc == 2) { + slc->ref_pic_list_reordering.long_term_pic_num = read_exp_golomb(buf); + } + } while (slc->ref_pic_list_reordering.reordering_of_pic_nums_idc != 3); + } + } + + if (slc->slice_type == SLICE_B) { + slc->ref_pic_list_reordering.ref_pic_list_reordering_flag_l1 = read_bits( + buf, 1); + + if (slc->ref_pic_list_reordering.ref_pic_list_reordering_flag_l1 == 1) { + do { + slc->ref_pic_list_reordering.reordering_of_pic_nums_idc + = read_exp_golomb(buf); + + if (slc->ref_pic_list_reordering.reordering_of_pic_nums_idc == 0 + || slc->ref_pic_list_reordering.reordering_of_pic_nums_idc == 1) { + slc->ref_pic_list_reordering.abs_diff_pic_num_minus1 + = read_exp_golomb(buf); + } + else if 
(slc->ref_pic_list_reordering.reordering_of_pic_nums_idc == 2) { + slc->ref_pic_list_reordering.long_term_pic_num = read_exp_golomb(buf); + } + } while (slc->ref_pic_list_reordering.reordering_of_pic_nums_idc != 3); + } + } +} + +void parse_pred_weight_table(struct buf_reader *buf, struct nal_unit *nal) +{ + struct seq_parameter_set_rbsp *sps = nal->sps; + struct pic_parameter_set_rbsp *pps = nal->pps; + struct slice_header *slc = nal->slc; + if (!sps || !pps) + return; + + nal->slc->pred_weight_table.luma_log2_weight_denom = read_exp_golomb(buf); + + uint32_t ChromaArrayType = sps->chroma_format_idc; + if(sps->separate_colour_plane_flag) + ChromaArrayType = 0; + + if (ChromaArrayType != 0) + nal->slc->pred_weight_table.chroma_log2_weight_denom = read_exp_golomb(buf); + + int i; + for (i = 0; i <= slc->num_ref_idx_l0_active_minus1; i++) { + uint8_t luma_weight_l0_flag = read_bits(buf, 1); + + if (luma_weight_l0_flag == 1) { + nal->slc->pred_weight_table.luma_weight_l0[i] = read_exp_golomb_s(buf); + nal->slc->pred_weight_table.luma_offset_l0[i] = read_exp_golomb_s(buf); + } + + if (ChromaArrayType != 0) { + uint8_t chroma_weight_l0_flag = read_bits(buf, 1); + + if (chroma_weight_l0_flag == 1) { + int j; + for (j = 0; j < 2; j++) { + nal->slc->pred_weight_table.chroma_weight_l0[i][j] + = read_exp_golomb_s(buf); + nal->slc->pred_weight_table.chroma_offset_l0[i][j] + = read_exp_golomb_s(buf); + } + } + } + } + + if ((slc->slice_type % 5) == SLICE_B) { + /* FIXME: Being spec-compliant here and loop to num_ref_idx_l0_active_minus1 + * will break Divx7 files. 
Keep this in mind if any other streams are broken + */ + for (i = 0; i <= slc->num_ref_idx_l1_active_minus1; i++) { + uint8_t luma_weight_l1_flag = read_bits(buf, 1); + + if (luma_weight_l1_flag == 1) { + nal->slc->pred_weight_table.luma_weight_l1[i] = read_exp_golomb_s(buf); + nal->slc->pred_weight_table.luma_offset_l1[i] = read_exp_golomb_s(buf); + } + + if (ChromaArrayType != 0) { + uint8_t chroma_weight_l1_flag = read_bits(buf, 1); + + if (chroma_weight_l1_flag == 1) { + int j; + for (j = 0; j < 2; j++) { + nal->slc->pred_weight_table.chroma_weight_l1[i][j] + = read_exp_golomb_s(buf); + nal->slc->pred_weight_table.chroma_offset_l1[i][j] + = read_exp_golomb_s(buf); + } + } + } + } + } +} + +void decode_ref_pic_marking(struct nal_unit *nal, + uint32_t memory_management_control_operation, + uint32_t marking_nr, + struct nal_parser *parser) +{ + struct slice_header *slc = nal->slc; + struct dpb *dpb = &parser->dpb; + if (!slc) + return; + + if (memory_management_control_operation == 1) { + // short-term -> unused for reference + uint32_t pic_num_x = (nal->curr_pic_num + - (slc->dec_ref_pic_marking[marking_nr].difference_of_pic_nums_minus1 + 1))%nal->max_pic_num; + struct decoded_picture* pic = NULL; + if ((pic = dpb_get_picture(dpb, pic_num_x)) != NULL) { + if (pic->nal->slc->field_pic_flag == 0) { + dpb_set_unused_ref_picture_a(dpb, pic); + } else { + //if(!pic->top_is_reference) + dpb_set_unused_ref_picture_a(dpb, pic); + /*else + pic->top_is_reference = 0;*/ + + //printf("FIXME: We might need do delete more from the DPB...\n"); + // FIXME: some more handling needed here?! See 8.2.5.4.1, p. 
120 + } + } + } else if (memory_management_control_operation == 2) { + // long-term -> unused for reference + struct decoded_picture* pic = dpb_get_picture_by_ltpn(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_pic_num); + if (pic != NULL) { + if (pic->nal->slc->field_pic_flag == 0) + dpb_set_unused_ref_picture(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_pic_num); + else { + dpb_set_unused_ref_picture(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_pic_num); + printf("FIXME: We might need do delete more from the DPB...\n"); + } + } + } else if (memory_management_control_operation == 3) { + // short-term -> long-term, set long-term frame index + uint32_t pic_num_x = nal->curr_pic_num + - (slc->dec_ref_pic_marking[marking_nr].difference_of_pic_nums_minus1 + 1); + struct decoded_picture* pic = dpb_get_picture_by_ltidx(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_pic_num); + if (pic != NULL) + dpb_set_unused_ref_picture_bylidx(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_frame_idx); + + pic = dpb_get_picture(dpb, pic_num_x); + if (pic) { + if (pic->nal->slc->field_pic_flag == 0) { + pic = dpb_get_picture(dpb, pic_num_x); + pic->nal->long_term_frame_idx + = slc->dec_ref_pic_marking[marking_nr].long_term_frame_idx; + } + else + printf("FIXME: B Set frame %d to long-term ref\n", pic_num_x); + } + else { + printf("memory_management_control_operation: 3 failed. 
No such picture.\n"); + } + + } else if (memory_management_control_operation == 4) { + // set max-long-term frame index, + // mark all long-term pictures with long-term frame idx + // greater max-long-term farme idx as unused for ref + if (slc->dec_ref_pic_marking[marking_nr].max_long_term_frame_idx_plus1 == 0) + dpb_set_unused_ref_picture_lidx_gt(dpb, 0); + else + dpb_set_unused_ref_picture_lidx_gt(dpb, + slc->dec_ref_pic_marking[marking_nr].max_long_term_frame_idx_plus1 - 1); + } else if (memory_management_control_operation == 5) { + // mark all ref pics as unused for reference, + // set max-long-term frame index = no long-term frame idxs + dpb_flush(dpb); + parser->pic_order_cnt_lsb = 0; + parser->pic_order_cnt_msb = 0; + parser->prev_pic_order_cnt_lsb = 0; + parser->prev_pic_order_cnt_msb = 0; + } else if (memory_management_control_operation == 6) { + // mark current picture as used for long-term ref, + // assing long-term frame idx to it + struct decoded_picture* pic = dpb_get_picture_by_ltidx(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_frame_idx); + if (pic != NULL) + dpb_set_unused_ref_picture_bylidx(dpb, + slc->dec_ref_pic_marking[marking_nr].long_term_frame_idx); + + nal->long_term_frame_idx = slc->dec_ref_pic_marking[marking_nr].long_term_frame_idx; + + if (slc->field_pic_flag == 0) { + nal->used_for_long_term_ref = 1; + } + else + printf("FIXME: BY Set frame to long-term ref\n"); + } + /* FIXME: Do we need to care about MMC=0? 
*/ +} + +void parse_dec_ref_pic_marking(struct buf_reader *buf, + struct nal_parser *parser) +{ + struct nal_unit *nal = parser->current_nal; + struct pic_parameter_set_rbsp *pps = parser->current_nal->pps; + struct slice_header *slc = nal->slc; + + if (!slc || !pps) + return; + + slc->dec_ref_pic_marking_count = 0; + int i = slc->dec_ref_pic_marking_count; + + if (nal->nal_unit_type == NAL_SLICE_IDR) { + slc->dec_ref_pic_marking[i].no_output_of_prior_pics_flag = read_bits(buf, 1); + slc->dec_ref_pic_marking[i].long_term_reference_flag = read_bits(buf, 1); + } else { + slc->dec_ref_pic_marking[i].adaptive_ref_pic_marking_mode_flag = read_bits( + buf, 1); + + if (slc->dec_ref_pic_marking[i].adaptive_ref_pic_marking_mode_flag) { + do { + slc->dec_ref_pic_marking[i].memory_management_control_operation + = read_exp_golomb(buf); + + if (slc->dec_ref_pic_marking[i].memory_management_control_operation == 1 + || slc->dec_ref_pic_marking[i].memory_management_control_operation + == 3) + slc->dec_ref_pic_marking[i].difference_of_pic_nums_minus1 + = read_exp_golomb(buf); + + if (slc->dec_ref_pic_marking[i].memory_management_control_operation == 2) + slc->dec_ref_pic_marking[i].long_term_pic_num = read_exp_golomb(buf); + + if (slc->dec_ref_pic_marking[i].memory_management_control_operation == 3 + || slc->dec_ref_pic_marking[i].memory_management_control_operation + == 6) + slc->dec_ref_pic_marking[i].long_term_frame_idx = read_exp_golomb(buf); + + if (slc->dec_ref_pic_marking[i].memory_management_control_operation == 4) + slc->dec_ref_pic_marking[i].max_long_term_frame_idx_plus1 + = read_exp_golomb(buf); + + i++; + if(i >= 10) { + printf("Error: Not more than 10 MMC operations supported per slice. Dropping some.\n"); + i = 0; + } + } while (slc->dec_ref_pic_marking[i-1].memory_management_control_operation + != 0); + } + } + + slc->dec_ref_pic_marking_count = (i>0) ? 
(i-1) : 0; +} + +/* ----------------- NAL parser ----------------- */ + +struct nal_parser* init_parser() +{ + struct nal_parser *parser = calloc(1, sizeof(struct nal_parser)); + parser->nal0 = init_nal_unit(); + parser->nal1 = init_nal_unit(); + parser->current_nal = parser->nal0; + parser->last_nal = parser->nal1; + parser->slice_cnt = 1; + + parser->field = -1; + + /* no idea why we do that. inspired by libavcodec, + * as we couldn't figure in the specs.... + */ + parser->prev_pic_order_cnt_msb = parser->pic_order_cnt_lsb = 1 << 16; + + return parser; +} + +void free_parser(struct nal_parser *parser) +{ + free_nal_unit(parser->nal0); + free_nal_unit(parser->nal1); + free(parser); +} + +void parse_codec_private(struct nal_parser *parser, uint8_t *inbuf, int inbuf_len) +{ + struct buf_reader bufr; + + bufr.buf = inbuf; + bufr.cur_pos = inbuf; + bufr.cur_offset = 8; + bufr.len = inbuf_len; + + struct nal_unit *nal = parser->current_nal; + struct nal_unit *nal1 = parser->last_nal; + + if (!nal->sps) + nal->sps = calloc(1, sizeof(struct seq_parameter_set_rbsp)); + else + memset(nal->sps, 0x00, sizeof(struct seq_parameter_set_rbsp)); + + /* reserved */ + read_bits(&bufr, 8); + nal->sps->profile_idc = read_bits(&bufr, 8); + read_bits(&bufr, 8); + nal->sps->level_idc = read_bits(&bufr, 8); + read_bits(&bufr, 6); + + parser->nal_size_length = read_bits(&bufr, 2) + 1; + parser->nal_size_length_buf = calloc(1, parser->nal_size_length); + read_bits(&bufr, 3); + uint8_t sps_count = read_bits(&bufr, 5); + + inbuf += 6; + inbuf_len -= 6; + int i; + for(i = 0; i < sps_count; i++) { + uint16_t sps_size = read_bits(&bufr, 16); + inbuf += 2; + inbuf_len -= 2; + parse_nal(inbuf, sps_size, parser); + inbuf += sps_size; + inbuf_len -= sps_size; + } + + bufr.buf = inbuf; + bufr.cur_pos = inbuf; + bufr.cur_offset = 8; + bufr.len = inbuf_len; + + uint8_t pps_count = read_bits(&bufr, 8); + inbuf += 1; + for(i = 0; i < pps_count; i++) { + uint16_t pps_size = read_bits(&bufr, 16); + inbuf 
+= 2; + inbuf_len -= 2; + parse_nal(inbuf, pps_size, parser); + inbuf += pps_size; + inbuf_len -= pps_size; + } + + copy_nal_unit(nal1, nal); + printf("done parsing extradata\n"); +} + +void process_mmc_operations(struct nal_parser *parser) +{ + if(parser->last_nal_res == 1 && parser->current_nal && + parser->current_nal->slc) { + int i; + for(i = 0; i < parser->current_nal->slc->dec_ref_pic_marking_count; i++) { + decode_ref_pic_marking( + parser->current_nal, + parser->current_nal->slc->dec_ref_pic_marking[i].memory_management_control_operation, + i, + parser); + } + + if (parser->last_nal->slc != NULL) + parser->prev_pic_order_cnt_lsb + = parser->last_nal->slc->pic_order_cnt_lsb; + parser->prev_pic_order_cnt_msb = parser->pic_order_cnt_msb; + } +} + +int parse_frame(struct nal_parser *parser, uint8_t *inbuf, int inbuf_len, + uint8_t **ret_buf, uint32_t *ret_len, uint32_t *ret_slice_cnt) +{ + int32_t next_nal = 0; + int32_t offset = 0; + int start_seq_len = 3; + + if(parser->nal_size_length > 0) + start_seq_len = offset = parser->nal_size_length; + + if (parser->prebuf_len + inbuf_len > MAX_FRAME_SIZE) { + printf("buf underrun!!\n"); + *ret_len = 0; + *ret_buf = NULL; + parser->prebuf_len = 0; + return inbuf_len; + } + + /* copy the whole inbuf to the prebuf, + * then search for a nal-start sequence in the prebuf, + * if it's in there, parse the nal and append to parser->buf + * or return a frame */ + + xine_fast_memcpy(parser->prebuf + parser->prebuf_len, inbuf, inbuf_len); + parser->prebuf_len += inbuf_len; + + while((next_nal = seek_for_nal(parser->prebuf+start_seq_len-offset, parser->prebuf_len-start_seq_len+offset, parser)) > 0) { + + if(!parser->nal_size_length && + (parser->prebuf[0] != 0x00 || parser->prebuf[1] != 0x00 || parser->prebuf[2] != 0x01)) { + printf("Broken NAL, skip it.\n"); + parser->last_nal_res = 2; + } else + parser->last_nal_res = parse_nal(parser->prebuf+start_seq_len, next_nal, parser); + + if ((parser->last_nal_res == 1 || 
parser->last_nal_res == 3) && parser->buf_len > 0) { + + //printf("Frame complete: %d bytes\n", parser->buf_len); + *ret_len = parser->buf_len; + *ret_buf = malloc(*ret_len); + xine_fast_memcpy(*ret_buf, parser->buf, parser->buf_len); + *ret_slice_cnt = parser->slice_cnt; + + parser->slice_cnt = 1; + parser->buf_len = 0; + + /* this is a SLICE, keep it in the buffer */ + + if(parser->last_nal_res != 3) { + if(parser->nal_size_length > 0) { + static const uint8_t start_seq[3] = { 0x00, 0x00, 0x01 }; + xine_fast_memcpy(parser->buf, start_seq, 3); + parser->buf_len += 3; + } + + xine_fast_memcpy(parser->buf+parser->buf_len, parser->prebuf+offset, next_nal+start_seq_len-2*offset); + parser->buf_len += next_nal+start_seq_len-2*offset; + } + + memmove(parser->prebuf, parser->prebuf+(next_nal+start_seq_len-offset), parser->prebuf_len-(next_nal+start_seq_len-offset)); + parser->prebuf_len -= next_nal+start_seq_len-offset; + + return inbuf_len; + } + + /* got a new nal, which is part of the current + * coded picture. 
add it to buf + */ + if (parser->last_nal_res < 2) { + if (parser->buf_len + next_nal+start_seq_len-offset > MAX_FRAME_SIZE) { + printf("buf underrun 1!!\n"); + parser->buf_len = 0; + *ret_len = 0; + *ret_buf = NULL; + return inbuf_len; + } + + if(parser->nal_size_length > 0) { + static const uint8_t start_seq[3] = { 0x00, 0x00, 0x01 }; + xine_fast_memcpy(parser->buf+parser->buf_len, start_seq, 3); + parser->buf_len += 3; + } + + xine_fast_memcpy(parser->buf+parser->buf_len, parser->prebuf+offset, next_nal+start_seq_len-2*offset); + parser->buf_len += next_nal+start_seq_len-2*offset; + + memmove(parser->prebuf, parser->prebuf+(next_nal+start_seq_len-offset), parser->prebuf_len-(next_nal+start_seq_len-offset)); + parser->prebuf_len -= next_nal+start_seq_len-offset; + } else { + /* got a non-relevant nal, just remove it */ + memmove(parser->prebuf, parser->prebuf+(next_nal+start_seq_len-offset), parser->prebuf_len-(next_nal+start_seq_len-offset)); + parser->prebuf_len -= next_nal+start_seq_len-offset; + } + } + + *ret_buf = NULL; + *ret_len = 0; + return inbuf_len; +} + + +/** + * @return 0: NAL is part of coded picture + * 2: NAL is not part of coded picture + * 1: NAL is the beginning of a new coded picture + * 3: NAL is marked as END_OF_SEQUENCE + */ +int parse_nal(uint8_t *buf, int buf_len, struct nal_parser *parser) +{ + struct buf_reader bufr; + + bufr.buf = buf; + bufr.cur_pos = buf; + bufr.cur_offset = 8; + bufr.len = buf_len; + + struct nal_unit *nal = parser->current_nal; + struct nal_unit *last_nal = parser->last_nal; + + int res = parse_nal_header(&bufr, parser); + if (res == NAL_SLICE_IDR) { + parser->is_idr = 1; + } + + calculate_pic_order(parser); + + if (res >= NAL_SLICE && res <= NAL_SLICE_IDR) { + // now detect if it's a new frame! 
+ int ret = 0; + uint8_t reason = 0; + if (nal->slc->field_pic_flag == 1) + parser->field = nal->slc->bottom_field_flag; + else { + parser->have_top = 1; + parser->field = -1; + } + + if (nal->slc->field_pic_flag == 1 && nal->slc->bottom_field_flag == 0) + parser->have_top = 1; + + parser->slice = 1; + + if (nal->slc == NULL || last_nal->slc == NULL) { + ret = 1; + reason++; + } + if (nal->slc && last_nal->slc && (nal->slc->frame_num + != last_nal->slc->frame_num)) { + ret = 1; + reason++; + } + if (nal->slc && last_nal->slc && (nal->slc->pic_parameter_set_id + != last_nal->slc->pic_parameter_set_id)) { + ret = 1; + reason++; + } + if (nal->slc && last_nal->slc && (nal->slc->field_pic_flag + != last_nal->slc->field_pic_flag)) { + ret = 1; + reason++; + } + if (nal->slc && last_nal->slc && nal->slc->bottom_field_flag + != last_nal->slc->bottom_field_flag) { + ret = 1; + reason++; + } + if (nal->nal_ref_idc != last_nal->nal_ref_idc && (nal->nal_ref_idc == 0 + || last_nal->nal_ref_idc == 0)) { + ret = 1; + reason++; + } + if (nal->sps && nal->slc && last_nal->slc && (nal->sps->pic_order_cnt_type + == 0 && last_nal->sps->pic_order_cnt_type == 0 + && (nal->slc->pic_order_cnt_lsb != last_nal->slc->pic_order_cnt_lsb + || nal->slc->delta_pic_order_cnt_bottom + != last_nal->slc->delta_pic_order_cnt_bottom))) { + ret = 1; + reason++; + /*printf("C: Reason: %d, %d, %d\n", res, nal->slc->pic_order_cnt_lsb, + last_nal->slc->pic_order_cnt_lsb);*/ + } + if (nal->slc && last_nal->slc && (nal->sps->pic_order_cnt_type == 1 + && last_nal->sps->pic_order_cnt_type == 1 + && (nal->slc->delta_pic_order_cnt[0] + != last_nal->slc->delta_pic_order_cnt[0] + || nal->slc->delta_pic_order_cnt[1] + != last_nal->slc->delta_pic_order_cnt[1]))) { + ret = 1; + reason++; + } + if (nal->nal_unit_type != last_nal->nal_unit_type && (nal->nal_unit_type + == 5 || last_nal->nal_unit_type == 5)) { + ret = 1; + reason++; + } + if (nal->slc && last_nal->slc && (nal->nal_unit_type == 5 + && 
last_nal->nal_unit_type == 5 && nal->slc->idr_pic_id + != last_nal->slc->idr_pic_id)) { + ret = 1; + reason++; + } + + if (parser->current_nal == parser->nal0) { + parser->current_nal = parser->nal1; + parser->last_nal = parser->nal0; + } + else { + parser->current_nal = parser->nal0; + parser->last_nal = parser->nal1; + } + + if(!parser->current_nal->sps && parser->last_nal->sps) { + parser->current_nal->sps = malloc(sizeof(struct seq_parameter_set_rbsp)); + xine_fast_memcpy(parser->current_nal->sps, parser->last_nal->sps, sizeof(struct seq_parameter_set_rbsp)); + } + + if(!parser->current_nal->pps && parser->last_nal->pps) { + parser->current_nal->pps = malloc(sizeof(struct pic_parameter_set_rbsp)); + xine_fast_memcpy(parser->current_nal->pps, parser->last_nal->pps, sizeof(struct pic_parameter_set_rbsp)); + } + + /* increase the slice_cnt until a new frame is detected */ + if (!ret) + parser->slice_cnt++; + + return ret; + } else if (res == NAL_PPS || res == NAL_SPS) { + return 2; + } else if (res == NAL_END_OF_SEQUENCE) { + if (parser->current_nal == parser->nal0) { + parser->current_nal = parser->nal1; + parser->last_nal = parser->nal0; + } + else { + parser->current_nal = parser->nal0; + parser->last_nal = parser->nal1; + } + + return 3; + } else if (res >= NAL_SEI) { + return 2; + } + + return 0; +} + +int seek_for_nal(uint8_t *buf, int buf_len, struct nal_parser *parser) +{ + if(buf_len <= 0) + return -1; + + if(parser->nal_size_length > 0) { + if(buf_len < parser->nal_size_length) { + return -1; + } + + uint32_t next_nal = parser->next_nal_position; + if(!next_nal) { + struct buf_reader bufr; + + bufr.buf = buf; + bufr.cur_pos = buf; + bufr.cur_offset = 8; + bufr.len = buf_len; + + next_nal = read_bits(&bufr, parser->nal_size_length*8)+parser->nal_size_length; + } + + if(next_nal > buf_len) { + parser->next_nal_position = next_nal; + return -1; + } else + parser->next_nal_position = 0; + + return next_nal; + } + + /* NAL_END_OF_SEQUENCE has only 1 byte, so 
+ * we do not need to search for the next start sequence */ + if(buf[0] == NAL_END_OF_SEQUENCE) + return 1; + + int i; + for (i = 0; i < buf_len - 2; i++) { + if (buf[i] == 0x00 && buf[i + 1] == 0x00 && buf[i + 2] == 0x01) { + //printf("found nal at: %d\n", i); + return i; + } + } + + return -1; +} diff --git a/src/libvdpau/h264_parser.h b/src/libvdpau/h264_parser.h new file mode 100644 index 000000000..bf3d34100 --- /dev/null +++ b/src/libvdpau/h264_parser.h @@ -0,0 +1,98 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * h264_parser.h: Almost full-features H264 NAL-Parser + */ + +#ifndef NAL_PARSER_H_ +#define NAL_PARSER_H_ + +#include <stdlib.h> + +#include "xine_internal.h" +#include "nal.h" +#include "dpb.h" + +#define MAX_FRAME_SIZE 1024*1024 + +struct nal_parser { + uint8_t buf[MAX_FRAME_SIZE]; + uint32_t buf_len; + + /* prebuf is used to store the currently + * processed nal unit */ + uint8_t prebuf[MAX_FRAME_SIZE]; + uint32_t prebuf_len; + uint32_t next_nal_position; + uint8_t incomplete_nal; + + uint8_t found_sps; + uint8_t found_pps; + uint8_t last_nal_res; + + uint8_t is_idr; + + int field; /* 0=top, 1=bottom, -1=both */ + int slice; + int slice_cnt; + + uint8_t have_top; + uint8_t have_frame; + + uint8_t nal_size_length; + uint32_t next_nal_size; + uint8_t *nal_size_length_buf; + uint8_t have_nal_size_length_buf; + + struct nal_unit *nal0; + struct nal_unit *nal1; + struct nal_unit *current_nal; + struct nal_unit *last_nal; + + uint8_t cpb_dpb_delays_present_flag; + + uint32_t pic_order_cnt_lsb; + uint32_t pic_order_cnt_msb; + uint32_t prev_pic_order_cnt_lsb; + uint32_t prev_pic_order_cnt_msb; + uint32_t frame_num_offset; + + /* this is dpb used for reference frame + * heading to vdpau + unordered frames + */ + struct dpb dpb; +}; + +int parse_nal(uint8_t *buf, int buf_len, struct nal_parser *parser); + +int seek_for_nal(uint8_t *buf, int buf_len, struct nal_parser *parser); + +struct nal_parser* init_parser(); +void free_parser(struct nal_parser *parser); +int parse_frame(struct nal_parser *parser, uint8_t *inbuf, int inbuf_len, + uint8_t **ret_buf, uint32_t *ret_len, uint32_t *ret_slice_cnt); + +/* this has to be called after decoding the frame delivered by parse_frame, + * but before adding a decoded frame to the dpb. 
+ */ +void process_mmc_operations(struct nal_parser *parser); + +void parse_codec_private(struct nal_parser *parser, uint8_t *inbuf, int inbuf_len); + +#endif diff --git a/src/libvdpau/nal.c b/src/libvdpau/nal.c new file mode 100644 index 000000000..7e0cbdf7f --- /dev/null +++ b/src/libvdpau/nal.c @@ -0,0 +1,75 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * nal.c: nal-structure utility functions + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "nal.h" +#include "xine_internal.h" + +struct nal_unit* init_nal_unit() +{ + struct nal_unit *nal = calloc(1, sizeof(struct nal_unit)); + + /*nal->sps = calloc(1, sizeof(struct seq_parameter_set_rbsp)); + nal->pps = calloc(1, sizeof(struct pic_parameter_set_rbsp)); + nal->slc = calloc(1, sizeof(struct slice_header));*/ + + return nal; +} + +void free_nal_unit(struct nal_unit *nal) +{ + if(!nal) + return; + + free(nal->sps); + free(nal->pps); + free(nal->slc); + free(nal); +} + +void copy_nal_unit(struct nal_unit *dest, struct nal_unit *src) +{ + /* size without pps, sps and slc units: */ + int size = sizeof(struct nal_unit) - sizeof(struct seq_parameter_set_rbsp*) + - sizeof(struct pic_parameter_set_rbsp*) - sizeof(struct 
slice_header*); + + xine_fast_memcpy(dest, src, size); + + if(!dest->sps) + dest->sps = calloc(1, sizeof(struct seq_parameter_set_rbsp)); + + if(!dest->pps) + dest->pps = calloc(1, sizeof(struct pic_parameter_set_rbsp)); + + if(!dest->slc) + dest->slc = calloc(1, sizeof(struct slice_header)); + + if(src->sps) + xine_fast_memcpy(dest->sps, src->sps, sizeof(struct seq_parameter_set_rbsp)); + if(src->pps) + xine_fast_memcpy(dest->pps, src->pps, sizeof(struct pic_parameter_set_rbsp)); + if(src->slc) + xine_fast_memcpy(dest->slc, src->slc, sizeof(struct slice_header)); +} diff --git a/src/libvdpau/nal.h b/src/libvdpau/nal.h new file mode 100644 index 000000000..4e039b8b8 --- /dev/null +++ b/src/libvdpau/nal.h @@ -0,0 +1,466 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * nal.h: H264 NAL structures + */ + +#ifndef NAL_H_ +#define NAL_H_ +#include <stdint.h> +#include <vdpau/vdpau.h> + +enum nal_unit_types +{ + NAL_UNSPECIFIED = 0, + NAL_SLICE, + NAL_PART_A, + NAL_PART_B, + NAL_PART_C, + NAL_SLICE_IDR, + NAL_SEI, + NAL_SPS, + NAL_PPS, + NAL_AU_DELIMITER, + NAL_END_OF_SEQUENCE, + NAL_END_OF_STREAM, + NAL_FILLER_DATA, + NAL_SPS_EXT +}; + +enum pic_struct { + DISP_FRAME = 0, + DISP_TOP, + DISP_BOTTOM, + DISP_TOP_BOTTOM, + DISP_BOTTOM_TOP, + DISP_TOP_BOTTOM_TOP, + DISP_TOP_TOP_BOTTOM, + DISP_BOTTOM_TOP_BOTTOM, + DISP_FRAME_DOUBLING, + DISP_FRAME_TRIPLING +}; + +/* slice types repeat from 5-9, we + * need a helper function for comparison + */ +enum slice_types +{ + SLICE_P = 0, SLICE_B, SLICE_I, SLICE_SP, SLICE_SI +}; + +enum aspect_ratio +{ + ASPECT_UNSPECIFIED = 0, + ASPECT_1_1, + ASPECT_12_11, + ASPECT_10_11, + ASPECT_16_11, + ASPECT_40_33, + ASPECT_24_11, + ASPECT_20_11, + ASPECT_32_11, + ASPECT_80_33, + ASPECT_18_11, + ASPECT_15_11, + ASPECT_64_33, + ASPECT_160_99, + ASPECT_4_3, + ASPECT_3_2, + ASPECT_2_1, + ASPECT_RESERVED, + ASPECT_EXTENDED_SAR=255 +}; + +static const uint8_t zigzag_4x4[16] = { + 0+0*4, 1+0*4, 0+1*4, 0+2*4, + 1+1*4, 2+0*4, 3+0*4, 2+1*4, + 1+2*4, 0+3*4, 1+3*4, 2+2*4, + 3+1*4, 3+2*4, 2+3*4, 3+3*4, +}; + +static const uint8_t zigzag_8x8[64] = { + 0+0*8, 1+0*8, 0+1*8, 0+2*8, + 1+1*8, 2+0*8, 3+0*8, 2+1*8, + 1+2*8, 0+3*8, 0+4*8, 1+3*8, + 2+2*8, 3+1*8, 4+0*8, 5+0*8, + 4+1*8, 3+2*8, 2+3*8, 1+4*8, + 0+5*8, 0+6*8, 1+5*8, 2+4*8, + 3+3*8, 4+2*8, 5+1*8, 6+0*8, + 7+0*8, 6+1*8, 5+2*8, 4+3*8, + 3+4*8, 2+5*8, 1+6*8, 0+7*8, + 1+7*8, 2+6*8, 3+5*8, 4+4*8, + 5+3*8, 6+2*8, 7+1*8, 7+2*8, + 6+3*8, 5+4*8, 4+5*8, 3+6*8, + 2+7*8, 3+7*8, 4+6*8, 5+5*8, + 6+4*8, 7+3*8, 7+4*8, 6+5*8, + 5+6*8, 4+7*8, 5+7*8, 6+6*8, + 7+5*8, 
7+6*8, 6+7*8, 7+7*8, +}; + +static inline uint32_t slice_type(uint32_t slice_type) +{ + return (slice_type < 10 ? slice_type % 5 : slice_type); +} + +static inline void print_slice_type(uint32_t slice_type) +{ + switch(slice_type) { + case SLICE_P: + printf("SLICE_P\n"); + break; + case SLICE_B: + printf("SLICE_B\n"); + break; + case SLICE_I: + printf("SLICE_I\n"); + break; + case SLICE_SP: + printf("SLICE_SP\n"); + break; + case SLICE_SI: + printf("SLICE_SI\n"); + break; + default: + printf("Unknown SLICE\n"); + } +} + +struct hrd_parameters +{ + uint32_t cpb_cnt_minus1; + uint8_t bit_rate_scale; + uint8_t cpb_size_scale; + + uint32_t bit_rate_value_minus1[32]; + uint32_t cpb_size_value_minus1[32]; + uint8_t cbr_flag[32]; + + uint8_t initial_cpb_removal_delay_length_minus1; + uint8_t cpb_removal_delay_length_minus1; + uint8_t dpb_output_delay_length_minus1; + uint8_t time_offset_length; +}; + +struct seq_parameter_set_rbsp +{ + uint8_t profile_idc; // 0xff + uint8_t constraint_setN_flag; // 0x0f + uint8_t level_idc; // 0xff + uint32_t seq_parameter_set_id; + uint32_t chroma_format_idc; + uint8_t separate_colour_plane_flag; // 0x01 + uint32_t bit_depth_luma_minus8; + uint32_t bit_depth_chroma_minus8; + uint8_t qpprime_y_zero_transform_bypass_flag; + uint8_t seq_scaling_matrix_present_flag; + + /* if(seq_scaling_matrix_present_flag) */ + uint8_t seq_scaling_list_present_flag[8]; + + uint8_t scaling_lists_4x4[6][16]; + uint8_t scaling_lists_8x8[2][64]; + /* endif */ + + uint32_t log2_max_frame_num_minus4; + uint32_t pic_order_cnt_type; + // if pic_order_cnt_type==0 + uint32_t log2_max_pic_order_cnt_lsb_minus4; + // else + uint8_t delta_pic_order_always_zero_flag; + int32_t offset_for_non_ref_pic; + int32_t offset_for_top_to_bottom_field; + uint8_t num_ref_frames_in_pic_order_cnt_cycle; + int32_t offset_for_ref_frame[256]; + // TODO: some more ignored here + uint32_t num_ref_frames; + uint8_t gaps_in_frame_num_value_allowed_flag; + /*uint32_t pic_width_in_mbs_minus1; 
+ uint32_t pic_height_in_map_units_minus1;*/ + uint32_t pic_width; + uint32_t pic_height; + uint8_t frame_mbs_only_flag; + uint8_t mb_adaptive_frame_field_flag; + uint8_t direct_8x8_inference_flag; + uint8_t frame_cropping_flag; + uint32_t frame_crop_left_offset; + uint32_t frame_crop_right_offset; + uint32_t frame_crop_top_offset; + uint32_t frame_crop_bottom_offset; + uint8_t vui_parameters_present_flag; + + /* vui_parameters */ + struct + { + uint8_t aspect_ration_info_present_flag; + + /* aspect_ration_info_present_flag == 1 */ + uint8_t aspect_ratio_idc; + uint16_t sar_width; + uint16_t sar_height; + + uint8_t overscan_info_present_flag; + /* overscan_info_present_flag == 1 */ + uint8_t overscan_appropriate_flag; + + uint8_t video_signal_type_present_flag; + /* video_signal_type_present_flag == 1 */ + uint8_t video_format; + uint8_t video_full_range_flag; + uint8_t colour_description_present; + /* colour_description_present == 1 */ + uint8_t colour_primaries; + uint8_t transfer_characteristics; + uint8_t matrix_coefficients; + + uint8_t chroma_loc_info_present_flag; + /* chroma_loc_info_present_flag == 1 */ + uint8_t chroma_sample_loc_type_top_field; + uint8_t chroma_sample_loc_type_bottom_field; + + uint8_t timing_info_present_flag; + /* timing_info_present_flag == 1 */ + uint32_t num_units_in_tick; + uint32_t time_scale; + uint8_t fixed_frame_rate_flag; + + uint8_t nal_hrd_parameters_present_flag; + struct hrd_parameters nal_hrd_parameters; + + uint8_t vc1_hrd_parameters_present_flag; + struct hrd_parameters vc1_hrd_parameters; + + uint8_t low_delay_hrd_flag; + + uint8_t pic_struct_present_flag; + uint8_t bitstream_restriction_flag; + + /* bitstream_restriction_flag == 1 */ + uint8_t motion_vectors_over_pic_boundaries; + uint32_t max_bytes_per_pic_denom; + uint32_t max_bits_per_mb_denom; + uint32_t log2_max_mv_length_horizontal; + uint32_t log2_max_mv_length_vertical; + uint32_t num_reorder_frames; + uint32_t max_dec_frame_buffering; + } vui_parameters; + 
+}; + +struct pic_parameter_set_rbsp +{ + uint32_t pic_parameter_set_id; + uint32_t seq_parameter_set_id; + uint8_t entropy_coding_mode_flag; + uint8_t pic_order_present_flag; + + uint32_t num_slice_groups_minus1; + + /* num_slice_groups_minus1 > 0 */ + uint32_t slice_group_map_type; + + /* slice_group_map_type == 1 */ + uint32_t run_length_minus1[64]; + + /* slice_group_map_type == 2 */ + uint32_t top_left[64]; + uint32_t bottom_right[64]; + + /* slice_group_map_type == 3,4,5 */ + uint8_t slice_group_change_direction_flag; + uint32_t slice_group_change_rate_minus1; + + /* slice_group_map_type == 6 */ + uint32_t pic_size_in_map_units_minus1; + uint8_t slice_group_id[64]; + + uint32_t num_ref_idx_l0_active_minus1; + uint32_t num_ref_idx_l1_active_minus1; + uint8_t weighted_pred_flag; + uint8_t weighted_bipred_idc; + int32_t pic_init_qp_minus26; + int32_t pic_init_qs_minus26; + int32_t chroma_qp_index_offset; + uint8_t deblocking_filter_control_present_flag; + uint8_t constrained_intra_pred_flag; + uint8_t redundant_pic_cnt_present_flag; + + /* if(more_rbsp_data) */ + uint8_t transform_8x8_mode_flag; + uint8_t pic_scaling_matrix_present_flag; + + /* if(pic_scaling_matrix_present_flag) */ + uint8_t pic_scaling_list_present_flag[8]; + + uint8_t scaling_lists_4x4[6][16]; + uint8_t scaling_lists_8x8[2][64]; + + int32_t second_chroma_qp_index_offset; +}; + +/*struct clock_timestamp { + uint8_t ct_type; + uint8_t nuit_fiel_based_flag; + uint8_t counting_type; + uint8_t full_timestamp_flag; + uint8_t discontinuity_flag; + uint8_t cnt_dropped_flag; + uint8_t n_frames +};*/ + +/* sei contains several additional info, we do + * only care for pic_timing, to handle display + * reordering + */ +struct sei_message +{ + uint32_t payload_type; + uint8_t last_payload_type_byte; + uint32_t payload_size; + uint8_t last_payload_size_byte; + + struct + { + /* cpb_dpb_delays_present_flag == 1 */ + uint8_t cpb_removal_delay; + uint8_t dpb_output_delay; + + uint8_t pic_struct; + //uint8_t 
clock_timestamp_flag[3]; + } pic_timing; +}; + +struct slice_header +{ + uint32_t first_mb_in_slice; + uint32_t slice_type; + uint32_t pic_parameter_set_id; + uint8_t colour_plane_id; + uint32_t frame_num; + uint8_t field_pic_flag; + uint8_t bottom_field_flag; + uint32_t idr_pic_id; + + /* sps->pic_order_cnt_type == 0 */ + uint32_t pic_order_cnt_lsb; + int32_t delta_pic_order_cnt_bottom; + /* sps->pic_order_cnt_type == 1 && !sps->delta_pic_order_always_zero_flag */ + int32_t delta_pic_order_cnt[2]; + + /* pps->redundant_pic_cnt_present_flag == 1 */ + int32_t redundant_pic_cnt; + + /* slice_type == B */ + uint8_t direct_spatial_mv_pred_flag; + + /* slice_type == P, SP, B */ + uint8_t num_ref_idx_active_override_flag; + /* num_ref_idx_active_override_flag == 1 */ + uint32_t num_ref_idx_l0_active_minus1; + /* slice type == B */ + uint32_t num_ref_idx_l1_active_minus1; + + /* ref_pic_list_reordering */ + struct + { + /* slice_type != I && slice_type != SI */ + uint8_t ref_pic_list_reordering_flag_l0; + + /* slice_type == B */ + uint8_t ref_pic_list_reordering_flag_l1; + + /* ref_pic_list_reordering_flag_l0 == 1 */ + uint32_t reordering_of_pic_nums_idc; + + /* reordering_of_pic_nums_idc == 0, 1 */ + uint32_t abs_diff_pic_num_minus1; + + /* reordering_of_pic_nums_idc == 2) */ + uint32_t long_term_pic_num; + } ref_pic_list_reordering; + + /* pred_weight_table */ + struct + { + uint32_t luma_log2_weight_denom; + + /* chroma_format_idc != 0 */ + uint32_t chroma_log2_weight_denom; + + int32_t luma_weight_l0[32]; + int32_t luma_offset_l0[32]; + + int32_t chroma_weight_l0[32][2]; + int32_t chroma_offset_l0[32][2]; + + int32_t luma_weight_l1[32]; + int32_t luma_offset_l1[32]; + + int32_t chroma_weight_l1[32][2]; + int32_t chroma_offset_l1[32][2]; + } pred_weight_table; + + /* def_rec_pic_marking */ + struct + { + + /* nal_unit_type == NAL_SLICE_IDR */ + uint8_t no_output_of_prior_pics_flag; + uint8_t long_term_reference_flag; + + /* else */ + uint8_t 
adaptive_ref_pic_marking_mode_flag; + uint32_t memory_management_control_operation; + + uint32_t difference_of_pic_nums_minus1; + uint32_t long_term_pic_num; + uint32_t long_term_frame_idx; + uint32_t max_long_term_frame_idx_plus1; + } dec_ref_pic_marking[10]; + uint32_t dec_ref_pic_marking_count; +}; + +struct nal_unit +{ + uint8_t nal_ref_idc; // 0x03 + uint8_t nal_unit_type; // 0x1f + + uint32_t max_pic_num; + uint32_t curr_pic_num; + uint8_t used_for_long_term_ref; + uint32_t long_term_pic_num; + uint32_t long_term_frame_idx; + + int32_t top_field_order_cnt; + int32_t bottom_field_order_cnt; + + uint8_t interlaced; + uint8_t repeat_pic; + + struct sei_message sei; + + struct seq_parameter_set_rbsp *sps; + struct pic_parameter_set_rbsp *pps; + struct slice_header *slc; +}; + +struct nal_unit* init_nal_unit(); +void free_nal_unit(struct nal_unit *nal); +void copy_nal_unit(struct nal_unit *dest, struct nal_unit *src); + +#endif /* NAL_H_ */ diff --git a/src/libvdpau/vdpau_h264.c b/src/libvdpau/vdpau_h264.c new file mode 100644 index 000000000..40ae04059 --- /dev/null +++ b/src/libvdpau/vdpau_h264.c @@ -0,0 +1,892 @@ +/* + * Copyright (C) 2008 Julian Scheel + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * vdpau_h264.c: H264 Video Decoder utilizing nvidia VDPAU engine + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> +#include <vdpau/vdpau.h> + +#include "xine_internal.h" +#include "video_out.h" +#include "buffer.h" +#include "xineutils.h" +#include "bswap.h" +#include "accel_vdpau.h" +#include "h264_parser.h" +#include "dpb.h" + +#define VIDEOBUFSIZE 128*1024 + +typedef struct { + video_decoder_class_t decoder_class; +} vdpau_h264_class_t; + +typedef struct vdpau_h264_decoder_s { + video_decoder_t video_decoder; /* parent video decoder structure */ + + vdpau_h264_class_t *class; + xine_stream_t *stream; + + /* these are traditional variables in a video decoder object */ + uint64_t video_step; /* frame duration in pts units */ + + int width; /* the width of a video frame */ + int height; /* the height of a video frame */ + double ratio; /* the width to height ratio */ + + + struct nal_parser *nal_parser; /* h264 nal parser. 
extracts stream data for vdpau */ + uint8_t wait_for_bottom_field; + struct decoded_picture *last_ref_pic; + uint32_t last_top_field_order_cnt; + + int have_frame_boundary_marks; + int wait_for_frame_start; + + VdpDecoder decoder; + int decoder_started; + + VdpColorStandard color_standard; + VdpDecoderProfile profile; + vdpau_accel_t *vdpau_accel; + + xine_t *xine; + + int64_t curr_pts; + int64_t next_pts; + + vo_frame_t *last_img; + vo_frame_t *dangling_img; + + uint8_t *codec_private; + uint32_t codec_private_len; + + int vdp_runtime_nr; + +} vdpau_h264_decoder_t; + +static void vdpau_h264_reset (video_decoder_t *this_gen); +static void vdpau_h264_flush (video_decoder_t *this_gen); + +/************************************************************************** + * vdpau_h264 specific decode functions + *************************************************************************/ + +/************************************************************************** + * xine video plugin functions + *************************************************************************/ + + +static inline void dump_pictureinfo_h264(VdpPictureInfoH264 *pic) +{ + printf("C: slice_count: %d\n", pic->slice_count); + printf("C: field_order_cnt[0]: %d\n", pic->field_order_cnt[0]); + printf("C: field_order_cnt[1]: %d\n", pic->field_order_cnt[1]); + printf("C: is_reference: %d\n", pic->is_reference); + printf("C: frame_num: %d\n", pic->frame_num); + printf("C: field_pic_flag: %d\n", pic->field_pic_flag); + printf("C: bottom_field_flag: %d\n", pic->bottom_field_flag); + printf("C: num_ref_frames: %d\n", pic->num_ref_frames); + printf("C: mb_adaptive_frame_field_flag: %d\n", pic->mb_adaptive_frame_field_flag); + printf("C: constrained_intra_pred_flag: %d\n", pic->constrained_intra_pred_flag); + printf("C: weighted_pred_flag: %d\n", pic->weighted_pred_flag); + printf("C: weighted_bipred_idc: %d\n", pic->weighted_bipred_idc); + printf("C: frame_mbs_only_flag: %d\n", pic->frame_mbs_only_flag); + 
printf("C: transform_8x8_mode_flag: %d\n", pic->transform_8x8_mode_flag); + printf("C: chroma_qp_index_offset: %d\n", pic->chroma_qp_index_offset); + printf("C: second_chroma_qp_index_offset: %d\n", pic->second_chroma_qp_index_offset); + printf("C: pic_init_qp_minus26: %d\n", pic->pic_init_qp_minus26); + printf("C: num_ref_idx_l0_active_minus1: %d\n", pic->num_ref_idx_l0_active_minus1); + printf("C: num_ref_idx_l1_active_minus1: %d\n", pic->num_ref_idx_l1_active_minus1); + printf("C: log2_max_frame_num_minus4: %d\n", pic->log2_max_frame_num_minus4); + printf("C: pic_order_cnt_type: %d\n", pic->pic_order_cnt_type); + printf("C: log2_max_pic_order_cnt_lsb_minus4: %d\n", pic->log2_max_pic_order_cnt_lsb_minus4); + printf("C: delta_pic_order_always_zero_flag: %d\n", pic->delta_pic_order_always_zero_flag); + printf("C: direct_8x8_inference_flag: %d\n", pic->direct_8x8_inference_flag); + printf("C: entropy_coding_mode_flag: %d\n", pic->entropy_coding_mode_flag); + printf("C: pic_order_present_flag: %d\n", pic->pic_order_present_flag); + printf("C: deblocking_filter_control_present_flag: %d\n", pic->deblocking_filter_control_present_flag); + printf("C: redundant_pic_cnt_present_flag: %d\n", pic->redundant_pic_cnt_present_flag); + + int i, j; + for(i = 0; i < 6; i++) { + printf("C: scalint_list4x4[%d]:\nC:", i); + for(j = 0; j < 16; j++) { + printf(" [%d]", pic->scaling_lists_4x4[i][j]); + if(j%8 == 0) + printf("\nC:"); + } + printf("C: \n"); + } + for(i = 0; i < 2; i++) { + printf("C: scalint_list8x8[%d]:\nC:", i); + for(j = 0; j < 64; j++) { + printf(" [%d] ", pic->scaling_lists_8x8[i][j]); + if(j%8 == 0) + printf("\nC:"); + } + printf("C: \n"); + } + + //int i; + for(i = 0; i < 16; i++) { + if(pic->referenceFrames[i].surface != VDP_INVALID_HANDLE) { + printf("C: -------------------\n"); + printf("C: Reference Frame %d:\n", i); + printf("C: frame_idx: %d\n", pic->referenceFrames[i].frame_idx); + printf("C: field_order_cnt[0]: %d\n", 
pic->referenceFrames[i].field_order_cnt[0]); + printf("C: field_order_cnt[1]: %d\n", pic->referenceFrames[i].field_order_cnt[0]); + printf("C: is_long_term: %d\n", pic->referenceFrames[i].is_long_term); + printf("C: top_is_reference: %d\n", pic->referenceFrames[i].top_is_reference); + printf("C: bottom_is_reference: %d\n", pic->referenceFrames[i].bottom_is_reference); + } + } + printf("C: ---------------------------------------------------------------\n"); + /*memcpy(pic.scaling_lists_4x4, pps->scaling_lists_4x4, 6*16); + memcpy(pic.scaling_lists_8x8, pps->scaling_lists_8x8, 2*64); + memcpy(pic.referenceFrames, this->reference_frames, sizeof(this->reference_frames));*/ + +} + +static void set_ratio(video_decoder_t *this_gen) +{ + vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *)this_gen; + + this->ratio = (double)this->width / (double)this->height; + if(this->nal_parser->current_nal->sps->vui_parameters.aspect_ration_info_present_flag) { + switch(this->nal_parser->current_nal->sps->vui_parameters.aspect_ratio_idc) { + case ASPECT_1_1: + this->ratio = 1 * this->ratio; + break; + case ASPECT_12_11: + this->ratio *= 12.0/11.0; + break; + case ASPECT_10_11: + this->ratio *= 10.0/11.0; + break; + case ASPECT_16_11: + this->ratio *= 16.0/11.0; + break; + case ASPECT_40_33: + this->ratio *= 40.0/33.0; + break; + case ASPECT_24_11: + this->ratio *= 24.0/11.0; + break; + case ASPECT_20_11: + this->ratio *= 20.0/11.0; + break; + case ASPECT_32_11: + this->ratio *= 32.0/11.0; + break; + case ASPECT_80_33: + this->ratio *= 80.0/33.0; + break; + case ASPECT_18_11: + this->ratio *= 18.0/11.0; + break; + case ASPECT_15_11: + this->ratio *= 15.0/11.0; + break; + case ASPECT_64_33: + this->ratio *= 64.0/33.0; + break; + case ASPECT_160_99: + this->ratio *= 160.0/99.0; + break; + case ASPECT_4_3: + this->ratio *= 4.0/3.0; + break; + case ASPECT_3_2: + this->ratio *= 3.0/2.0; + break; + case ASPECT_2_1: + this->ratio *= 2.0/1.0; + break; + case ASPECT_EXTENDED_SAR: + this->ratio 
*= + (double)this->nal_parser->current_nal->sps->vui_parameters.sar_width/ + (double)this->nal_parser->current_nal->sps->vui_parameters.sar_height; + break; + } + } +} + +static void fill_vdpau_pictureinfo_h264(video_decoder_t *this_gen, uint32_t slice_count, VdpPictureInfoH264 *pic) +{ + vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *)this_gen; + + struct pic_parameter_set_rbsp *pps = this->nal_parser->current_nal->pps; + struct seq_parameter_set_rbsp *sps = this->nal_parser->current_nal->sps; + struct slice_header *slc = this->nal_parser->current_nal->slc; + + pic->slice_count = slice_count; + pic->field_order_cnt[0] = this->nal_parser->current_nal->top_field_order_cnt; + pic->field_order_cnt[1] = this->nal_parser->current_nal->bottom_field_order_cnt; + pic->is_reference = + (this->nal_parser->current_nal->nal_ref_idc != 0) ? VDP_TRUE : VDP_FALSE; + pic->frame_num = slc->frame_num; + pic->field_pic_flag = slc->field_pic_flag; + pic->bottom_field_flag = slc->bottom_field_flag; + pic->num_ref_frames = sps->num_ref_frames; + pic->mb_adaptive_frame_field_flag = sps->mb_adaptive_frame_field_flag && !slc->field_pic_flag; + pic->constrained_intra_pred_flag = pps->constrained_intra_pred_flag; + pic->weighted_pred_flag = pps->weighted_pred_flag; + pic->weighted_bipred_idc = pps->weighted_bipred_idc; + pic->frame_mbs_only_flag = sps->frame_mbs_only_flag; + pic->transform_8x8_mode_flag = pps->transform_8x8_mode_flag; + pic->chroma_qp_index_offset = pps->chroma_qp_index_offset; + pic->second_chroma_qp_index_offset = pps->second_chroma_qp_index_offset; + pic->pic_init_qp_minus26 = pps->pic_init_qp_minus26; + pic->num_ref_idx_l0_active_minus1 = pps->num_ref_idx_l0_active_minus1; + pic->num_ref_idx_l1_active_minus1 = pps->num_ref_idx_l1_active_minus1; + pic->log2_max_frame_num_minus4 = sps->log2_max_frame_num_minus4; + pic->pic_order_cnt_type = sps->pic_order_cnt_type; + pic->log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_pic_order_cnt_lsb_minus4; + 
pic->delta_pic_order_always_zero_flag = sps->delta_pic_order_always_zero_flag; + pic->direct_8x8_inference_flag = sps->direct_8x8_inference_flag; + pic->entropy_coding_mode_flag = pps->entropy_coding_mode_flag; + pic->pic_order_present_flag = pps->pic_order_present_flag; + pic->deblocking_filter_control_present_flag = pps->deblocking_filter_control_present_flag; + pic->redundant_pic_cnt_present_flag = pps->redundant_pic_cnt_present_flag; + + memcpy(pic->scaling_lists_4x4, pps->scaling_lists_4x4, sizeof(pic->scaling_lists_4x4)); + memcpy(pic->scaling_lists_8x8, pps->scaling_lists_8x8, sizeof(pic->scaling_lists_8x8)); + + /* set num_ref_frames to the number of actually available reference frames, + * if this is not set generation 3 decoders will fail. */ + /*pic->num_ref_frames =*/ + fill_vdpau_reference_list(&(this->nal_parser->dpb), pic->referenceFrames); + +} + +static int vdpau_decoder_init(video_decoder_t *this_gen) +{ + vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *)this_gen; + vo_frame_t *img; + + this->curr_pts = this->next_pts; + this->next_pts = 0; + + if(this->width == 0) { + this->width = this->nal_parser->current_nal->sps->pic_width; + this->height = this->nal_parser->current_nal->sps->pic_height; + } + + set_ratio(this_gen); + + _x_stream_info_set( this->stream, XINE_STREAM_INFO_VIDEO_WIDTH, this->width ); + _x_stream_info_set( this->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, this->height ); + _x_stream_info_set( this->stream, XINE_STREAM_INFO_VIDEO_RATIO, ((double)10000*this->ratio) ); + _x_stream_info_set( this->stream, XINE_STREAM_INFO_FRAME_DURATION, this->video_step ); + _x_meta_info_set_utf8( this->stream, XINE_META_INFO_VIDEOCODEC, "H264/AVC (vdpau)" ); + xine_event_t event; + xine_format_change_data_t data; + event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; + event.stream = this->stream; + event.data = &data; + event.data_length = sizeof(data); + data.width = this->width; + data.height = this->height; + data.aspect = this->ratio; + 
xine_event_send( this->stream, &event ); + + switch(this->nal_parser->current_nal->sps->profile_idc) { + case 100: + this->profile = VDP_DECODER_PROFILE_H264_HIGH; + break; + case 77: + this->profile = VDP_DECODER_PROFILE_H264_MAIN; + break; + case 66: + default: + // nvidia's VDPAU doesn't support BASELINE. But most (every?) streams marked BASELINE do not use BASELINE specifics, + // so, just force MAIN. + //this->profile = VDP_DECODER_PROFILE_H264_BASELINE; + this->profile = VDP_DECODER_PROFILE_H264_MAIN; + break; + } + + // Level 4.1 limits: + int ref_frames = 0; + if(this->nal_parser->current_nal->sps->num_ref_frames) { + ref_frames = this->nal_parser->current_nal->sps->num_ref_frames; + } else { + uint32_t round_width = (this->width + 15) & ~15; + uint32_t round_height = (this->height + 15) & ~15; + uint32_t surf_size = (round_width * round_height * 3) / 2; + ref_frames = (12 * 1024 * 1024) / surf_size; + } + + if (ref_frames > 16) { + ref_frames = 16; + } + + printf("Allocate %d reference frames\n", ref_frames); + /* get the vdpau context from vo */ + //(this->stream->video_out->open) (this->stream->video_out, this->stream); + img = this->stream->video_out->get_frame (this->stream->video_out, + this->width, this->height, + this->ratio, + XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS); + + img->duration = this->video_step; + img->pts = this->curr_pts; + + if (this->dangling_img) { + fprintf(stderr, "broken stream: current img wasn't processed -- freeing it\n!"); + this->dangling_img->free(this->dangling_img); + } + this->dangling_img = img; + this->last_img = img; + + this->vdpau_accel = (vdpau_accel_t*)img->accel_data; + + /*VdpBool is_supported; + uint32_t max_level, max_references, max_width, max_height;*/ + if(this->vdpau_accel->vdp_runtime_nr > 0) { + xprintf(this->xine, XINE_VERBOSITY_LOG, + "Create decoder: vdp_device: %d, profile: %d, res: %dx%d\n", + this->vdpau_accel->vdp_device, this->profile, this->width, this->height); + + VdpStatus status = 
this->vdpau_accel->vdp_decoder_create(this->vdpau_accel->vdp_device, + this->profile, this->width, this->height, 16, &this->decoder); + + if(status != VDP_STATUS_OK) { + xprintf(this->xine, XINE_VERBOSITY_LOG, "vdpau_h264: ERROR: VdpDecoderCreate returned status != OK (%s)\n", this->vdpau_accel->vdp_get_error_string(status)); + return 0; + } + } + return 1; +} + +static int vdpau_decoder_render(video_decoder_t *this_gen, VdpBitstreamBuffer *vdp_buffer, uint32_t slice_count) +{ + vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *)this_gen; + vo_frame_t *img = this->last_img; + + if(this->nal_parser->current_nal->nal_unit_type == NAL_SLICE_IDR) { + dpb_flush(&(this->nal_parser->dpb)); + } + + VdpPictureInfoH264 pic; + + fill_vdpau_pictureinfo_h264(this_gen, slice_count, &pic); + + //printf("next decode: %d, %d\n", pic.field_order_cnt[0], pic.field_order_cnt[1]); + + if(!this->decoder_started && !pic.is_reference) + return 0; + + this->decoder_started = 1; + + struct seq_parameter_set_rbsp *sps = this->nal_parser->current_nal->sps; + struct slice_header *slc = this->nal_parser->current_nal->slc; + + if(sps->vui_parameters_present_flag && + sps->vui_parameters.timing_info_present_flag && + this->video_step == 0) { + this->video_step = 2*90000/(1/((double)sps->vui_parameters.num_units_in_tick/(double)sps->vui_parameters.time_scale)); + } + + /* flush the DPB if this frame was an IDR */ + //printf("is_idr: %d\n", this->nal_parser->is_idr); + this->nal_parser->is_idr = 0; + + /* go and decode a frame */ + + //dump_pictureinfo_h264(&pic); + + /*int i; + printf("Decode data: \n"); + for(i = 0; i < ((vdp_buffer->bitstream_bytes < 20) ? 
vdp_buffer->bitstream_bytes : 20); i++) { + printf("%02x ", ((uint8_t*)vdp_buffer->bitstream)[i]); + if((i+1) % 10 == 0) + printf("\n"); + } + printf("\n...\n"); + for(i = vdp_buffer->bitstream_bytes - 20; i < vdp_buffer->bitstream_bytes; i++) { + printf("%02x ", ((uint8_t*)vdp_buffer->bitstream)[i]); + if((i+1) % 10 == 0) + printf("\n"); + }*/ + + + if(img == NULL) { + img = this->stream->video_out->get_frame (this->stream->video_out, + this->width, this->height, + this->ratio, + XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS); + this->vdpau_accel = (vdpau_accel_t*)img->accel_data; + + img->duration = this->video_step; + img->pts = this->curr_pts; + + if (this->dangling_img) { + fprintf(stderr, "broken stream: current img wasn't processed -- freeing it\n!"); + this->dangling_img->free(this->dangling_img); + } + this->dangling_img = img; + } + + if(this->vdp_runtime_nr != *(this->vdpau_accel->current_vdp_runtime_nr)) { + printf("VDPAU was preempted. Reinitialise the decoder.\n"); + this->decoder = VDP_INVALID_HANDLE; + vdpau_h264_reset(this_gen); + this->vdp_runtime_nr = this->vdpau_accel->vdp_runtime_nr; + return 0; + } + + VdpVideoSurface surface = this->vdpau_accel->surface; + + //printf("Decode: NUM: %d, REF: %d, BYTES: %d, PTS: %lld\n", pic.frame_num, pic.is_reference, vdp_buffer->bitstream_bytes, this->curr_pts); + VdpStatus status = this->vdpau_accel->vdp_decoder_render(this->decoder, + surface, (VdpPictureInfo*)&pic, 1, vdp_buffer); + + /* only free the actual data, as the start seq is only + * locally allocated anyway. 
*/ + if(((uint8_t*)vdp_buffer->bitstream) != NULL) { + free((uint8_t*)vdp_buffer->bitstream); + } + + this->curr_pts = this->next_pts; + this->next_pts = 0; + + process_mmc_operations(this->nal_parser); + + if(status != VDP_STATUS_OK) + { + xprintf(this->xine, XINE_VERBOSITY_LOG, "vdpau_h264: Decoder failure: %s\n", this->vdpau_accel->vdp_get_error_string(status)); + if (this->dangling_img) + this->dangling_img->free(this->dangling_img); + img = this->last_img = this->dangling_img = NULL; + } + else { + img->bad_frame = 0; + + if((sps->vui_parameters_present_flag && + sps->vui_parameters.pic_struct_present_flag && + !this->nal_parser->current_nal->interlaced) || + (!pic.field_pic_flag && !pic.mb_adaptive_frame_field_flag)) + img->progressive_frame = 1; + else + img->progressive_frame = 0; + + if(!img->progressive_frame && this->nal_parser->current_nal->repeat_pic) + img->repeat_first_field = 1; + //else if(img->progressive_frame && this->nal_parser->current_nal->repeat_pic) + // img->duration *= this->nal_parser->current_nal->repeat_pic; + + /* only bt601 and bt709 handled so far. 
others seem to be rarely used */ + if(sps->vui_parameters.colour_description_present) { + switch (sps->vui_parameters.colour_primaries) { + case 1: + this->color_standard = VDP_COLOR_STANDARD_ITUR_BT_709; + break; + case 5: + case 6: + default: + this->color_standard = VDP_COLOR_STANDARD_ITUR_BT_601; + break; + } + } + + this->vdpau_accel->color_standard = this->color_standard; + + struct decoded_picture *decoded_pic = NULL; + if(pic.is_reference) { + if(!slc->field_pic_flag || !this->wait_for_bottom_field) { + decoded_pic = init_decoded_picture(this->nal_parser->current_nal, surface, img); + this->last_ref_pic = decoded_pic; + decoded_pic->used_for_reference = 1; + dpb_add_picture(&(this->nal_parser->dpb), decoded_pic, sps->num_ref_frames); + this->dangling_img = NULL; + } else if(slc->field_pic_flag && this->wait_for_bottom_field) { + if(this->last_ref_pic) { + decoded_pic = this->last_ref_pic; + //copy_nal_unit(decoded_pic->nal, this->nal_parser->current_nal); + decoded_pic->nal->bottom_field_order_cnt = this->nal_parser->current_nal->bottom_field_order_cnt; + this->last_ref_pic->bottom_is_reference = 1; + } + } + } + + if(!slc->field_pic_flag || + (slc->field_pic_flag && slc->bottom_field_flag && this->wait_for_bottom_field)) { + + if(!decoded_pic) { + decoded_pic = init_decoded_picture(this->nal_parser->current_nal, surface, img); + //decoded_pic->nal->top_field_order_cnt = this->last_top_field_order_cnt; + dpb_add_picture(&(this->nal_parser->dpb), decoded_pic, sps->num_ref_frames); + this->dangling_img = NULL; + + if(decoded_pic->nal->slc->bottom_field_flag) + decoded_pic->nal->top_field_order_cnt = this->last_top_field_order_cnt; + } + + decoded_pic->delayed_output = 1; + + if(this->wait_for_bottom_field && slc->bottom_field_flag) + decoded_pic->nal->bottom_field_order_cnt = this->nal_parser->current_nal->bottom_field_order_cnt; + + this->last_img = img = NULL; + + /* now retrieve the next output frame */ + if ((decoded_pic = 
dpb_get_next_out_picture(&(this->nal_parser->dpb), 0)) != NULL) { + decoded_pic->img->top_field_first = (decoded_pic->nal->top_field_order_cnt <= decoded_pic->nal->bottom_field_order_cnt); + //printf("draw pts: %lld\n", decoded_pic->img->pts); + decoded_pic->img->draw(decoded_pic->img, this->stream); + dpb_set_output_picture(&(this->nal_parser->dpb), decoded_pic); + } + + this->wait_for_bottom_field = 0; + + } else if(slc->field_pic_flag && !slc->bottom_field_flag) { + // don't draw yet, second field is missing. + this->last_top_field_order_cnt = this->nal_parser->current_nal->top_field_order_cnt; + this->wait_for_bottom_field = 1; + this->last_img = img; + } + } + + return 1; +} + +/* + * This function receives a buffer of data from the demuxer layer and + * figures out how to handle it based on its header flags. + */ +static void vdpau_h264_decode_data (video_decoder_t *this_gen, + buf_element_t *buf) { + + vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *) this_gen; + + VdpBitstreamBuffer vdp_buffer; + vdp_buffer.struct_version = VDP_BITSTREAM_BUFFER_VERSION; + + /* a video decoder does not care about this flag (?) 
*/ + if (buf->decoder_flags & BUF_FLAG_PREVIEW) + return; + + if(buf->decoder_flags & BUF_FLAG_FRAME_START || buf->decoder_flags & BUF_FLAG_FRAME_END) + this->have_frame_boundary_marks = 1; + + if (buf->decoder_flags & BUF_FLAG_FRAMERATE) { + this->video_step = buf->decoder_info[0]; + _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, this->video_step); + } + + if (buf->decoder_flags & BUF_FLAG_STDHEADER) { /* need to initialize */ + this->have_frame_boundary_marks = 0; + + xine_bmiheader *bih = (xine_bmiheader*)buf->content; + this->width = bih->biWidth; + this->height = bih->biHeight; + + uint8_t *codec_private = buf->content + sizeof(xine_bmiheader); + uint32_t codec_private_len = bih->biSize - sizeof(xine_bmiheader); + this->codec_private_len = codec_private_len; + this->codec_private = malloc(codec_private_len); + memcpy(this->codec_private, codec_private, codec_private_len); + + if(codec_private_len > 0) { + parse_codec_private(this->nal_parser, codec_private, codec_private_len); + } + } else if (buf->decoder_flags & BUF_FLAG_SPECIAL) { + this->have_frame_boundary_marks = 0; + + if(buf->decoder_info[1] == BUF_SPECIAL_DECODER_CONFIG) { + uint8_t *codec_private = buf->decoder_info_ptr[2]; + uint32_t codec_private_len = buf->decoder_info[2]; + this->codec_private_len = codec_private_len; + this->codec_private = malloc(codec_private_len); + memcpy(this->codec_private, codec_private, codec_private_len); + + if(codec_private_len > 0) { + parse_codec_private(this->nal_parser, codec_private, codec_private_len); + } + } else if (buf->decoder_info[1] == BUF_SPECIAL_PALETTE) { + printf("SPECIAL PALETTE is not yet handled\n"); + } else + printf("UNKNOWN SPECIAL HEADER\n"); + + } else { + /* parse the first nal packages to retrieve profile type */ + int len = 0; + uint32_t slice_count; + + while(len < buf->size && !(this->wait_for_frame_start && !(buf->decoder_flags & BUF_FLAG_FRAME_START))) { + this->wait_for_frame_start = 0; + len += 
parse_frame(this->nal_parser, buf->content + len, buf->size - len, + (uint8_t*)&vdp_buffer.bitstream, &vdp_buffer.bitstream_bytes, &slice_count); + + if(this->decoder == VDP_INVALID_HANDLE && + this->nal_parser->current_nal->sps != NULL && + this->nal_parser->current_nal->sps->pic_width > 0 && + this->nal_parser->current_nal->sps->pic_height > 0) { + + vdpau_decoder_init(this_gen); + } + + if(this->decoder != VDP_INVALID_HANDLE && + vdp_buffer.bitstream_bytes > 0 && + this->nal_parser->current_nal->slc != NULL && + this->nal_parser->current_nal->pps != NULL) { + vdpau_decoder_render(this_gen, &vdp_buffer, slice_count); + } + + /* in case the last nal was detected as END_OF_SEQUENCE + * we will flush the dpb, so that all pictures get drawn + */ + if(this->nal_parser->last_nal_res == 3) + vdpau_h264_flush(this_gen); + } + + if(buf->pts != 0 && buf->pts != this->next_pts) { + //printf("next pts: %lld\n", buf->pts); + this->next_pts = buf->pts; + } + } + + if(buf->decoder_flags & BUF_FLAG_FRAME_END) + this->wait_for_frame_start = 0; +} + +/* + * This function is called when xine needs to flush the system. + */ +static void vdpau_h264_flush (video_decoder_t *this_gen) { + vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t*) this_gen; + struct decoded_picture *decoded_pic = NULL; + + while ((decoded_pic = dpb_get_next_out_picture(&(this->nal_parser->dpb), 1)) != NULL) { + decoded_pic->img->top_field_first = (decoded_pic->nal->top_field_order_cnt <= decoded_pic->nal->bottom_field_order_cnt); + printf("FLUSH draw pts: %lld\n", decoded_pic->img->pts); + decoded_pic->img->draw(decoded_pic->img, this->stream); + dpb_set_output_picture(&(this->nal_parser->dpb), decoded_pic); + } +} + +/* + * This function resets the video decoder. 
+ */ +static void vdpau_h264_reset (video_decoder_t *this_gen) { + vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *) this_gen; + + printf("vdpau_h264_reset\n"); + + dpb_free_all( &(this->nal_parser->dpb) ); + + if (this->decoder != VDP_INVALID_HANDLE) { + this->vdpau_accel->vdp_decoder_destroy( this->decoder ); + this->decoder = VDP_INVALID_HANDLE; + } + + free_parser(this->nal_parser); + + this->color_standard = VDP_COLOR_STANDARD_ITUR_BT_601; + this->wait_for_bottom_field = 0; + this->video_step = 0; + this->curr_pts = 0; + this->next_pts = 0; + + this->nal_parser = init_parser(); + if(this->codec_private_len > 0) { + parse_codec_private(this->nal_parser, this->codec_private, this->codec_private_len); + + /* if the stream does not contain frame boundary marks we + * have to hope that the next nal will start with the next + * incoming buf... seems to work, though... + */ + this->wait_for_frame_start = this->have_frame_boundary_marks; + } + + if (this->dangling_img) { + this->dangling_img->free(this->dangling_img); + this->dangling_img = NULL; + } + + this->last_img = NULL; +} + +/* + * The decoder should forget any stored pts values here. + */ +static void vdpau_h264_discontinuity (video_decoder_t *this_gen) { + vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *) this_gen; + + this->curr_pts = 0; + this->next_pts = 0; + dpb_clear_all_pts(&this->nal_parser->dpb); +} + +/* + * This function frees the video decoder instance allocated to the decoder. 
+ */ +static void vdpau_h264_dispose (video_decoder_t *this_gen) { + + vdpau_h264_decoder_t *this = (vdpau_h264_decoder_t *) this_gen; + + if (this->dangling_img) { + this->dangling_img->free(this->dangling_img); + this->dangling_img = NULL; + } + + dpb_free_all( &(this->nal_parser->dpb) ); + + if (this->decoder != VDP_INVALID_HANDLE) { + this->vdpau_accel->vdp_decoder_destroy( this->decoder ); + this->decoder = VDP_INVALID_HANDLE; + } + + this->stream->video_out->close( this->stream->video_out, this->stream ); + + free_parser (this->nal_parser); + free (this_gen); +} + +/* + * This function allocates, initializes, and returns a private video + * decoder structure. + */ +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + + vdpau_h264_decoder_t *this ; + + /* the videoout must be vdpau-capable to support this decoder */ + if ( !(stream->video_driver->get_capabilities(stream->video_driver) & VO_CAP_VDPAU_H264) ) + return NULL; + + /* now check if vdpau has free decoder resource */ + vo_frame_t *img = stream->video_out->get_frame( stream->video_out, 1920, 1080, 1, XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS ); + vdpau_accel_t *accel = (vdpau_accel_t*)img->accel_data; + int runtime_nr = accel->vdp_runtime_nr; + img->free(img); + VdpDecoder decoder; + VdpStatus st = accel->vdp_decoder_create( accel->vdp_device, VDP_DECODER_PROFILE_H264_MAIN, 1920, 1080, 16, &decoder ); + if ( st!=VDP_STATUS_OK ) { + lprintf( "can't create vdpau decoder.\n" ); + return 1; + } + + accel->vdp_decoder_destroy( decoder ); + + this = (vdpau_h264_decoder_t *) calloc(1, sizeof(vdpau_h264_decoder_t)); + + this->video_decoder.decode_data = vdpau_h264_decode_data; + this->video_decoder.flush = vdpau_h264_flush; + this->video_decoder.reset = vdpau_h264_reset; + this->video_decoder.discontinuity = vdpau_h264_discontinuity; + this->video_decoder.dispose = vdpau_h264_dispose; + + this->stream = stream; + this->xine = stream->xine; + this->class = 
(vdpau_h264_class_t *) class_gen; + + this->decoder = VDP_INVALID_HANDLE; + this->vdp_runtime_nr = runtime_nr; + this->color_standard = VDP_COLOR_STANDARD_ITUR_BT_601; + + this->nal_parser = init_parser(); + + (this->stream->video_out->open) (this->stream->video_out, this->stream); + + return &this->video_decoder; +} + +/* + * This function returns a brief string that describes (usually with the + * decoder's most basic name) the video decoder plugin. + */ +static char *get_identifier (video_decoder_class_t *this) { + return "vdpau_h264"; +} + +/* + * This function returns a slightly longer string describing the video + * decoder plugin. + */ +static char *get_description (video_decoder_class_t *this) { + return "vdpau_h264: h264 decoder plugin using VDPAU hardware decoding.\n" + "Must be used along with video_out_vdpau."; +} + +/* + * This function frees the video decoder class and any other memory that was + * allocated. + */ +static void dispose_class (video_decoder_class_t *this) { + free (this); +} + +/* + * This function allocates a private video decoder class and initializes + * the class's member functions. + */ +static void *init_plugin (xine_t *xine, void *data) { + + vdpau_h264_class_t *this; + + this = (vdpau_h264_class_t *) calloc(1, sizeof(vdpau_h264_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.get_identifier = get_identifier; + this->decoder_class.get_description = get_description; + this->decoder_class.dispose = dispose_class; + + return this; +} + +/* + * This is a list of all of the internal xine video buffer types that + * this decoder is able to handle. Check src/xine-engine/buffer.h for a + * list of valid buffer types (and add a new one if the one you need does + * not exist). Terminate the list with a 0. 
+ */ +static const uint32_t video_types[] = { + /* BUF_VIDEO_FOOVIDEO, */ + BUF_VIDEO_H264, + 0 +}; + +/* + * This data structure combines the list of supported xine buffer types and + * the priority that the plugin should be given with respect to other + * plugins that handle the same buffer type. A plugin with priority (n+1) + * will be used instead of a plugin with priority (n). + */ +static const decoder_info_t dec_info_video = { + video_types, /* supported types */ + 7 /* priority */ +}; + +/* + * The plugin catalog entry. This is the only information that this plugin + * will export to the public. + */ +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* { type, API, "name", version, special_info, init_function } */ + { PLUGIN_VIDEO_DECODER, 18, "vdpau_h264", XINE_VERSION_CODE, &dec_info_video, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/libvdpau/vdpau_mpeg12.c b/src/libvdpau/vdpau_mpeg12.c new file mode 100644 index 000000000..f3ad9a03c --- /dev/null +++ b/src/libvdpau/vdpau_mpeg12.c @@ -0,0 +1,1027 @@ +/* + * Copyright (C) 2008 the xine project + * Copyright (C) 2008 Christophe Thommeret <hftom@free.fr> + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * vdpau_mpeg12.c, a mpeg1/2 video stream parser using VDPAU hardware decoder + * + */ + +/*#define LOG*/ +#define LOG_MODULE "vdpau_mpeg12" + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#include "xine_internal.h" +#include "video_out.h" +#include "buffer.h" +#include "xineutils.h" +#include "accel_vdpau.h" +#include "bits_reader.h" + +#include <vdpau/vdpau.h> + +#define sequence_header_code 0xb3 +#define sequence_error_code 0xb4 +#define sequence_end_code 0xb7 +#define group_start_code 0xb8 +#define extension_start_code 0xb5 +#define user_data_start_code 0xb2 +#define picture_start_code 0x00 +#define begin_slice_start_code 0x01 +#define end_slice_start_code 0xaf + +#define sequence_ext_sc 1 +#define quant_matrix_ext_sc 3 +#define picture_coding_ext_sc 8 +#define sequence_display_ext_sc 2 + +#define I_FRAME 1 +#define P_FRAME 2 +#define B_FRAME 3 + +#define PICTURE_TOP 1 +#define PICTURE_BOTTOM 2 +#define PICTURE_FRAME 3 + +#define WANT_HEADER 1 +#define WANT_EXT 2 +#define WANT_SLICE 3 + +/*#define MAKE_DAT*/ /*do NOT define this, unless you know what you do */ +#ifdef MAKE_DAT +static int nframes; +static FILE *outfile; +#endif + + + +/* default intra quant matrix, in zig-zag order */ +static const uint8_t default_intra_quantizer_matrix[64] = { + 8, + 16, 16, + 19, 16, 19, + 22, 22, 22, 22, + 22, 22, 26, 24, 26, + 27, 27, 27, 26, 26, 26, + 26, 27, 27, 27, 29, 29, 29, + 34, 34, 34, 29, 29, 29, 27, 27, + 29, 29, 32, 32, 34, 34, 37, + 38, 37, 35, 35, 34, 35, + 38, 38, 40, 40, 40, + 48, 48, 46, 46, + 56, 56, 58, + 69, 69, + 83 +}; + +uint8_t mpeg2_scan_norm[64] = { + /* Zig-Zag scan pattern */ + 0, 1, 8,16, 9, 2, 3,10, + 17,24,32,25,18,11, 4, 5, + 12,19,26,33,40,48,41,34, + 27,20,13, 
6, 7,14,21,28, + 35,42,49,56,57,50,43,36, + 29,22,15,23,30,37,44,51, + 58,59,52,45,38,31,39,46, + 53,60,61,54,47,55,62,63 +}; + + + +typedef struct { + VdpPictureInfoMPEG1Or2 vdp_infos; /* first field, also used for frame */ + VdpPictureInfoMPEG1Or2 vdp_infos2; /* second field */ + int slices_count, slices_count2; + uint8_t *slices; + int slices_size; + int slices_pos, slices_pos_top; + + int progressive_frame; + int state; +} picture_t; + + + +typedef struct { + uint32_t coded_width; + uint32_t coded_height; + + uint64_t video_step; /* frame duration in pts units */ + double ratio; + VdpDecoderProfile profile; + int chroma; + int top_field_first; + + int have_header; + + uint8_t *buf; /* accumulate data */ + int bufseek; + uint32_t bufsize; + uint32_t bufpos; + int start; + + picture_t picture; + vo_frame_t *forward_ref; + vo_frame_t *backward_ref; + + int64_t cur_pts, seq_pts; + + vdpau_accel_t *accel_vdpau; + + bits_reader_t br; + + int vdp_runtime_nr; + +} sequence_t; + + + +typedef struct { + video_decoder_class_t decoder_class; +} vdpau_mpeg12_class_t; + + + +typedef struct vdpau_mpeg12_decoder_s { + video_decoder_t video_decoder; /* parent video decoder structure */ + + vdpau_mpeg12_class_t *class; + xine_stream_t *stream; + + sequence_t sequence; + + VdpDecoder decoder; + VdpDecoderProfile decoder_profile; + uint32_t decoder_width; + uint32_t decoder_height; + +} vdpau_mpeg12_decoder_t; + + + +static void reset_picture( picture_t *pic ) +{ + lprintf( "reset_picture\n" ); + pic->vdp_infos.picture_structure = 0; + pic->vdp_infos2.intra_dc_precision = pic->vdp_infos.intra_dc_precision = 0; + pic->vdp_infos2.frame_pred_frame_dct = pic->vdp_infos.frame_pred_frame_dct = 1; + pic->vdp_infos2.concealment_motion_vectors = pic->vdp_infos.concealment_motion_vectors = 0; + pic->vdp_infos2.intra_vlc_format = pic->vdp_infos.intra_vlc_format = 0; + pic->vdp_infos2.alternate_scan = pic->vdp_infos.alternate_scan = 0; + pic->vdp_infos2.q_scale_type = 
pic->vdp_infos.q_scale_type = 0; + pic->vdp_infos2.top_field_first = pic->vdp_infos.top_field_first = 1; + pic->slices_count = 0; + pic->slices_count2 = 0; + pic->slices_pos = 0; + pic->slices_pos_top = 0; + pic->progressive_frame = 0; + pic->state = WANT_HEADER; +} + + + +static void init_picture( picture_t *pic ) +{ + pic->slices_size = 2048; + pic->slices = (uint8_t*)malloc(pic->slices_size); + reset_picture( pic ); +} + + + +static void reset_sequence( sequence_t *sequence, int free_refs ) +{ + sequence->cur_pts = sequence->seq_pts = 0; + if ( sequence->forward_ref ) + sequence->forward_ref->pts = 0; + if ( sequence->backward_ref ) + sequence->backward_ref->pts = 0; + + if ( !free_refs ) + return; + + sequence->bufpos = 0; + sequence->bufseek = 0; + sequence->start = -1; + if ( sequence->forward_ref ) + sequence->forward_ref->free( sequence->forward_ref ); + sequence->forward_ref = NULL; + if ( sequence->backward_ref ) + sequence->backward_ref->free( sequence->backward_ref ); + sequence->backward_ref = NULL; + sequence->top_field_first = 0; +} + + + +static void free_sequence( sequence_t *sequence ) +{ + lprintf( "init_sequence\n" ); + sequence->have_header = 0; + sequence->profile = VDP_DECODER_PROFILE_MPEG1; + sequence->chroma = 0; + sequence->video_step = 3600; + reset_sequence( sequence, 1 ); +} + + + +static void sequence_header( vdpau_mpeg12_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + int i, j; + + if ( sequence->cur_pts ) { + sequence->seq_pts = sequence->cur_pts; + } + + bits_reader_set( &sequence->br, buf ); + sequence->coded_width = read_bits( &sequence->br, 12 ); + lprintf( "coded_width: %d\n", sequence->coded_width ); + sequence->coded_height = read_bits( &sequence->br, 12 ); + lprintf( "coded_height: %d\n", sequence->coded_height ); + int rt = read_bits( &sequence->br, 4 ); + switch ( rt ) { + case 1: sequence->ratio = 1.0; break; + case 2: sequence->ratio = 4.0/3.0; break; + case 
3: sequence->ratio = 16.0/9.0; break; + case 4: sequence->ratio = 2.21; break; + default: sequence->ratio = (double)sequence->coded_width/(double)sequence->coded_height; + } + lprintf( "ratio: %d\n", rt ); + int fr = read_bits( &sequence->br, 4 ); + switch ( fr ) { + case 1: sequence->video_step = 3913; break; /* 23.976.. */ + case 2: sequence->video_step = 3750; break; /* 24 */ + case 3: sequence->video_step = 3600; break; /* 25 */ + case 4: sequence->video_step = 3003; break; /* 29.97.. */ + case 5: sequence->video_step = 3000; break; /* 30 */ + case 6: sequence->video_step = 1800; break; /* 50 */ + case 7: sequence->video_step = 1525; break; /* 59.94.. */ + case 8: sequence->video_step = 1509; break; /* 60 */ + } + lprintf( "frame_rate: %d\n", fr ); + int tmp; + tmp = read_bits( &sequence->br, 18 ); + lprintf( "bit_rate_value: %d\n", tmp ); + tmp = read_bits( &sequence->br, 1 ); + lprintf( "marker_bit: %d\n", tmp ); + tmp = read_bits( &sequence->br, 10 ); + lprintf( "vbv_buffer_size_value: %d\n", tmp ); + tmp = read_bits( &sequence->br, 1 ); + lprintf( "constrained_parameters_flag: %d\n", tmp ); + i = read_bits( &sequence->br, 1 ); + lprintf( "load_intra_quantizer_matrix: %d\n", i ); + if ( i ) { + for ( j=0; j<64; ++j ) { + sequence->picture.vdp_infos2.intra_quantizer_matrix[mpeg2_scan_norm[j]] = sequence->picture.vdp_infos.intra_quantizer_matrix[mpeg2_scan_norm[j]] = read_bits( &sequence->br, 8 ); + } + } + else { + for ( j=0; j<64; ++j ) { + sequence->picture.vdp_infos2.intra_quantizer_matrix[mpeg2_scan_norm[j]] = sequence->picture.vdp_infos.intra_quantizer_matrix[mpeg2_scan_norm[j]] = default_intra_quantizer_matrix[j]; + } + } + + i = read_bits( &sequence->br, 1 ); + lprintf( "load_non_intra_quantizer_matrix: %d\n", i ); + if ( i ) { + for ( j=0; j<64; ++j ) { + sequence->picture.vdp_infos2.non_intra_quantizer_matrix[mpeg2_scan_norm[j]] = sequence->picture.vdp_infos.non_intra_quantizer_matrix[mpeg2_scan_norm[j]] = read_bits( &sequence->br, 8 ); + } + } + 
else { + memset( sequence->picture.vdp_infos.non_intra_quantizer_matrix, 16, 64 ); + memset( sequence->picture.vdp_infos2.non_intra_quantizer_matrix, 16, 64 ); + } + + if ( !sequence->have_header ) { + sequence->have_header = 1; + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_WIDTH, sequence->coded_width ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, sequence->coded_height ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_RATIO, ((double)10000*sequence->ratio) ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_FRAME_DURATION, sequence->video_step ); + _x_meta_info_set_utf8( this_gen->stream, XINE_META_INFO_VIDEOCODEC, "MPEG1/2 (vdpau)" ); + xine_event_t event; + xine_format_change_data_t data; + event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; + event.stream = this_gen->stream; + event.data = &data; + event.data_length = sizeof(data); + data.width = sequence->coded_width; + data.height = sequence->coded_height; + data.aspect = sequence->ratio; + xine_event_send( this_gen->stream, &event ); + } +} + + + +static void picture_header( sequence_t *sequence, uint8_t *buf, int len ) +{ + if ( sequence->picture.state!=WANT_HEADER ) + return; + + if ( sequence->cur_pts ) { + sequence->seq_pts = sequence->cur_pts; + } + + if ( sequence->profile==VDP_DECODER_PROFILE_MPEG1 ) + sequence->picture.vdp_infos.picture_structure = PICTURE_FRAME; + + VdpPictureInfoMPEG1Or2 *infos = &sequence->picture.vdp_infos; + + if ( sequence->picture.vdp_infos.picture_structure && sequence->picture.slices_count2 ) + reset_picture( &sequence->picture ); + + if ( sequence->picture.vdp_infos.picture_structure==PICTURE_FRAME ) { + reset_picture( &sequence->picture ); + } + else if ( sequence->picture.vdp_infos.picture_structure ) { + infos = &sequence->picture.vdp_infos2; + } + + bits_reader_set( &sequence->br, buf ); + int tmp = read_bits( &sequence->br, 10 ); + lprintf( "temporal_reference: %d\n", tmp ); + infos->picture_coding_type = 
read_bits( &sequence->br, 3 ); + lprintf( "picture_coding_type: %d\n", infos->picture_coding_type ); + infos->forward_reference = VDP_INVALID_HANDLE; + infos->backward_reference = VDP_INVALID_HANDLE; + read_bits( &sequence->br, 16 ); + if ( infos->picture_coding_type > I_FRAME ) { + infos->full_pel_forward_vector = read_bits( &sequence->br, 1 ); + infos->f_code[0][0] = infos->f_code[0][1] = read_bits( &sequence->br, 3 ); + if ( infos->picture_coding_type==B_FRAME ) { + infos->full_pel_backward_vector = read_bits( &sequence->br, 1 ); + infos->f_code[1][0] = infos->f_code[1][1] = read_bits( &sequence->br, 3 ); + } + } + else { + infos->full_pel_forward_vector = 0; + infos->full_pel_backward_vector = 0; + } + if ( sequence->profile==VDP_DECODER_PROFILE_MPEG1 ) + sequence->picture.state = WANT_SLICE; + else + sequence->picture.state = WANT_EXT; +} + + + +static void sequence_extension( sequence_t *sequence, uint8_t *buf, int len ) +{ + bits_reader_set( &sequence->br, buf ); + int tmp = read_bits( &sequence->br, 4 ); + lprintf( "extension_start_code_identifier: %d\n", tmp ); + read_bits( &sequence->br, 1 ); + switch ( read_bits( &sequence->br, 3 ) ) { + case 5: sequence->profile = VDP_DECODER_PROFILE_MPEG2_SIMPLE; break; + default: sequence->profile = VDP_DECODER_PROFILE_MPEG2_MAIN; + } + read_bits( &sequence->br, 4 ); + tmp = read_bits( &sequence->br, 1 ); + lprintf( "progressive_sequence: %d\n", tmp ); + if ( read_bits( &sequence->br, 2 ) == 2 ) + sequence->chroma = VO_CHROMA_422; + tmp = read_bits( &sequence->br, 2 ); + lprintf( "horizontal_size_extension: %d\n", tmp ); + tmp = read_bits( &sequence->br, 2 ); + lprintf( "vertical_size_extension: %d\n", tmp ); + tmp = read_bits( &sequence->br, 12 ); + lprintf( "bit_rate_extension: %d\n", tmp ); + tmp = read_bits( &sequence->br, 1 ); + lprintf( "marker_bit: %d\n", tmp ); + tmp = read_bits( &sequence->br, 8 ); + lprintf( "vbv_buffer_size_extension: %d\n", tmp ); + tmp = read_bits( &sequence->br, 1 ); + lprintf( 
"low_delay: %d\n", tmp ); + tmp = read_bits( &sequence->br, 2 ); + lprintf( "frame_rate_extension_n: %d\n", tmp ); + tmp = read_bits( &sequence->br, 5 ); + lprintf( "frame_rate_extension_d: %d\n", tmp ); +} + + + +static void picture_coding_extension( sequence_t *sequence, uint8_t *buf, int len ) +{ + if ( sequence->picture.state!=WANT_EXT ) + return; + + VdpPictureInfoMPEG1Or2 *infos = &sequence->picture.vdp_infos; + if ( infos->picture_structure && infos->picture_structure!=PICTURE_FRAME ) + infos = &sequence->picture.vdp_infos2; + + bits_reader_set( &sequence->br, buf ); + int tmp = read_bits( &sequence->br, 4 ); + lprintf( "extension_start_code_identifier: %d\n", tmp ); + infos->f_code[0][0] = read_bits( &sequence->br, 4 ); + infos->f_code[0][1] = read_bits( &sequence->br, 4 ); + infos->f_code[1][0] = read_bits( &sequence->br, 4 ); + infos->f_code[1][1] = read_bits( &sequence->br, 4 ); + lprintf( "f_code_0_0: %d\n", infos->f_code[0][0] ); + lprintf( "f_code_0_1: %d\n", infos->f_code[0][1] ); + lprintf( "f_code_1_0: %d\n", infos->f_code[1][0] ); + lprintf( "f_code_1_1: %d\n", infos->f_code[1][1] ); + infos->intra_dc_precision = read_bits( &sequence->br, 2 ); + lprintf( "intra_dc_precision: %d\n", infos->intra_dc_precision ); + infos->picture_structure = read_bits( &sequence->br, 2 ); + lprintf( "picture_structure: %d\n", infos->picture_structure ); + infos->top_field_first = read_bits( &sequence->br, 1 ); + lprintf( "top_field_first: %d\n", infos->top_field_first ); + infos->frame_pred_frame_dct = read_bits( &sequence->br, 1 ); + lprintf( "frame_pred_frame_dct: %d\n", infos->frame_pred_frame_dct ); + infos->concealment_motion_vectors = read_bits( &sequence->br, 1 ); + lprintf( "concealment_motion_vectors: %d\n", infos->concealment_motion_vectors ); + infos->q_scale_type = read_bits( &sequence->br, 1 ); + lprintf( "q_scale_type: %d\n", infos->q_scale_type ); + infos->intra_vlc_format = read_bits( &sequence->br, 1 ); + lprintf( "intra_vlc_format: %d\n", 
infos->intra_vlc_format ); + infos->alternate_scan = read_bits( &sequence->br, 1 ); + lprintf( "alternate_scan: %d\n", infos->alternate_scan ); + tmp = read_bits( &sequence->br, 1 ); + lprintf( "repeat_first_field: %d\n", tmp ); + tmp = read_bits( &sequence->br, 1 ); + lprintf( "chroma_420_type: %d\n", tmp ); + sequence->picture.progressive_frame = read_bits( &sequence->br, 1 ); + lprintf( "progressive_frame: %d\n", sequence->picture.progressive_frame ); + sequence->picture.state = WANT_SLICE; +} + + + +static void quant_matrix_extension( sequence_t *sequence, uint8_t *buf, int len ) +{ + int i, j; + + bits_reader_set( &sequence->br, buf ); + read_bits( &sequence->br, 4 ); + i = read_bits( &sequence->br, 1 ); + lprintf( "load_intra_quantizer_matrix: %d\n", i ); + if ( i ) { + for ( j=0; j<64; ++j ) { + sequence->picture.vdp_infos2.intra_quantizer_matrix[mpeg2_scan_norm[j]] = sequence->picture.vdp_infos.intra_quantizer_matrix[mpeg2_scan_norm[j]] = read_bits( &sequence->br, 8 ); + } + } + else { + for ( j=0; j<64; ++j ) { + sequence->picture.vdp_infos2.intra_quantizer_matrix[mpeg2_scan_norm[j]] = sequence->picture.vdp_infos.intra_quantizer_matrix[mpeg2_scan_norm[j]] = default_intra_quantizer_matrix[j]; + } + } + + i = read_bits( &sequence->br, 1 ); + lprintf( "load_non_intra_quantizer_matrix: %d\n", i ); + if ( i ) { + for ( j=0; j<64; ++j ) { + sequence->picture.vdp_infos2.non_intra_quantizer_matrix[mpeg2_scan_norm[j]] = sequence->picture.vdp_infos.non_intra_quantizer_matrix[mpeg2_scan_norm[j]] = read_bits( &sequence->br, 8 ); + } + } + else { + memset( sequence->picture.vdp_infos.non_intra_quantizer_matrix, 16, 64 ); + memset( sequence->picture.vdp_infos2.non_intra_quantizer_matrix, 16, 64 ); + } +} + + + +static void copy_slice( sequence_t *sequence, uint8_t *buf, int len ) +{ + int size = sequence->picture.slices_pos+len; + if ( sequence->picture.slices_size < size ) { + sequence->picture.slices_size = size+1024; + sequence->picture.slices = realloc( 
sequence->picture.slices, sequence->picture.slices_size ); + } + xine_fast_memcpy( sequence->picture.slices+sequence->picture.slices_pos, buf, len ); + sequence->picture.slices_pos += len; + if ( sequence->picture.slices_pos_top ) + sequence->picture.slices_count2++; + else + sequence->picture.slices_count++; +} + + + +static int parse_code( vdpau_mpeg12_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + if ( !sequence->have_header && buf[3]!=sequence_header_code ) { + lprintf( " ----------- no sequence header yet.\n" ); + return 0; + } + + if ( (buf[3] >= begin_slice_start_code) && (buf[3] <= end_slice_start_code) ) { + lprintf( " ----------- slice_start_code\n" ); + if ( sequence->picture.state==WANT_SLICE ) + copy_slice( sequence, buf, len ); + return 0; + } + else if ( sequence->picture.state==WANT_SLICE && sequence->picture.slices_count ) { + if ( !sequence->picture.slices_count2 ) { + sequence->picture.slices_pos_top = sequence->picture.slices_pos; + } + /* no more slices, decode */ + return 1; + } + + switch ( buf[3] ) { + case sequence_header_code: + lprintf( " ----------- sequence_header_code\n" ); + sequence_header( this_gen, buf+4, len-4 ); + break; + case extension_start_code: { + switch ( buf[4]>>4 ) { + case sequence_ext_sc: + lprintf( " ----------- sequence_extension_start_code\n" ); + sequence_extension( sequence, buf+4, len-4 ); + break; + case quant_matrix_ext_sc: + lprintf( " ----------- quant_matrix_extension_start_code\n" ); + quant_matrix_extension( sequence, buf+4, len-4 ); + break; + case picture_coding_ext_sc: + lprintf( " ----------- picture_coding_extension_start_code\n" ); + picture_coding_extension( sequence, buf+4, len-4 ); + break; + case sequence_display_ext_sc: + lprintf( " ----------- sequence_display_extension_start_code\n" ); + break; + } + break; + } + case user_data_start_code: + lprintf( " ----------- user_data_start_code\n" ); + break; + case group_start_code: + 
lprintf( " ----------- group_start_code\n" ); + break; + case picture_start_code: + lprintf( " ----------- picture_start_code\n" ); + picture_header( sequence, buf+4, len-4 ); + break; + case sequence_error_code: + lprintf( " ----------- sequence_error_code\n" ); + break; + case sequence_end_code: + lprintf( " ----------- sequence_end_code\n" ); + break; + } + return 0; +} + + + +static void decode_render( vdpau_mpeg12_decoder_t *vd, vdpau_accel_t *accel ) +{ + sequence_t *seq = (sequence_t*)&vd->sequence; + picture_t *pic = (picture_t*)&seq->picture; + + pic->vdp_infos.slice_count = pic->slices_count; + pic->vdp_infos2.slice_count = pic->slices_count2; + + VdpStatus st; + if ( vd->decoder==VDP_INVALID_HANDLE || vd->decoder_profile!=seq->profile || vd->decoder_width!=seq->coded_width || vd->decoder_height!=seq->coded_height ) { + if ( vd->decoder!=VDP_INVALID_HANDLE ) { + accel->vdp_decoder_destroy( vd->decoder ); + vd->decoder = VDP_INVALID_HANDLE; + } + st = accel->vdp_decoder_create( accel->vdp_device, seq->profile, seq->coded_width, seq->coded_height, 2, &vd->decoder); + if ( st!=VDP_STATUS_OK ) + lprintf( "failed to create decoder !! %s\n", accel->vdp_get_error_string( st ) ); + else { + vd->decoder_profile = seq->profile; + vd->decoder_width = seq->coded_width; + vd->decoder_height = seq->coded_height; + seq->vdp_runtime_nr = accel->vdp_runtime_nr; + } + } + + VdpBitstreamBuffer vbit; + vbit.struct_version = VDP_BITSTREAM_BUFFER_VERSION; + vbit.bitstream = pic->slices; + vbit.bitstream_bytes = (pic->vdp_infos.picture_structure==PICTURE_FRAME)? pic->slices_pos : pic->slices_pos_top; + st = accel->vdp_decoder_render( vd->decoder, accel->surface, (VdpPictureInfo*)&pic->vdp_infos, 1, &vbit ); + if ( st!=VDP_STATUS_OK ) + lprintf( "decoder failed : %d!! 
%s\n", st, accel->vdp_get_error_string( st ) ); + else { + lprintf( "DECODER SUCCESS : frame_type:%d, slices=%d, slices_bytes=%d, current=%d, forwref:%d, backref:%d, pts:%lld\n", + pic->vdp_infos.picture_coding_type, pic->vdp_infos.slice_count, vbit.bitstream_bytes, accel->surface, pic->vdp_infos.forward_reference, pic->vdp_infos.backward_reference, seq->cur_pts ); + VdpPictureInfoMPEG1Or2 *info = &pic->vdp_infos; + lprintf("%d %d %d %d %d %d %d %d %d %d %d %d %d\n", info->intra_dc_precision, info->frame_pred_frame_dct, info->concealment_motion_vectors, info->intra_vlc_format, info->alternate_scan, info->q_scale_type, info->top_field_first, info->full_pel_forward_vector, info->full_pel_backward_vector, info->f_code[0][0], info->f_code[0][1], info->f_code[1][0], info->f_code[1][1] ); + } + + if ( pic->vdp_infos.picture_structure != PICTURE_FRAME ) { + pic->vdp_infos2.backward_reference = VDP_INVALID_HANDLE; + pic->vdp_infos2.forward_reference = VDP_INVALID_HANDLE; + if ( pic->vdp_infos2.picture_coding_type==P_FRAME ) { + if ( pic->vdp_infos.picture_coding_type==I_FRAME ) + pic->vdp_infos2.forward_reference = accel->surface; + else + pic->vdp_infos2.forward_reference = pic->vdp_infos.forward_reference; + } + else if ( pic->vdp_infos.picture_coding_type==B_FRAME ) { + pic->vdp_infos2.forward_reference = pic->vdp_infos.forward_reference; + pic->vdp_infos2.backward_reference = pic->vdp_infos.backward_reference; + } + vbit.struct_version = VDP_BITSTREAM_BUFFER_VERSION; + vbit.bitstream = pic->slices+pic->slices_pos_top; + vbit.bitstream_bytes = pic->slices_pos-pic->slices_pos_top; + st = accel->vdp_decoder_render( vd->decoder, accel->surface, (VdpPictureInfo*)&pic->vdp_infos2, 1, &vbit ); + if ( st!=VDP_STATUS_OK ) + lprintf( "decoder failed : %d!! 
%s\n", st, accel->vdp_get_error_string( st ) ); + else + lprintf( "DECODER SUCCESS : frame_type:%d, slices=%d, current=%d, forwref:%d, backref:%d, pts:%lld\n", + pic->vdp_infos2.picture_coding_type, pic->vdp_infos2.slice_count, accel->surface, pic->vdp_infos2.forward_reference, pic->vdp_infos2.backward_reference, seq->cur_pts ); + } +} + + + +static void decode_picture( vdpau_mpeg12_decoder_t *vd ) +{ + sequence_t *seq = (sequence_t*)&vd->sequence; + picture_t *pic = (picture_t*)&seq->picture; + vdpau_accel_t *ref_accel; + + pic->state = WANT_HEADER; + + if ( seq->profile == VDP_DECODER_PROFILE_MPEG1 ) + pic->vdp_infos.picture_structure=PICTURE_FRAME; + + if ( pic->vdp_infos.picture_structure!=PICTURE_FRAME && !pic->slices_count2 ) { + /* waiting second field */ + lprintf("********************* no slices_count2 **********************\n"); + return; + } + + if ( pic->vdp_infos.picture_coding_type==P_FRAME ) { + if ( seq->backward_ref ) { + ref_accel = (vdpau_accel_t*)seq->backward_ref->accel_data; + pic->vdp_infos.forward_reference = ref_accel->surface; + } + else + return; + } + else if ( pic->vdp_infos.picture_coding_type==B_FRAME ) { + if ( seq->forward_ref ) { + ref_accel = (vdpau_accel_t*)seq->forward_ref->accel_data; + pic->vdp_infos.forward_reference = ref_accel->surface; + } + else + return; + if ( seq->backward_ref ) { + ref_accel = (vdpau_accel_t*)seq->backward_ref->accel_data; + pic->vdp_infos.backward_reference = ref_accel->surface; + } + else + return; + } + + vo_frame_t *img = vd->stream->video_out->get_frame( vd->stream->video_out, seq->coded_width, seq->coded_height, + seq->ratio, XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS|seq->chroma ); + vdpau_accel_t *accel = (vdpau_accel_t*)img->accel_data; + if ( !seq->accel_vdpau ) + seq->accel_vdpau = accel; + + if( seq->vdp_runtime_nr != *(seq->accel_vdpau->current_vdp_runtime_nr) ) { + seq->accel_vdpau = accel; + if ( seq->forward_ref ) + seq->forward_ref->free( seq->forward_ref ); + seq->forward_ref = NULL; + if ( 
seq->backward_ref ) + seq->backward_ref->free( seq->backward_ref ); + seq->backward_ref = NULL; + vd->decoder = VDP_INVALID_HANDLE; + } + + decode_render( vd, accel ); + +#ifdef MAKE_DAT + if ( nframes==0 ) { + fwrite( &seq->coded_width, 1, sizeof(seq->coded_width), outfile ); + fwrite( &seq->coded_height, 1, sizeof(seq->coded_height), outfile ); + fwrite( &seq->ratio, 1, sizeof(seq->ratio), outfile ); + fwrite( &seq->profile, 1, sizeof(seq->profile), outfile ); + } + + if ( nframes++ < 25 ) { + fwrite( &pic->vdp_infos, 1, sizeof(pic->vdp_infos), outfile ); + fwrite( &pic->slices_pos, 1, sizeof(pic->slices_pos), outfile ); + fwrite( pic->slices, 1, pic->slices_pos, outfile ); + } +#endif + + img->drawn = 0; + img->pts = seq->seq_pts; + seq->seq_pts = 0; /* reset */ + img->bad_frame = 0; + img->duration = seq->video_step; + + /* trying to deal with (french) buggy streams that randomly set bottom_field_first + while stream is top_field_first. So we assume that when top_field_first + is set one time, the stream _is_ top_field_first. 
*/ + //printf("pic->vdp_infos.top_field_first = %d\n", pic->vdp_infos.top_field_first); + if ( pic->vdp_infos.top_field_first ) + seq->top_field_first = 1; + img->top_field_first = seq->top_field_first; + + /* progressive_frame is unreliable with most mpeg2 streams */ + if ( pic->vdp_infos.picture_structure!=PICTURE_FRAME ) + img->progressive_frame = 0; + else + img->progressive_frame = pic->progressive_frame; + + if ( pic->vdp_infos.picture_coding_type!=B_FRAME ) { + if ( pic->vdp_infos.picture_coding_type==I_FRAME && !seq->backward_ref ) { + img->pts = 0; + img->draw( img, vd->stream ); + ++img->drawn; + } + if ( seq->forward_ref ) { + seq->forward_ref->drawn = 0; + seq->forward_ref->free( seq->forward_ref ); + } + seq->forward_ref = seq->backward_ref; + if ( seq->forward_ref && !seq->forward_ref->drawn ) { + seq->forward_ref->draw( seq->forward_ref, vd->stream ); + } + seq->backward_ref = img; + } + else { + img->draw( img, vd->stream ); + img->free( img ); + } +} + + + +/* + * This function receives a buffer of data from the demuxer layer and + * figures out how to handle it based on its header flags. 
+ */ +static void vdpau_mpeg12_decode_data (video_decoder_t *this_gen, buf_element_t *buf) +{ + vdpau_mpeg12_decoder_t *this = (vdpau_mpeg12_decoder_t *) this_gen; + sequence_t *seq = (sequence_t*)&this->sequence; + + /* preview buffers shall not be decoded and drawn -- use them only to supply stream information */ + if (buf->decoder_flags & BUF_FLAG_PREVIEW) + return; + + if ( !buf->size ) + return; + + seq->cur_pts = buf->pts; + + int size = seq->bufpos+buf->size; + if ( seq->bufsize < size ) { + seq->bufsize = size+1024; + seq->buf = realloc( seq->buf, seq->bufsize ); + } + xine_fast_memcpy( seq->buf+seq->bufpos, buf->content, buf->size ); + seq->bufpos += buf->size; + + while ( seq->bufseek <= seq->bufpos-4 ) { + uint8_t *buffer = seq->buf+seq->bufseek; + if ( buffer[0]==0 && buffer[1]==0 && buffer[2]==1 ) { + if ( seq->start<0 ) { + seq->start = seq->bufseek; + } + else { + if ( parse_code( this, seq->buf+seq->start, seq->bufseek-seq->start ) ) { + decode_picture( this ); + parse_code( this, seq->buf+seq->start, seq->bufseek-seq->start ); + } + uint8_t *tmp = (uint8_t*)malloc(seq->bufsize); + xine_fast_memcpy( tmp, seq->buf+seq->bufseek, seq->bufpos-seq->bufseek ); + seq->bufpos -= seq->bufseek; + seq->start = -1; + seq->bufseek = -1; + free( seq->buf ); + seq->buf = tmp; + } + } + ++seq->bufseek; + } + + /* still image detection -- don't wait for further data if buffer ends in sequence end code */ + if (seq->start >= 0 && seq->buf[seq->start + 3] == sequence_end_code) { + if (parse_code(this, seq->buf+seq->start, 4)) { + decode_picture(this); + parse_code(this, seq->buf+seq->start, 4); + } + seq->start = -1; + } +} + +/* + * This function is called when xine needs to flush the system. + */ +static void vdpau_mpeg12_flush (video_decoder_t *this_gen) { + vdpau_mpeg12_decoder_t *this = (vdpau_mpeg12_decoder_t *) this_gen; + + lprintf( "vdpau_mpeg12_flush\n" ); +} + +/* + * This function resets the video decoder. 
+ */
+static void vdpau_mpeg12_reset (video_decoder_t *this_gen) {
+  vdpau_mpeg12_decoder_t *this = (vdpau_mpeg12_decoder_t *) this_gen;
+
+  lprintf( "vdpau_mpeg12_reset\n" );
+  /* second argument 1 here vs. 0 in discontinuity below -- presumably a
+     "free reference frames too" flag; verify against reset_sequence() */
+  reset_sequence( &this->sequence, 1 );
+}
+
+/*
+ * The decoder should forget any stored pts values here.
+ */
+static void vdpau_mpeg12_discontinuity (video_decoder_t *this_gen) {
+  vdpau_mpeg12_decoder_t *this = (vdpau_mpeg12_decoder_t *) this_gen;
+
+  lprintf( "vdpau_mpeg12_discontinuity\n" );
+  reset_sequence( &this->sequence, 0 );
+}
+
+/*
+ * This function frees the video decoder instance allocated to the decoder.
+ */
+static void vdpau_mpeg12_dispose (video_decoder_t *this_gen) {
+
+  vdpau_mpeg12_decoder_t *this = (vdpau_mpeg12_decoder_t *) this_gen;
+
+  lprintf( "vdpau_mpeg12_dispose\n" );
+
+  /* destroy the hardware decoder before dropping our VDPAU accel handle */
+  if ( this->decoder!=VDP_INVALID_HANDLE && this->sequence.accel_vdpau ) {
+    this->sequence.accel_vdpau->vdp_decoder_destroy( this->decoder );
+    this->decoder = VDP_INVALID_HANDLE;
+  }
+
+  free_sequence( &this->sequence );
+
+  this->stream->video_out->close( this->stream->video_out, this->stream );
+
+  /* NOTE(review): assumes free_sequence() does NOT free picture.slices or
+     buf -- otherwise these two frees are doubles; verify in free_sequence */
+  free( this->sequence.picture.slices );
+  free( this->sequence.buf );
+  free( this_gen );
+}
+
+/*
+ * This function allocates, initializes, and returns a private video
+ * decoder structure.
+ */ +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + + vdpau_mpeg12_decoder_t *this ; + + lprintf( "open_plugin\n" ); + + /* the videoout must be vdpau-capable to support this decoder */ + if ( !(stream->video_driver->get_capabilities(stream->video_driver) & VO_CAP_VDPAU_MPEG12) ) + return NULL; + + /* now check if vdpau has free decoder resource */ + vo_frame_t *img = stream->video_out->get_frame( stream->video_out, 1920, 1080, 1, XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS ); + vdpau_accel_t *accel = (vdpau_accel_t*)img->accel_data; + int runtime_nr = accel->vdp_runtime_nr; + img->free(img); + VdpDecoder decoder; + VdpStatus st = accel->vdp_decoder_create( accel->vdp_device, VDP_DECODER_PROFILE_MPEG2_MAIN, 1920, 1080, 2, &decoder ); + if ( st!=VDP_STATUS_OK ) { + lprintf( "can't create vdpau decoder.\n" ); + return 1; + } + + accel->vdp_decoder_destroy( decoder ); + + this = (vdpau_mpeg12_decoder_t *) calloc(1, sizeof(vdpau_mpeg12_decoder_t)); + + this->video_decoder.decode_data = vdpau_mpeg12_decode_data; + this->video_decoder.flush = vdpau_mpeg12_flush; + this->video_decoder.reset = vdpau_mpeg12_reset; + this->video_decoder.discontinuity = vdpau_mpeg12_discontinuity; + this->video_decoder.dispose = vdpau_mpeg12_dispose; + + this->stream = stream; + this->class = (vdpau_mpeg12_class_t *) class_gen; + + this->sequence.bufsize = 1024; + this->sequence.buf = (uint8_t*)malloc(this->sequence.bufsize); + this->sequence.forward_ref = 0; + this->sequence.backward_ref = 0; + this->sequence.vdp_runtime_nr = runtime_nr; + free_sequence( &this->sequence ); + this->sequence.ratio = 1; + + init_picture( &this->sequence.picture ); + + this->decoder = VDP_INVALID_HANDLE; + this->sequence.accel_vdpau = NULL; + + (stream->video_out->open)(stream->video_out, stream); + +#ifdef MAKE_DAT + outfile = fopen( "/tmp/mpg.dat","w"); + nframes = 0; +#endif + + return &this->video_decoder; +} + +/* + * This function returns a brief string that 
describes (usually with the
+ * decoder's most basic name) the video decoder plugin.
+ */
+static char *get_identifier (video_decoder_class_t *this) {
+  /* non-const char* is what the plugin class vtable expects; the literal
+     is never modified */
+  return "vdpau_mpeg12";
+}
+
+/*
+ * This function returns a slightly longer string describing the video
+ * decoder plugin.
+ */
+static char *get_description (video_decoder_class_t *this) {
+  return "vdpau_mpeg12: mpeg1/2 decoder plugin using VDPAU hardware decoding.\n"
+         "Must be used along with video_out_vdpau.";
+}
+
+/*
+ * This function frees the video decoder class and any other memory that was
+ * allocated.
+ */
+static void dispose_class (video_decoder_class_t *this) {
+  free (this);
+}
+
+/*
+ * This function allocates a private video decoder class and initializes
+ * the class's member functions.
+ */
+static void *init_plugin (xine_t *xine, void *data) {
+
+  vdpau_mpeg12_class_t *this;
+
+  /* NOTE(review): calloc result is used unchecked; presumably xine treats
+     a NULL class pointer from init as plugin-load failure -- verify */
+  this = (vdpau_mpeg12_class_t *) calloc(1, sizeof(vdpau_mpeg12_class_t));
+
+  this->decoder_class.open_plugin = open_plugin;
+  this->decoder_class.get_identifier = get_identifier;
+  this->decoder_class.get_description = get_description;
+  this->decoder_class.dispose = dispose_class;
+
+  return this;
+}
+
+/*
+ * This is a list of all of the internal xine video buffer types that
+ * this decoder is able to handle. Check src/xine-engine/buffer.h for a
+ * list of valid buffer types (and add a new one if the one you need does
+ * not exist). Terminate the list with a 0.
+ */
+static const uint32_t video_types[] = {
+  BUF_VIDEO_MPEG,
+  0
+};
+
+/*
+ * This data structure combines the list of supported xine buffer types and
+ * the priority that the plugin should be given with respect to other
+ * plugins that handle the same buffer type. A plugin with priority (n+1)
+ * will be used instead of a plugin with priority (n).
+ */
+static const decoder_info_t dec_info_video = {
+  video_types, /* supported types */
+  8 /* priority */
+};
+
+/*
+ * The plugin catalog entry.
This is the only information that this plugin + * will export to the public. + */ +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* { type, API, "name", version, special_info, init_function } */ + { PLUGIN_VIDEO_DECODER, 18, "vdpau_mpeg12", XINE_VERSION_CODE, &dec_info_video, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/libvdpau/vdpau_vc1.c b/src/libvdpau/vdpau_vc1.c new file mode 100644 index 000000000..2ffd9a2e2 --- /dev/null +++ b/src/libvdpau/vdpau_vc1.c @@ -0,0 +1,1193 @@ +/* + * Copyright (C) 2008 the xine project + * Copyright (C) 2008 Christophe Thommeret <hftom@free.fr> + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * vdpau_vc1.c, a vc1 video stream parser using VDPAU hardware decoder + * + */ + +/*#define LOG*/ +#define LOG_MODULE "vdpau_vc1" + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <unistd.h> + +#include "xine_internal.h" +#include "video_out.h" +#include "buffer.h" +#include "xineutils.h" +#include "accel_vdpau.h" +#include "bits_reader.h" + +#include <vdpau/vdpau.h> + +#define sequence_header_code 0x0f +#define sequence_end_code 0x0a +#define entry_point_code 0x0e +#define frame_start_code 0x0d +#define field_start_code 0x0c +#define slice_start_code 0x0b + +#define PICTURE_FRAME 0 +#define PICTURE_FRAME_INTERLACE 2 +#define PICTURE_FIELD_INTERLACE 3 + +#define I_FRAME 0 +#define P_FRAME 1 +#define B_FRAME 3 +#define BI_FRAME 4 + +#define FIELDS_I_I 0 +#define FIELDS_I_P 1 +#define FIELDS_P_I 2 +#define FIELDS_P_P 3 +#define FIELDS_B_B 4 +#define FIELDS_B_BI 5 +#define FIELDS_BI_B 6 +#define FIELDS_BI_BI 7 + +#define MODE_STARTCODE 0 +#define MODE_FRAME 1 + +/*#define MAKE_DAT*/ /*do NOT define this, unless you know what you do */ +#ifdef MAKE_DAT +static int nframes; +static FILE *outfile; +#endif + + + +const double aspect_ratio[] = { + 0.0, + 1.0, + 12./11., + 10./11., + 16./11., + 40./33., + 24./11., + 20./11., + 32./11., + 80./33., + 18./11., + 15./11., + 64./33., + 160./99. 
+}; + + + +typedef struct { + VdpPictureInfoVC1 vdp_infos; + int slices; + int fptype; + int field; + int header_size; + int hrd_param_flag; + int hrd_num_leaky_buckets; + int repeat_first_field; + int top_field_first; + int skipped; +} picture_t; + + + +typedef struct { + uint32_t coded_width; + uint32_t coded_height; + + uint64_t video_step; /* frame duration in pts units */ + double ratio; + VdpDecoderProfile profile; + + int mode; + int have_header; + + uint8_t *buf; /* accumulate data */ + int bufseek; + int start; + int code_start, current_code; + uint32_t bufsize; + uint32_t bufpos; + + picture_t picture; + vo_frame_t *forward_ref; + vo_frame_t *backward_ref; + + int64_t seq_pts; + int64_t cur_pts; + + vdpau_accel_t *accel_vdpau; + + bits_reader_t br; + + int vdp_runtime_nr; + +} sequence_t; + + + +typedef struct { + video_decoder_class_t decoder_class; +} vdpau_vc1_class_t; + + + +typedef struct vdpau_vc1_decoder_s { + video_decoder_t video_decoder; /* parent video decoder structure */ + + vdpau_vc1_class_t *class; + xine_stream_t *stream; + + sequence_t sequence; + + VdpDecoder decoder; + VdpDecoderProfile decoder_profile; + uint32_t decoder_width; + uint32_t decoder_height; + +} vdpau_vc1_decoder_t; + + + +static void init_picture( picture_t *pic ) +{ + memset( pic, 0, sizeof( picture_t ) ); +} + + + +static void reset_picture( picture_t *pic ) +{ + pic->slices = 1; +} + + + +static void reset_sequence( sequence_t *sequence ) +{ + lprintf( "reset_sequence\n" ); + sequence->bufpos = 0; + sequence->bufseek = 0; + sequence->start = -1; + sequence->code_start = sequence->current_code = 0; + sequence->seq_pts = sequence->cur_pts = 0; + if ( sequence->forward_ref ) + sequence->forward_ref->free( sequence->forward_ref ); + sequence->forward_ref = NULL; + if ( sequence->backward_ref ) + sequence->backward_ref->free( sequence->backward_ref ); + sequence->backward_ref = NULL; + reset_picture( &sequence->picture ); +} + + + +static void init_sequence( sequence_t 
*sequence ) +{ + lprintf( "init_sequence\n" ); + sequence->have_header = 0; + sequence->profile = VDP_DECODER_PROFILE_VC1_SIMPLE; + sequence->ratio = 0; + sequence->video_step = 0; + sequence->picture.hrd_param_flag = 0; + reset_sequence( sequence ); +} + + + +static void update_metadata( vdpau_vc1_decoder_t *this_gen ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + if ( !sequence->have_header ) { + sequence->have_header = 1; + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_WIDTH, sequence->coded_width ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_HEIGHT, sequence->coded_height ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_VIDEO_RATIO, ((double)10000*sequence->ratio) ); + _x_stream_info_set( this_gen->stream, XINE_STREAM_INFO_FRAME_DURATION, sequence->video_step ); + _x_meta_info_set_utf8( this_gen->stream, XINE_META_INFO_VIDEOCODEC, "VC1/WMV9 (vdpau)" ); + xine_event_t event; + xine_format_change_data_t data; + event.type = XINE_EVENT_FRAME_FORMAT_CHANGE; + event.stream = this_gen->stream; + event.data = &data; + event.data_length = sizeof(data); + data.width = sequence->coded_width; + data.height = sequence->coded_height; + data.aspect = sequence->ratio; + xine_event_send( this_gen->stream, &event ); + } +} + + + +static void sequence_header_advanced( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + lprintf( "sequence_header_advanced\n" ); + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + if ( len < 5 ) + return; + + sequence->profile = VDP_DECODER_PROFILE_VC1_ADVANCED; + lprintf("VDP_DECODER_PROFILE_VC1_ADVANCED\n"); + bits_reader_set( &sequence->br, buf ); + read_bits( &sequence->br, 15 ); + sequence->picture.vdp_infos.postprocflag = read_bits( &sequence->br, 1 ); + sequence->coded_width = read_bits( &sequence->br, 12 )<<1; + sequence->coded_height = (read_bits( &sequence->br, 12 )+1)<<1; + sequence->picture.vdp_infos.pulldown = read_bits( &sequence->br, 1 ); + 
sequence->picture.vdp_infos.interlace = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.tfcntrflag = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.finterpflag = read_bits( &sequence->br, 1 ); + read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.psf = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.maxbframes = 7; + if ( read_bits( &sequence->br, 1 ) ) { + double w, h; + int ar=0; + w = read_bits( &sequence->br, 14 )+1; + h = read_bits( &sequence->br, 14 )+1; + if ( read_bits( &sequence->br, 1 ) ) { + ar = read_bits( &sequence->br, 4 ); + } + if ( ar==15 ) { + w = read_bits( &sequence->br, 8 ); + h = read_bits( &sequence->br, 8 ); + sequence->ratio = w/h; + lprintf("aspect_ratio (w/h) = %f\n", sequence->ratio); + } + else if ( ar && ar<14 ) { + sequence->ratio = sequence->coded_width*aspect_ratio[ar]/sequence->coded_height; + lprintf("aspect_ratio = %f\n", sequence->ratio); + } + + if ( read_bits( &sequence->br, 1 ) ) { + if ( read_bits( &sequence->br, 1 ) ) { + int exp = read_bits( &sequence->br, 16 ); + lprintf("framerate exp = %d\n", exp); + } + else { + double nr = read_bits( &sequence->br, 8 ); + switch ((int)nr) { + case 1: nr = 24000; break; + case 2: nr = 25000; break; + case 3: nr = 30000; break; + case 4: nr = 50000; break; + case 5: nr = 60000; break; + default: nr = 0; + } + double dr = read_bits( &sequence->br, 4 ); + switch ((int)dr) { + case 2: dr = 1001; break; + default: dr = 1000; + } + sequence->video_step = 90000/(nr/dr); + lprintf("framerate = %f video_step = %d\n", nr/dr, sequence->video_step); + } + } + if ( read_bits( &sequence->br, 1 ) ) { + int col = read_bits( &sequence->br, 8 ); + lprintf("color_standard = %d\n", col); + read_bits( &sequence->br, 16 ); + } + } + sequence->picture.hrd_param_flag = read_bits( &sequence->br, 1 ); + if ( sequence->picture.hrd_param_flag ) + sequence->picture.hrd_num_leaky_buckets = read_bits( &sequence->br, 5 ); + + update_metadata( this_gen ); +} + + + 
+static void sequence_header( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + lprintf( "sequence_header\n" ); + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + if ( len < 4 ) + return; + + bits_reader_set( &sequence->br, buf ); + switch ( read_bits( &sequence->br, 2 ) ) { + case 0: sequence->profile = VDP_DECODER_PROFILE_VC1_SIMPLE; lprintf("VDP_DECODER_PROFILE_VC1_SIMPLE\n"); break; + case 1: sequence->profile = VDP_DECODER_PROFILE_VC1_MAIN; lprintf("VDP_DECODER_PROFILE_VC1_MAIN\n"); break; + case 2: sequence->profile = VDP_DECODER_PROFILE_VC1_MAIN; lprintf("vc1_complex profile not supported by vdpau, trying vc1_main.\n"); break; + case 3: return sequence_header_advanced( this_gen, buf, len ); break; + default: return; /* illegal value, broken header? */ + } + read_bits( &sequence->br, 10 ); + sequence->picture.vdp_infos.loopfilter = read_bits( &sequence->br, 1 ); + read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.multires = read_bits( &sequence->br, 1 ); + read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.fastuvmc = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.extended_mv = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.dquant = read_bits( &sequence->br, 2 ); + sequence->picture.vdp_infos.vstransform = read_bits( &sequence->br, 1 ); + read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.overlap = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.syncmarker = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.rangered = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.maxbframes = read_bits( &sequence->br, 3 ); + sequence->picture.vdp_infos.quantizer = read_bits( &sequence->br, 2 ); + sequence->picture.vdp_infos.finterpflag = read_bits( &sequence->br, 1 ); + + update_metadata( this_gen ); +} + + + +static void entry_point( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + lprintf( "entry_point\n" ); + sequence_t *sequence = 
(sequence_t*)&this_gen->sequence; + + bits_reader_set( &sequence->br, buf ); + read_bits( &sequence->br, 2 ); + sequence->picture.vdp_infos.panscan_flag = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.refdist_flag = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.loopfilter = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.fastuvmc = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.extended_mv = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.dquant = read_bits( &sequence->br, 2 ); + sequence->picture.vdp_infos.vstransform = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.overlap = read_bits( &sequence->br, 1 ); + sequence->picture.vdp_infos.quantizer = read_bits( &sequence->br, 2 ); + + if ( sequence->picture.hrd_param_flag ) { + int i; + for ( i=0; i<sequence->picture.hrd_num_leaky_buckets; ++i ) + read_bits( &sequence->br, 8 ); + } + + if ( read_bits( &sequence->br, 1 ) ) { + sequence->coded_width = (read_bits( &sequence->br, 12 )+1)<<1; + sequence->coded_height = (read_bits( &sequence->br, 12 )+1)<<1; + } + + if ( sequence->picture.vdp_infos.extended_mv ) + sequence->picture.vdp_infos.extended_dmv = read_bits( &sequence->br, 1 ); + + sequence->picture.vdp_infos.range_mapy_flag = read_bits( &sequence->br, 1 ); + if ( sequence->picture.vdp_infos.range_mapy_flag ) { + sequence->picture.vdp_infos.range_mapy = read_bits( &sequence->br, 3 ); + } + sequence->picture.vdp_infos.range_mapuv_flag = read_bits( &sequence->br, 1 ); + if ( sequence->picture.vdp_infos.range_mapuv_flag ) { + sequence->picture.vdp_infos.range_mapuv = read_bits( &sequence->br, 3 ); + } +} + + + +static void picture_header( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + picture_t *pic = (picture_t*)&sequence->picture; + VdpPictureInfoVC1 *info = &(sequence->picture.vdp_infos); + int tmp; + + lprintf("picture_header\n"); + + bits_reader_set( 
&sequence->br, buf ); + read_bits( &sequence->br, 2 ); + + if ( info->finterpflag ) + read_bits( &sequence->br, 1 ); + if ( info->rangered ) { + /*info->rangered &= ~2; + info->rangered |= get_bits( buf,off++,1 ) << 1;*/ + info->rangered = (read_bits( &sequence->br, 1 ) << 1) +1; + } + if ( !info->maxbframes ) { + if ( read_bits( &sequence->br, 1 ) ) + info->picture_type = P_FRAME; + else + info->picture_type = I_FRAME; + } + else { + if ( read_bits( &sequence->br, 1 ) ) + info->picture_type = P_FRAME; + else { + if ( read_bits( &sequence->br, 1 ) ) + info->picture_type = I_FRAME; + else + info->picture_type = B_FRAME; + } + } + if ( info->picture_type == B_FRAME ) { + tmp = read_bits( &sequence->br, 3 ); + if ( tmp==7 ) { + tmp = (tmp<<4) | read_bits( &sequence->br, 4 ); + if ( tmp==127 ) + info->picture_type = BI_FRAME; + } + } +} + + + +static void picture_header_advanced( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + picture_t *pic = (picture_t*)&sequence->picture; + VdpPictureInfoVC1 *info = &(sequence->picture.vdp_infos); + + lprintf("picture_header_advanced\n"); + + bits_reader_set( &sequence->br, buf ); + + if ( info->interlace ) { + lprintf("frame->interlace=1\n"); + if ( !read_bits( &sequence->br, 1 ) ) { + lprintf("progressive frame\n"); + info->frame_coding_mode = PICTURE_FRAME; + } + else { + if ( !read_bits( &sequence->br, 1 ) ) { + lprintf("frame interlaced\n"); + info->frame_coding_mode = PICTURE_FRAME_INTERLACE; + } + else { + lprintf("field interlaced\n"); + info->frame_coding_mode = PICTURE_FIELD_INTERLACE; + } + } + } + if ( info->interlace && info->frame_coding_mode == PICTURE_FIELD_INTERLACE ) { + pic->fptype = read_bits( &sequence->br, 3 ); + switch ( pic->fptype ) { + case FIELDS_I_I: + case FIELDS_I_P: + info->picture_type = I_FRAME; break; + case FIELDS_P_I: + case FIELDS_P_P: + info->picture_type = P_FRAME; break; + case FIELDS_B_B: + case FIELDS_B_BI: + 
info->picture_type = B_FRAME; break; + default: + info->picture_type = BI_FRAME; + } + } + else { + if ( !read_bits( &sequence->br, 1 ) ) + info->picture_type = P_FRAME; + else { + if ( !read_bits( &sequence->br, 1 ) ) + info->picture_type = B_FRAME; + else { + if ( !read_bits( &sequence->br, 1 ) ) + info->picture_type = I_FRAME; + else { + if ( !read_bits( &sequence->br, 1 ) ) + info->picture_type = BI_FRAME; + else { + info->picture_type = P_FRAME; + pic->skipped = 1; + } + } + } + } + } + if ( info->tfcntrflag ) { + lprintf("tfcntrflag=1\n"); + read_bits( &sequence->br, 8 ); + } + if ( info->pulldown && info->interlace ) { + pic->top_field_first = read_bits( &sequence->br, 1 ); + pic->repeat_first_field = read_bits( &sequence->br, 1 ); + } +} + + + +static void parse_header( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + int off=0; + + while ( off < (len-4) ) { + uint8_t *buffer = buf+off; + if ( buffer[0]==0 && buffer[1]==0 && buffer[2]==1 ) { + switch ( buffer[3] ) { + case sequence_header_code: sequence_header( this_gen, buf+off+4, len-off-4 ); break; + case entry_point_code: entry_point( this_gen, buf+off+4, len-off-4 ); break; + } + } + ++off; + } + if ( !sequence->have_header ) + sequence_header( this_gen, buf, len ); +} + + + +static void remove_emulation_prevention( uint8_t *src, uint8_t *dst, int src_len, int *dst_len ) +{ + int i; + int len = 0; + int removed = 0; + + for ( i=0; i<src_len-3; ++i ) { + if ( src[i]==0 && src[i+1]==0 && src[i+2]==3 ) { + lprintf("removed emulation prevention byte\n"); + dst[len++] = src[i]; + dst[len++] = src[i+1]; + i += 2; + ++removed; + } + else { + memcpy( dst+len, src+i, 4 ); + ++len; + } + } + for ( ; i<src_len; ++i ) + dst[len++] = src[i]; + *dst_len = src_len-removed; +} + + + +static int parse_code( vdpau_vc1_decoder_t *this_gen, uint8_t *buf, int len ) +{ + sequence_t *sequence = (sequence_t*)&this_gen->sequence; + + if ( 
!sequence->have_header && buf[3]!=sequence_header_code ) + return 0; + + if ( sequence->code_start == frame_start_code ) { + if ( sequence->current_code==field_start_code || sequence->current_code==slice_start_code ) { + sequence->picture.slices++; + return -1; + } + return 1; /* frame complete, decode */ + } + + switch ( buf[3] ) { + int dst_len; + uint8_t *tmp; + case sequence_header_code: + lprintf("sequence_header_code\n"); + tmp = malloc( len ); + remove_emulation_prevention( buf, tmp, len, &dst_len ); + sequence_header( this_gen, tmp+4, dst_len-4 ); + free( tmp ); + break; + case entry_point_code: + lprintf("entry_point_code\n"); + tmp = malloc( len ); + remove_emulation_prevention( buf, tmp, len, &dst_len ); + entry_point( this_gen, tmp+4, dst_len-4 ); + free( tmp ); + break; + case sequence_end_code: + lprintf("sequence_end_code\n"); + break; + case frame_start_code: + lprintf("frame_start_code, len=%d\n", len); + break; + case field_start_code: + lprintf("field_start_code\n"); + break; + case slice_start_code: + lprintf("slice_start_code, len=%d\n", len); + break; + } + return 0; +} + + + +static void duplicate_image( vdpau_vc1_decoder_t *vd, vo_frame_t *dst ) +{ + lprintf("duplicate_image\n"); + sequence_t *seq = (sequence_t*)&vd->sequence; + picture_t *pic = (picture_t*)&seq->picture; + + if ( !seq->backward_ref ) /* Should not happen! 
*/ + return; + + dst->proc_duplicate_frame_data( dst, seq->backward_ref ); +} + + + +static void decode_render( vdpau_vc1_decoder_t *vd, vdpau_accel_t *accel, uint8_t *buf, int len ) +{ + sequence_t *seq = (sequence_t*)&vd->sequence; + picture_t *pic = (picture_t*)&seq->picture; + + VdpStatus st; + if ( vd->decoder==VDP_INVALID_HANDLE || vd->decoder_profile!=seq->profile || vd->decoder_width!=seq->coded_width || vd->decoder_height!=seq->coded_height ) { + if ( vd->decoder!=VDP_INVALID_HANDLE ) { + accel->vdp_decoder_destroy( vd->decoder ); + vd->decoder = VDP_INVALID_HANDLE; + } + st = accel->vdp_decoder_create( accel->vdp_device, seq->profile, seq->coded_width, seq->coded_height, 2, &vd->decoder); + if ( st!=VDP_STATUS_OK ) + printf( "vdpau_vc1: failed to create decoder !! %s\n", accel->vdp_get_error_string( st ) ); + else { + lprintf( "decoder created.\n" ); + vd->decoder_profile = seq->profile; + vd->decoder_width = seq->coded_width; + vd->decoder_height = seq->coded_height; + seq->vdp_runtime_nr = accel->vdp_runtime_nr; + } + } + + VdpBitstreamBuffer vbit; + vbit.struct_version = VDP_BITSTREAM_BUFFER_VERSION; + vbit.bitstream = buf; + vbit.bitstream_bytes = len; + if ( pic->field ) + vbit.bitstream_bytes = pic->field; + st = accel->vdp_decoder_render( vd->decoder, accel->surface, (VdpPictureInfo*)&pic->vdp_infos, 1, &vbit ); + if ( st!=VDP_STATUS_OK ) + printf( "vdpau_vc1: decoder failed : %d!! 
%s\n", st, accel->vdp_get_error_string( st ) ); + else { + lprintf( "DECODER SUCCESS : slices=%d, slices_bytes=%d, current=%d, forwref:%d, backref:%d, pts:%lld\n", + pic->vdp_infos.slice_count, vbit.bitstream_bytes, accel->surface, pic->vdp_infos.forward_reference, pic->vdp_infos.backward_reference, seq->seq_pts ); + } + VdpPictureInfoVC1 *info = &(seq->picture.vdp_infos); + lprintf("%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", info->slice_count, info->picture_type, info->frame_coding_mode, + info->postprocflag, info->pulldown, info->interlace, info->tfcntrflag, info->finterpflag, info->psf, info->dquant, info->panscan_flag, info->refdist_flag, + info->quantizer, info->extended_mv, info->extended_dmv, info->overlap, info->vstransform, info->loopfilter, info->fastuvmc, info->range_mapy_flag, info->range_mapy, + info->range_mapuv_flag, info->range_mapuv, info->multires, info->syncmarker, info->rangered, info->maxbframes, info->deblockEnable, info->pquant ); + + if ( pic->field ) { + int old_type = pic->vdp_infos.picture_type; + switch ( pic->fptype ) { + case FIELDS_I_I: + case FIELDS_P_I: + pic->vdp_infos.picture_type = I_FRAME; + pic->vdp_infos.backward_reference = VDP_INVALID_HANDLE; + pic->vdp_infos.forward_reference = VDP_INVALID_HANDLE; + break; + case FIELDS_I_P: + case FIELDS_P_P: + pic->vdp_infos.forward_reference = accel->surface; + pic->vdp_infos.picture_type = P_FRAME; break; + case FIELDS_B_B: + case FIELDS_BI_B: + pic->vdp_infos.picture_type = B_FRAME; + break; + default: + pic->vdp_infos.picture_type = BI_FRAME; + } + vbit.bitstream = buf+pic->field+4; + vbit.bitstream_bytes = len-pic->field-4; + st = accel->vdp_decoder_render( vd->decoder, accel->surface, (VdpPictureInfo*)&pic->vdp_infos, 1, &vbit ); + if ( st!=VDP_STATUS_OK ) + printf( "vdpau_vc1: decoder failed : %d!! 
%s\n", st, accel->vdp_get_error_string( st ) ); + else { + lprintf( "DECODER SUCCESS (second field): slices=%d, slices_bytes=%d, current=%d, forwref:%d, backref:%d, pts:%lld\n", + pic->vdp_infos.slice_count, vbit.bitstream_bytes, accel->surface, pic->vdp_infos.forward_reference, pic->vdp_infos.backward_reference, seq->seq_pts ); + } + VdpPictureInfoVC1 *info = &(seq->picture.vdp_infos); + lprintf("%d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d %d\n", info->slice_count, info->picture_type, info->frame_coding_mode, + info->postprocflag, info->pulldown, info->interlace, info->tfcntrflag, info->finterpflag, info->psf, info->dquant, info->panscan_flag, info->refdist_flag, + info->quantizer, info->extended_mv, info->extended_dmv, info->overlap, info->vstransform, info->loopfilter, info->fastuvmc, info->range_mapy_flag, info->range_mapy, + info->range_mapuv_flag, info->range_mapuv, info->multires, info->syncmarker, info->rangered, info->maxbframes, info->deblockEnable, info->pquant ); + + pic->vdp_infos.picture_type = old_type; + } +} + + + +static int search_field( vdpau_vc1_decoder_t *vd, uint8_t *buf, int len ) +{ + int i; + lprintf("search_fields, len=%d\n", len); + for ( i=0; i<len-4; ++i ) { + if ( buf[i]==0 && buf[i+1]==0 && buf[i+2]==1 && buf[i+3]==field_start_code ) { + lprintf("found field_start_code at %d\n", i); + return i; + } + } + return 0; +} + + + +static void decode_picture( vdpau_vc1_decoder_t *vd ) +{ + sequence_t *seq = (sequence_t*)&vd->sequence; + picture_t *pic = (picture_t*)&seq->picture; + vdpau_accel_t *ref_accel; + int field; + + uint8_t *buf; + int len; + + pic->skipped = 0; + pic->field = 0; + + if ( seq->mode == MODE_FRAME ) { + buf = seq->buf; + len = seq->bufpos; + if ( seq->profile==VDP_DECODER_PROFILE_VC1_ADVANCED ) + picture_header_advanced( vd, buf, len ); + else + picture_header( vd, buf, len ); + + if ( len < 2 ) + pic->skipped = 1; + } + else { + seq->picture.vdp_infos.slice_count = 
seq->picture.slices; + buf = seq->buf+seq->start+4; + len = seq->bufseek-seq->start-4; + if ( seq->profile==VDP_DECODER_PROFILE_VC1_ADVANCED ) { + int tmplen = (len>50) ? 50 : len; + uint8_t *tmp = malloc( tmplen ); + remove_emulation_prevention( buf, tmp, tmplen, &tmplen ); + picture_header_advanced( vd, tmp, tmplen ); + free( tmp ); + } + else + picture_header( vd, buf, len ); + + if ( len < 2 ) + pic->skipped = 1; + } + + if ( pic->vdp_infos.interlace && pic->vdp_infos.frame_coding_mode == PICTURE_FIELD_INTERLACE ) { + if ( !(field = search_field( vd, buf, len )) ) + lprintf("error, no fields found!\n"); + else + pic->field = field; + } + + pic->vdp_infos.forward_reference = VDP_INVALID_HANDLE; + pic->vdp_infos.backward_reference = VDP_INVALID_HANDLE; + + if ( pic->vdp_infos.picture_type==P_FRAME ) { + if ( seq->backward_ref ) { + ref_accel = (vdpau_accel_t*)seq->backward_ref->accel_data; + pic->vdp_infos.forward_reference = ref_accel->surface; + } + else { + reset_picture( &seq->picture ); + return; + } + } + else if ( pic->vdp_infos.picture_type>=B_FRAME ) { + if ( seq->forward_ref ) { + ref_accel = (vdpau_accel_t*)seq->forward_ref->accel_data; + pic->vdp_infos.forward_reference = ref_accel->surface; + } + else { + reset_picture( &seq->picture ); + return; + } + if ( seq->backward_ref ) { + ref_accel = (vdpau_accel_t*)seq->backward_ref->accel_data; + pic->vdp_infos.backward_reference = ref_accel->surface; + } + else { + reset_picture( &seq->picture ); + return; + } + } + + vo_frame_t *img = vd->stream->video_out->get_frame( vd->stream->video_out, seq->coded_width, seq->coded_height, + seq->ratio, XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS ); + vdpau_accel_t *accel = (vdpau_accel_t*)img->accel_data; + if ( !seq->accel_vdpau ) + seq->accel_vdpau = accel; + + if( seq->vdp_runtime_nr != *(seq->accel_vdpau->current_vdp_runtime_nr) ) { + seq->accel_vdpau = accel; + if ( seq->forward_ref ) + seq->forward_ref->free( seq->forward_ref ); + seq->forward_ref = NULL; + if ( 
seq->backward_ref ) + seq->backward_ref->free( seq->backward_ref ); + seq->backward_ref = NULL; + vd->decoder = VDP_INVALID_HANDLE; + } + + if ( pic->skipped ) + duplicate_image( vd, img ); + else + decode_render( vd, accel, buf, len ); + + +#ifdef MAKE_DAT + if ( nframes==0 ) { + fwrite( &seq->coded_width, 1, sizeof(seq->coded_width), outfile ); + fwrite( &seq->coded_height, 1, sizeof(seq->coded_height), outfile ); + fwrite( &seq->ratio, 1, sizeof(seq->ratio), outfile ); + fwrite( &seq->profile, 1, sizeof(seq->profile), outfile ); + } + + if ( nframes++ < 25 ) { + fwrite( &pic->vdp_infos, 1, sizeof(pic->vdp_infos), outfile ); + fwrite( &len, 1, sizeof(len), outfile ); + fwrite( buf, 1, len, outfile ); + printf( "picture_type = %d\n", pic->vdp_infos.picture_type); + } +#endif + + if ( pic->vdp_infos.interlace && pic->vdp_infos.frame_coding_mode ) { + img->progressive_frame = 0; + img->top_field_first = pic->top_field_first; + } + else { + img->progressive_frame = 1; + img->top_field_first = 1; + } + img->pts = seq->seq_pts; + img->bad_frame = 0; + img->duration = seq->video_step; + accel->color_standard = VDP_COLOR_STANDARD_ITUR_BT_709; + + if ( pic->vdp_infos.picture_type<B_FRAME ) { + if ( pic->vdp_infos.picture_type==I_FRAME && !seq->backward_ref ) { + img->pts = 0; + img->draw( img, vd->stream ); + ++img->drawn; + } + if ( seq->forward_ref ) { + seq->forward_ref->drawn = 0; + seq->forward_ref->free( seq->forward_ref ); + } + seq->forward_ref = seq->backward_ref; + if ( seq->forward_ref && !seq->forward_ref->drawn ) { + seq->forward_ref->draw( seq->forward_ref, vd->stream ); + } + seq->backward_ref = img; + } + else { + img->draw( img, vd->stream ); + img->free( img ); + } + + seq->seq_pts +=seq->video_step; + + reset_picture( &seq->picture ); +} + + + +/* + * This function receives a buffer of data from the demuxer layer and + * figures out how to handle it based on its header flags. 
+ */ +static void vdpau_vc1_decode_data (video_decoder_t *this_gen, buf_element_t *buf) +{ + vdpau_vc1_decoder_t *this = (vdpau_vc1_decoder_t *) this_gen; + sequence_t *seq = (sequence_t*)&this->sequence; + + /* a video decoder does not care about this flag (?) */ + if (buf->decoder_flags & BUF_FLAG_PREVIEW) { + lprintf("BUF_FLAG_PREVIEW\n"); + } + + if (buf->decoder_flags & BUF_FLAG_FRAMERATE) { + lprintf("BUF_FLAG_FRAMERATE=%d\n", buf->decoder_info[0]); + if ( buf->decoder_info[0] > 0 ) { + this->sequence.video_step = buf->decoder_info[0]; + _x_stream_info_set(this->stream, XINE_STREAM_INFO_FRAME_DURATION, this->sequence.video_step); + } + } + + if (buf->decoder_flags & BUF_FLAG_HEADER) { + lprintf("BUF_FLAG_HEADER\n"); + } + + if (buf->decoder_flags & BUF_FLAG_ASPECT) { + lprintf("BUF_FLAG_ASPECT\n"); + seq->ratio = (double)buf->decoder_info[1]/(double)buf->decoder_info[2]; + lprintf("arx=%d ary=%d ratio=%f\n", buf->decoder_info[1], buf->decoder_info[2], seq->ratio); + } + + if (buf->decoder_flags & BUF_FLAG_FRAME_START) { + lprintf("BUF_FLAG_FRAME_START\n"); + seq->seq_pts = buf->pts; + } + + if ( !buf->size ) + return; + + seq->cur_pts = buf->pts; + + if (buf->decoder_flags & BUF_FLAG_STDHEADER) { + lprintf("BUF_FLAG_STDHEADER\n"); + xine_bmiheader *bih = (xine_bmiheader *) buf->content; + int bs = sizeof( xine_bmiheader ); + seq->coded_width = bih->biWidth; + seq->coded_height = bih->biHeight; + lprintf( "width=%d height=%d\n", bih->biWidth, bih->biHeight ); + if ( buf->size > bs ) { + seq->mode = MODE_FRAME; + parse_header( this, buf->content+bs, buf->size-bs ); + } + return; + } + + int size = seq->bufpos+buf->size; + if ( seq->bufsize < size ) { + seq->bufsize = size+10000; + seq->buf = realloc( seq->buf, seq->bufsize ); + lprintf("sequence buffer realloced = %d\n", seq->bufsize ); + } + xine_fast_memcpy( seq->buf+seq->bufpos, buf->content, buf->size ); + seq->bufpos += buf->size; + + if ( seq->mode == MODE_FRAME ) { + if (buf->decoder_flags & 
BUF_FLAG_FRAME_END) { + lprintf("BUF_FLAG_FRAME_END\n"); + seq->picture.vdp_infos.slice_count = 1; + decode_picture( this ); + seq->bufpos = 0; + } + } + else { + int res; + while ( seq->bufseek <= seq->bufpos-4 ) { + uint8_t *buffer = seq->buf+seq->bufseek; + if ( buffer[0]==0 && buffer[1]==0 && buffer[2]==1 ) { + seq->current_code = buffer[3]; + lprintf("current_code = %d\n", seq->current_code); + if ( seq->start<0 ) { + seq->start = seq->bufseek; + seq->code_start = buffer[3]; + lprintf("code_start = %d\n", seq->code_start); + if ( seq->cur_pts ) + seq->seq_pts = seq->cur_pts; + } + else { + res = parse_code( this, seq->buf+seq->start, seq->bufseek-seq->start ); + if ( res==1 ) { + decode_picture( this ); + parse_code( this, seq->buf+seq->start, seq->bufseek-seq->start ); + } + if ( res!=-1 ) { + uint8_t *tmp = (uint8_t*)malloc(seq->bufsize); + xine_fast_memcpy( tmp, seq->buf+seq->bufseek, seq->bufpos-seq->bufseek ); + seq->bufpos -= seq->bufseek; + seq->start = -1; + seq->bufseek = -1; + free( seq->buf ); + seq->buf = tmp; + } + } + } + ++seq->bufseek; + } + } +} + + + +/* + * This function is called when xine needs to flush the system. + */ +static void vdpau_vc1_flush (video_decoder_t *this_gen) { + vdpau_vc1_decoder_t *this = (vdpau_vc1_decoder_t *) this_gen; + + lprintf( "vdpau_vc1_flush\n" ); +} + +/* + * This function resets the video decoder. + */ +static void vdpau_vc1_reset (video_decoder_t *this_gen) { + vdpau_vc1_decoder_t *this = (vdpau_vc1_decoder_t *) this_gen; + + lprintf( "vdpau_vc1_reset\n" ); + reset_sequence( &this->sequence ); +} + +/* + * The decoder should forget any stored pts values here. + */ +static void vdpau_vc1_discontinuity (video_decoder_t *this_gen) { + vdpau_vc1_decoder_t *this = (vdpau_vc1_decoder_t *) this_gen; + + lprintf( "vdpau_vc1_discontinuity\n" ); +} + +/* + * This function frees the video decoder instance allocated to the decoder. 
+ */ +static void vdpau_vc1_dispose (video_decoder_t *this_gen) { + + vdpau_vc1_decoder_t *this = (vdpau_vc1_decoder_t *) this_gen; + + lprintf( "vdpau_vc1_dispose\n" ); + + if ( this->decoder!=VDP_INVALID_HANDLE && this->sequence.accel_vdpau ) { + this->sequence.accel_vdpau->vdp_decoder_destroy( this->decoder ); + this->decoder = VDP_INVALID_HANDLE; + } + + reset_sequence( &this->sequence ); + + this->stream->video_out->close( this->stream->video_out, this->stream ); + + free( this->sequence.buf ); + free( this_gen ); +} + +/* + * This function allocates, initializes, and returns a private video + * decoder structure. + */ +static video_decoder_t *open_plugin (video_decoder_class_t *class_gen, xine_stream_t *stream) { + + vdpau_vc1_decoder_t *this ; + + lprintf( "open_plugin\n" ); + + /* the videoout must be vdpau-capable to support this decoder */ + if ( !(stream->video_driver->get_capabilities(stream->video_driver) & VO_CAP_VDPAU_VC1) ) + return NULL; + + /* now check if vdpau has free decoder resource */ + vo_frame_t *img = stream->video_out->get_frame( stream->video_out, 1920, 1080, 1, XINE_IMGFMT_VDPAU, VO_BOTH_FIELDS ); + vdpau_accel_t *accel = (vdpau_accel_t*)img->accel_data; + int runtime_nr = accel->vdp_runtime_nr; + img->free(img); + VdpDecoder decoder; + VdpStatus st = accel->vdp_decoder_create( accel->vdp_device, VDP_DECODER_PROFILE_VC1_MAIN, 1920, 1080, 2, &decoder ); + if ( st!=VDP_STATUS_OK ) { + lprintf( "can't create vdpau decoder.\n" ); + return 1; + } + + accel->vdp_decoder_destroy( decoder ); + + this = (vdpau_vc1_decoder_t *) calloc(1, sizeof(vdpau_vc1_decoder_t)); + + this->video_decoder.decode_data = vdpau_vc1_decode_data; + this->video_decoder.flush = vdpau_vc1_flush; + this->video_decoder.reset = vdpau_vc1_reset; + this->video_decoder.discontinuity = vdpau_vc1_discontinuity; + this->video_decoder.dispose = vdpau_vc1_dispose; + + this->stream = stream; + this->class = (vdpau_vc1_class_t *) class_gen; + + this->sequence.bufsize = 10000; + 
this->sequence.buf = (uint8_t*)malloc(this->sequence.bufsize); + this->sequence.forward_ref = 0; + this->sequence.backward_ref = 0; + this->sequence.vdp_runtime_nr = runtime_nr; + init_sequence( &this->sequence ); + + init_picture( &this->sequence.picture ); + + this->decoder = VDP_INVALID_HANDLE; + this->sequence.accel_vdpau = NULL; + this->sequence.mode = MODE_STARTCODE; + + (stream->video_out->open)(stream->video_out, stream); + +#ifdef MAKE_DAT + outfile = fopen( "/tmp/vc1.dat","w"); + nframes = 0; +#endif + + return &this->video_decoder; +} + +/* + * This function returns a brief string that describes (usually with the + * decoder's most basic name) the video decoder plugin. + */ +static char *get_identifier (video_decoder_class_t *this) { + return "vdpau_vc1"; +} + +/* + * This function returns a slightly longer string describing the video + * decoder plugin. + */ +static char *get_description (video_decoder_class_t *this) { + return "vdpau_vc1: vc1 decoder plugin using VDPAU hardware decoding.\n" + "Must be used along with video_out_vdpau."; +} + +/* + * This function frees the video decoder class and any other memory that was + * allocated. + */ +static void dispose_class (video_decoder_class_t *this) { + free (this); +} + +/* + * This function allocates a private video decoder class and initializes + * the class's member functions. + */ +static void *init_plugin (xine_t *xine, void *data) { + + vdpau_vc1_class_t *this; + + this = (vdpau_vc1_class_t *) calloc(1, sizeof(vdpau_vc1_class_t)); + + this->decoder_class.open_plugin = open_plugin; + this->decoder_class.get_identifier = get_identifier; + this->decoder_class.get_description = get_description; + this->decoder_class.dispose = dispose_class; + + return this; +} + +/* + * This is a list of all of the internal xine video buffer types that + * this decoder is able to handle. Check src/xine-engine/buffer.h for a + * list of valid buffer types (and add a new one if the one you need does + * not exist). 
Terminate the list with a 0. + */ +static const uint32_t video_types[] = { + BUF_VIDEO_VC1, BUF_VIDEO_WMV9, + 0 +}; + +/* + * This data structure combines the list of supported xine buffer types and + * the priority that the plugin should be given with respect to other + * plugins that handle the same buffer type. A plugin with priority (n+1) + * will be used instead of a plugin with priority (n). + */ +static const decoder_info_t dec_info_video = { + video_types, /* supported types */ + 8 /* priority */ +}; + +/* + * The plugin catalog entry. This is the only information that this plugin + * will export to the public. + */ +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* { type, API, "name", version, special_info, init_function } */ + { PLUGIN_VIDEO_DECODER, 18, "vdpau_vc1", XINE_VERSION_CODE, &dec_info_video, init_plugin }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +}; diff --git a/src/video_out/Makefile.am b/src/video_out/Makefile.am index 3e182fc14..afa838ab7 100644 --- a/src/video_out/Makefile.am +++ b/src/video_out/Makefile.am @@ -37,6 +37,10 @@ endif endif endif +if HAVE_VDPAU +vdpau_module = xineplug_vo_out_vdpau.la +endif + if HAVE_XCB XCBOSD = xcbosd.c if HAVE_XCBSHM @@ -101,9 +105,14 @@ xineplug_LTLIBRARIES = $(xshm_module) $(xv_module) $(xvmc_module) \ $(xxmc_module) \ $(xcbshm_module) \ $(xcbxv_module) \ + $(vdpau_module) \ xineplug_vo_out_raw.la \ xineplug_vo_out_none.la +xineplug_vo_out_vdpau_la_SOURCES = yuv2rgb.c yuv2rgb_mmx.c yuv2rgb_mlib.c video_out_vdpau.c +xineplug_vo_out_vdpau_la_LIBADD = $(XINE_LIB) $(MLIB_LIBS) $(PTHREAD_LIBS) $(X_LIBS) $(LTLIBINTL) -lvdpau +xineplug_vo_out_vdpau_la_CFLAGS = $(VISIBILITY_FLAG) $(MLIB_CFLAGS) $(X_CFLAGS) + xineplug_vo_out_xcbshm_la_SOURCES = yuv2rgb.c yuv2rgb_mmx.c yuv2rgb_mlib.c video_out_xcbshm.c $(XCBOSD) xineplug_vo_out_xcbshm_la_LIBADD = $(XINE_LIB) $(MLIB_LIBS) $(PTHREAD_LIBS) $(XCB_LIBS) $(XCBSHM_LIBS) $(LTLIBINTL) xineplug_vo_out_xcbshm_la_CFLAGS = $(VISIBILITY_FLAG) $(MLIB_CFLAGS) 
$(XCB_CFLAGS) $(XCBSHM_CFLAGS) diff --git a/src/video_out/video_out_raw.c b/src/video_out/video_out_raw.c index 99e2c0004..52b959985 100644 --- a/src/video_out/video_out_raw.c +++ b/src/video_out/video_out_raw.c @@ -163,11 +163,16 @@ static int raw_process_ovl( raw_driver_t *this_gen, vo_overlay_t *overlay ) clr = rle->color; alpha = trans[clr]; for ( i=0; i<rlelen; ++i ) { - rgba[0] = colors[clr].y; - rgba[1] = colors[clr].cr; - rgba[2] = colors[clr].cb; - rgba[3] = alpha*255/15; - rgba+= 4; + if ( alpha == 0 ) { + rgba[0] = rgba[1] = rgba[2] = rgba[3] = 0; + } + else { + rgba[0] = colors[clr].y; + rgba[1] = colors[clr].cr; + rgba[2] = colors[clr].cb; + rgba[3] = alpha*255/15; + } + rgba+= 4; ++pos; } ++rle; @@ -278,10 +283,14 @@ static void raw_frame_dispose (vo_frame_t *vo_img) frame->yuv2rgb->dispose (frame->yuv2rgb); - free (frame->chunk[0]); - free (frame->chunk[1]); - free (frame->chunk[2]); - free (frame->chunk[3]); + if ( frame->chunk[0] ) + free (frame->chunk[0]); + if ( frame->chunk[1] ) + free (frame->chunk[1]); + if ( frame->chunk[2] ) + free (frame->chunk[2]); + if ( frame->chunk[3] ) + free (frame->chunk[3]); free (frame); } @@ -297,6 +306,9 @@ static vo_frame_t *raw_alloc_frame (vo_driver_t *this_gen) if (!frame) return NULL; + frame->chunk[0] = frame->chunk[1] = frame->chunk[2] = frame->chunk[3] = NULL; + frame->width = frame->height = frame->format = frame->flags = 0; + pthread_mutex_init (&frame->vo_frame.mutex, NULL); /* @@ -330,13 +342,16 @@ static void raw_update_frame_format (vo_driver_t *this_gen, vo_frame_t *frame_ge || (frame->flags != flags)) { /* lprintf ("updating frame to %d x %d (ratio=%g, format=%08x)\n", width, height, ratio, format); */ - flags &= VO_BOTH_FIELDS; - /* (re-) allocate render space */ - free (frame->chunk[0]); - free (frame->chunk[1]); - free (frame->chunk[2]); - free (frame->chunk[3]); + if ( frame->chunk[0] ) + free (frame->chunk[0]); + if ( frame->chunk[1] ) + free (frame->chunk[1]); + if ( frame->chunk[2] ) + free 
(frame->chunk[2]); + if ( frame->chunk[3] ) + free (frame->chunk[3]); + frame->chunk[0] = frame->chunk[1] = frame->chunk[2] = frame->chunk[3] = NULL; if (format == XINE_IMGFMT_YV12) { frame->vo_frame.pitches[0] = 8*((width + 7) / 8); @@ -355,7 +370,7 @@ static void raw_update_frame_format (vo_driver_t *this_gen, vo_frame_t *frame_ge (void **) &frame->chunk[3]); /* set up colorspace converter */ - switch (flags) { + switch (flags & VO_BOTH_FIELDS) { case VO_TOP_FIELD: case VO_BOTTOM_FIELD: frame->yuv2rgb->configure (frame->yuv2rgb, @@ -382,6 +397,7 @@ static void raw_update_frame_format (vo_driver_t *this_gen, vo_frame_t *frame_ge frame->width = width; frame->height = height; frame->format = format; + frame->flags = flags; raw_frame_field ((vo_frame_t *)frame, flags); } diff --git a/src/video_out/video_out_vdpau.c b/src/video_out/video_out_vdpau.c new file mode 100644 index 000000000..8b4c645d6 --- /dev/null +++ b/src/video_out/video_out_vdpau.c @@ -0,0 +1,2664 @@ +/* + * Copyright (C) 2008 the xine project + * Copyright (C) 2008 Christophe Thommeret <hftom@free.fr> + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * + * video_out_vdpau.c, a video output plugin + * using VDPAU (Video Decode and Presentation Api for Unix) + * + * + */ + +/* #define LOG */ +#define LOG_MODULE "video_out_vdpau" + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <string.h> +#include <math.h> +#include <errno.h> +#include <ctype.h> +#include <pthread.h> + +#include "xine.h" +#include "video_out.h" +#include "vo_scale.h" +#include "xine_internal.h" +#include "yuv2rgb.h" +#include "xineutils.h" + +#include <vdpau/vdpau_x11.h> +#include "accel_vdpau.h" + +#define NUM_FRAMES_BACK 1 + +#define LOCKDISPLAY /*define this if you have a buggy libX11/xcb*/ + + +#define DEINT_BOB 1 +#define DEINT_HALF_TEMPORAL 2 +#define DEINT_HALF_TEMPORAL_SPATIAL 3 +#define DEINT_TEMPORAL 4 +#define DEINT_TEMPORAL_SPATIAL 5 + +#define NUMBER_OF_DEINTERLACERS 5 + +char *vdpau_deinterlacer_name[] = { + "bob", + "half temporal", + "half temporal_spatial", + "temporal", + "temporal_spatial", + NULL +}; + +char* vdpau_deinterlacer_description [] = { + "bob\nBasic deinterlacing, doing 50i->50p.\n\n", + "half temporal\nDisplays first field only, doing 50i->25p\n\n", + "half temporal_spatial\nDisplays first field only, doing 50i->25p\n\n", + "temporal\nVery good, 50i->50p\n\n", + "temporal_spatial\nThe best, but very GPU intensive.\n\n", + NULL +}; + + +VdpOutputSurfaceRenderBlendState blend = { + VDP_OUTPUT_SURFACE_RENDER_BLEND_STATE_VERSION, + VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_ONE, + VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, + VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_ONE, + VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + VDP_OUTPUT_SURFACE_RENDER_BLEND_EQUATION_ADD, + 
VDP_OUTPUT_SURFACE_RENDER_BLEND_EQUATION_ADD, + 0 +}; + + + +VdpDevice vdp_device; +VdpPresentationQueue vdp_queue; +VdpPresentationQueueTarget vdp_queue_target; + +VdpDeviceDestroy *vdp_device_destroy; + +VdpGetProcAddress *vdp_get_proc_address; + +VdpGetApiVersion *vdp_get_api_version; +VdpGetInformationString *vdp_get_information_string; +VdpGetErrorString *vdp_get_error_string; + +VdpVideoSurfaceQueryGetPutBitsYCbCrCapabilities *vdp_video_surface_query_get_put_bits_ycbcr_capabilities; +VdpVideoSurfaceCreate *vdp_video_surface_create; +VdpVideoSurfaceDestroy *vdp_video_surface_destroy; +VdpVideoSurfacePutBitsYCbCr *vdp_video_surface_putbits_ycbcr; +VdpVideoSurfaceGetBitsYCbCr *vdp_video_surface_getbits_ycbcr; + +VdpOutputSurfaceCreate *vdp_output_surface_create; +VdpOutputSurfaceDestroy *vdp_output_surface_destroy; +VdpOutputSurfaceRenderBitmapSurface *vdp_output_surface_render_bitmap_surface; +VdpOutputSurfacePutBitsNative *vdp_output_surface_put_bits; + +VdpVideoMixerCreate *vdp_video_mixer_create; +VdpVideoMixerDestroy *vdp_video_mixer_destroy; +VdpVideoMixerRender *vdp_video_mixer_render; +VdpVideoMixerSetAttributeValues *vdp_video_mixer_set_attribute_values; +VdpVideoMixerSetFeatureEnables *vdp_video_mixer_set_feature_enables; +VdpVideoMixerGetFeatureEnables *vdp_video_mixer_get_feature_enables; +VdpVideoMixerQueryFeatureSupport *vdp_video_mixer_query_feature_support; +VdpVideoMixerQueryParameterSupport *vdp_video_mixer_query_parameter_support; +VdpVideoMixerQueryAttributeSupport *vdp_video_mixer_query_attribute_support; +VdpVideoMixerQueryParameterValueRange *vdp_video_mixer_query_parameter_value_range; +VdpVideoMixerQueryAttributeValueRange *vdp_video_mixer_query_attribute_value_range; + +VdpGenerateCSCMatrix *vdp_generate_csc_matrix; + +VdpPresentationQueueTargetCreateX11 *vdp_queue_target_create_x11; +VdpPresentationQueueTargetDestroy *vdp_queue_target_destroy; +VdpPresentationQueueCreate *vdp_queue_create; +VdpPresentationQueueDestroy 
*vdp_queue_destroy; +VdpPresentationQueueDisplay *vdp_queue_display; +VdpPresentationQueueBlockUntilSurfaceIdle *vdp_queue_block; +VdpPresentationQueueSetBackgroundColor *vdp_queue_set_background_color; +VdpPresentationQueueGetTime *vdp_queue_get_time; +VdpPresentationQueueQuerySurfaceStatus *vdp_queue_query_surface_status; + +VdpBitmapSurfacePutBitsNative *vdp_bitmap_put_bits; +VdpBitmapSurfaceCreate *vdp_bitmap_create; +VdpBitmapSurfaceDestroy *vdp_bitmap_destroy; + +VdpDecoderQueryCapabilities *vdp_decoder_query_capabilities; +VdpDecoderCreate *vdp_decoder_create; +VdpDecoderDestroy *vdp_decoder_destroy; +VdpDecoderRender *vdp_decoder_render; + +VdpPreemptionCallbackRegister *vdp_preemption_callback_register; + +static void vdp_preemption_callback( VdpDevice device, void *context ); +static void vdpau_reinit( vo_driver_t *this_gen ); + +static VdpVideoSurfaceCreate *orig_vdp_video_surface_create; +static VdpVideoSurfaceDestroy *orig_vdp_video_surface_destroy; + +static VdpDecoderCreate *orig_vdp_decoder_create; +static VdpDecoderDestroy *orig_vdp_decoder_destroy; +static VdpDecoderRender *orig_vdp_decoder_render; + +static Display *guarded_display; + +static VdpStatus guarded_vdp_video_surface_create(VdpDevice device, VdpChromaType chroma_type, uint32_t width, uint32_t height,VdpVideoSurface *surface) +{ + VdpStatus r; +#ifdef LOCKDISPLAY + XLockDisplay(guarded_display); +#endif + r = orig_vdp_video_surface_create(device, chroma_type, width, height, surface); +#ifdef LOCKDISPLAY + XUnlockDisplay(guarded_display); +#endif + return r; +} + +static VdpStatus guarded_vdp_video_surface_destroy(VdpVideoSurface surface) +{ + VdpStatus r; + /*XLockDisplay(guarded_display);*/ + r = orig_vdp_video_surface_destroy(surface); + /*XUnlockDisplay(guarded_display);*/ + return r; +} + +static VdpStatus guarded_vdp_decoder_create(VdpDevice device, VdpDecoderProfile profile, uint32_t width, uint32_t height, uint32_t max_references, VdpDecoder *decoder) +{ + VdpStatus r; +#ifdef 
LOCKDISPLAY + XLockDisplay(guarded_display); +#endif + r = orig_vdp_decoder_create(device, profile, width, height, max_references, decoder); +#ifdef LOCKDISPLAY + XUnlockDisplay(guarded_display); +#endif + return r; +} + +static VdpStatus guarded_vdp_decoder_destroy(VdpDecoder decoder) +{ + VdpStatus r; +#ifdef LOCKDISPLAY + XLockDisplay(guarded_display); +#endif + r = orig_vdp_decoder_destroy(decoder); +#ifdef LOCKDISPLAY + XUnlockDisplay(guarded_display); +#endif + return r; +} + +static VdpStatus guarded_vdp_decoder_render(VdpDecoder decoder, VdpVideoSurface target, VdpPictureInfo const *picture_info, uint32_t bitstream_buffer_count, VdpBitstreamBuffer const *bitstream_buffers) +{ + VdpStatus r; +#ifdef LOCKDISPLAY + XLockDisplay(guarded_display); +#endif + r = orig_vdp_decoder_render(decoder, target, picture_info, bitstream_buffer_count, bitstream_buffers); +#ifdef LOCKDISPLAY + XUnlockDisplay(guarded_display); +#endif + return r; +} + + + +typedef struct { + VdpBitmapSurface ovl_bitmap; + uint32_t bitmap_width, bitmap_height; + int ovl_w, ovl_h; /* overlay's width and height */ + int ovl_x, ovl_y; /* overlay's top-left display position */ + int unscaled; + int expected_overlay_width; /*if >0 scale to video width*/ + int expected_overlay_height; /* if >0 scale to video height */ +} vdpau_overlay_t; + + +typedef struct { + int x; + int y; + int w; + int h; +} +argb_ovl_data_t; + + +typedef struct { + vo_frame_t vo_frame; + + int width, height, format, flags; + double ratio; + uint8_t *chunk[3]; /* mem alloc by xmalloc_aligned */ + + vdpau_accel_t vdpau_accel_data; +} vdpau_frame_t; + + +typedef struct { + + vo_driver_t vo_driver; + vo_scale_t sc; + + Display *display; + int screen; + Drawable drawable; + + config_values_t *config; + + int ovl_changed; + vdpau_overlay_t overlays[XINE_VORAW_MAX_OVL]; + yuv2rgb_factory_t *yuv2rgb_factory; + yuv2rgb_t *ovl_yuv2rgb; + VdpOutputSurface overlay_output; + uint32_t overlay_output_width; + uint32_t overlay_output_height; 
+ int has_overlay; + + VdpOutputSurface overlay_unscaled; + uint32_t overlay_unscaled_width; + uint32_t overlay_unscaled_height; + int has_unscaled; + + VdpOutputSurface argb_overlay; + uint32_t argb_overlay_width; + uint32_t argb_overlay_height; + int has_argb_overlay; + int argb_ovl_count; + vo_overlay_t *argb_ovl[XINE_VORAW_MAX_OVL]; + int argb_ovl_data_count; + argb_ovl_data_t argb_ovl_data[XINE_VORAW_MAX_OVL]; + + int32_t video_window_x; + int32_t video_window_y; + int32_t video_window_width; + int32_t video_window_height; + + VdpVideoSurface soft_surface; + uint32_t soft_surface_width; + uint32_t soft_surface_height; + int soft_surface_format; + +#define NOUTPUTSURFACE 2 + VdpOutputSurface output_surface[NOUTPUTSURFACE]; + uint8_t current_output_surface; + uint32_t output_surface_width[NOUTPUTSURFACE]; + uint32_t output_surface_height[NOUTPUTSURFACE]; + uint8_t init_queue; + + VdpVideoMixer video_mixer; + VdpChromaType video_mixer_chroma; + uint32_t video_mixer_width; + uint32_t video_mixer_height; + VdpColorStandard color_standard; + VdpBool temporal_spatial_is_supported; + VdpBool temporal_is_supported; + VdpBool noise_reduction_is_supported; + VdpBool sharpness_is_supported; + VdpBool inverse_telecine_is_supported; + VdpBool skip_chroma_is_supported; + + char* deinterlacers_name[NUMBER_OF_DEINTERLACERS+1]; + int deinterlacers_method[NUMBER_OF_DEINTERLACERS]; + + int scaling_level_max; + int scaling_level_current; + + VdpColor back_color; + + vdpau_frame_t *back_frame[ NUM_FRAMES_BACK ]; + + uint32_t capabilities; + xine_t *xine; + + int hue; + int saturation; + int brightness; + int contrast; + int sharpness; + int noise; + int deinterlace; + int deinterlace_method; + int enable_inverse_telecine; + int honor_progressive; + int skip_chroma; + + int vdp_runtime_nr; + int reinit_needed; + + int allocated_surfaces; + int zoom_x; + int zoom_y; +} vdpau_driver_t; + + +typedef struct { + video_driver_class_t driver_class; + xine_t *xine; +} vdpau_class_t; + + + 
+static void vdpau_overlay_clut_yuv2rgb(vdpau_driver_t *this, vo_overlay_t *overlay, vdpau_frame_t *frame) +{ + int i; + clut_t* clut = (clut_t*) overlay->color; + + if (!overlay->rgb_clut) { + for ( i=0; i<sizeof(overlay->color)/sizeof(overlay->color[0]); i++ ) { + *((uint32_t *)&clut[i]) = this->ovl_yuv2rgb->yuv2rgb_single_pixel_fun(this->ovl_yuv2rgb, clut[i].y, clut[i].cb, clut[i].cr); + } + overlay->rgb_clut++; + } + if (!overlay->hili_rgb_clut) { + clut = (clut_t*) overlay->hili_color; + for ( i=0; i<sizeof(overlay->color)/sizeof(overlay->color[0]); i++) { + *((uint32_t *)&clut[i]) = this->ovl_yuv2rgb->yuv2rgb_single_pixel_fun(this->ovl_yuv2rgb, clut[i].y, clut[i].cb, clut[i].cr); + } + overlay->hili_rgb_clut++; + } +} + + + +static void vdpau_process_argb_ovls(vdpau_driver_t *this_gen, vo_frame_t *frame_gen) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + int i, k; + + vo_overlay_t *ovl[XINE_VORAW_MAX_OVL]; + argb_ovl_data_t ovl_data[XINE_VORAW_MAX_OVL]; + int ovl_data_count = 0; + + int total_extent_width = 0, total_extent_height = 0; + this->video_window_x = 0; + this->video_window_y = 0; + this->video_window_width = 0; + this->video_window_height = 0; + + /* lock layers while processing and determine extent */ + for (i = 0; i < this->argb_ovl_count; i++) { + pthread_mutex_lock(&this->argb_ovl[i]->argb_layer->mutex); + + if (this->argb_ovl[i]->argb_layer->buffer != NULL) { + int extent_width = this->argb_ovl[i]->extent_width; + int extent_height = this->argb_ovl[i]->extent_height; + if (extent_width <= 0 || extent_height <= 0) { + extent_width = frame_gen->width; + extent_height = frame_gen->height; + } + if (extent_width > 0 && extent_height > 0) { + if (total_extent_width < extent_width) + total_extent_width = extent_width; + if (total_extent_height < extent_height) + total_extent_height = extent_height; + ovl_data[ovl_data_count].x = this->argb_ovl[i]->x; + ovl_data[ovl_data_count].y = this->argb_ovl[i]->y; + ovl_data[ovl_data_count].w = 
this->argb_ovl[i]->width; + ovl_data[ovl_data_count].h = this->argb_ovl[i]->height; + ovl[ovl_data_count++] = this->argb_ovl[i]; + } + if (this->argb_ovl[i]->video_window_width > 0 + && this->argb_ovl[i]->video_window_height > 0) { + /* last one wins */ + this->video_window_x = this->argb_ovl[i]->video_window_x; + this->video_window_y = this->argb_ovl[i]->video_window_y; + this->video_window_width = this->argb_ovl[i]->video_window_width; + this->video_window_height = this->argb_ovl[i]->video_window_height; + } + } + } + + /* adjust surface */ + if (total_extent_width > 0 && total_extent_height > 0) { + if (this->argb_overlay_width != total_extent_width || this->argb_overlay_height != total_extent_height || this->argb_overlay == VDP_INVALID_HANDLE) { + if (this->argb_overlay != VDP_INVALID_HANDLE) + vdp_output_surface_destroy(this->argb_overlay); + + VdpStatus st = vdp_output_surface_create(vdp_device, VDP_RGBA_FORMAT_B8G8R8A8, total_extent_width, total_extent_height, &this->argb_overlay); + if (st != VDP_STATUS_OK) + printf("vdpau_process_argb_ovl: vdp_output_surface_create failed : %s\n", vdp_get_error_string(st)); + + this->argb_overlay_width = total_extent_width; + this->argb_overlay_height = total_extent_height; + + /* change argb_ovl_data to wipe complete surface */ + this->argb_ovl_data_count = 1; + this->argb_ovl_data[0].x = 0; + this->argb_ovl_data[0].y = 0; + this->argb_ovl_data[0].w = total_extent_width; + this->argb_ovl_data[0].h = total_extent_height; + + /* extend dirty areas to maximum for filling wiped surface */ + for (i = 0; i < ovl_data_count; i++) { + ovl[i]->argb_layer->x1 = 0; + ovl[i]->argb_layer->y1 = 0; + ovl[i]->argb_layer->x2 = ovl[i]->width; + ovl[i]->argb_layer->y2 = ovl[i]->height; + } + } + } + + /* wipe surface for gone overlays */ + if (this->argb_overlay != VDP_INVALID_HANDLE) { + uint32_t *zeros = NULL; + for (i = 0; i < this->argb_ovl_data_count; i++) { + argb_ovl_data_t *curr_ovl_data = &this->argb_ovl_data[i]; + int ovl_gone = 
1; + for (k = 0; k < ovl_data_count; k++) { + if (0 == memcmp(curr_ovl_data, &ovl_data[k], sizeof (*curr_ovl_data))) { + ovl_gone = 0; + break; + } + } + if (!ovl_gone) + continue; + if (!zeros) + zeros = calloc(4, this->argb_overlay_width * this->argb_overlay_height); + if (zeros) { + uint32_t pitch = curr_ovl_data->w * 4; + VdpRect dest = { curr_ovl_data->x, curr_ovl_data->y, curr_ovl_data->x + curr_ovl_data->w, curr_ovl_data->y + curr_ovl_data->h }; + VdpStatus st = vdp_output_surface_put_bits(this->argb_overlay, (void *)&zeros, &pitch, &dest); + if (st != VDP_STATUS_OK) + printf("vdpau_process_argb_ovl: vdp_output_surface_put_bits_native failed : %s\n", vdp_get_error_string(st)); + } + } + free(zeros); + } + + /* set destination area according to dirty area of argb layer and reset dirty area */ + for (i = 0; i < ovl_data_count; i++) { + uint32_t pitch = ovl[i]->width * 4; + uint32_t *buffer_start = ovl[i]->argb_layer->buffer + ovl[i]->argb_layer->y1 * ovl[i]->width + ovl[i]->argb_layer->x1; + VdpRect dest = { ovl[i]->x + ovl[i]->argb_layer->x1, ovl[i]->y + ovl[i]->argb_layer->y1, ovl[i]->x + ovl[i]->argb_layer->x2, ovl[i]->y + ovl[i]->argb_layer->y2 }; + ovl[i]->argb_layer->x1 = ovl[i]->width; + ovl[i]->argb_layer->y1 = ovl[i]->height; + ovl[i]->argb_layer->x2 = 0; + ovl[i]->argb_layer->y2 = 0; + + VdpStatus st = vdp_output_surface_put_bits(this->argb_overlay, (void *)&buffer_start, &pitch, &dest); + if (st != VDP_STATUS_OK) + printf( "vdpau_process_argb_ovl: vdp_output_surface_put_bits_native failed : %s\n", vdp_get_error_string(st)); + else + this->has_argb_overlay = 1; + } + + /* store ovl_data */ + memcpy(this->argb_ovl_data, ovl_data, sizeof (ovl_data)); + this->argb_ovl_data_count = ovl_data_count; + + /* unlock layers */ + for (i = 0; i < this->argb_ovl_count; i++) + pthread_mutex_unlock(&this->argb_ovl[i]->argb_layer->mutex); +} + + + +static int vdpau_process_ovl( vdpau_driver_t *this_gen, vo_overlay_t *overlay ) +{ + vdpau_overlay_t *ovl = 
&this_gen->overlays[this_gen->ovl_changed-1]; + + if ( overlay->width<=0 || overlay->height<=0 ) + return 0; + + if ( (ovl->bitmap_width < overlay->width ) || (ovl->bitmap_height < overlay->height) || (ovl->ovl_bitmap == VDP_INVALID_HANDLE) ) { + if (ovl->ovl_bitmap != VDP_INVALID_HANDLE) { + vdp_bitmap_destroy( ovl->ovl_bitmap ); + ovl->ovl_bitmap = VDP_INVALID_HANDLE; + } + VdpStatus st = vdp_bitmap_create( vdp_device, VDP_RGBA_FORMAT_B8G8R8A8, overlay->width, overlay->height, 0, &ovl->ovl_bitmap ); + if ( st != VDP_STATUS_OK ) { + printf( "vdpau_process_ovl: vdp_bitmap_create failed : %s\n", vdp_get_error_string(st) ); + } + ovl->bitmap_width = overlay->width; + ovl->bitmap_height = overlay->height; + } + ovl->ovl_w = overlay->width; + ovl->ovl_h = overlay->height; + ovl->ovl_x = overlay->x; + ovl->ovl_y = overlay->y; + ovl->unscaled = overlay->unscaled; + ovl->expected_overlay_width = overlay->extent_width; + ovl->expected_overlay_height = overlay->extent_height; + uint32_t *buf = (uint32_t*)malloc(ovl->ovl_w*ovl->ovl_h*4); + if ( !buf ) + return 0; + + int num_rle = overlay->num_rle; + rle_elem_t *rle = overlay->rle; + uint32_t *rgba = buf; + uint32_t red, green, blue, alpha; + clut_t *low_colors = (clut_t*)overlay->color; + clut_t *hili_colors = (clut_t*)overlay->hili_color; + uint8_t *low_trans = overlay->trans; + uint8_t *hili_trans = overlay->hili_trans; + clut_t *colors; + uint8_t *trans; + int rlelen = 0; + uint8_t clr = 0; + int i, pos=0, x, y; + + while ( num_rle>0 ) { + x = pos%ovl->ovl_w; + y = pos/ovl->ovl_w; + if ( (x>=overlay->hili_left && x<=overlay->hili_right) && (y>=overlay->hili_top && y<=overlay->hili_bottom) ) { + colors = hili_colors; + trans = hili_trans; + } + else { + colors = low_colors; + trans = low_trans; + } + rlelen = rle->len; + clr = rle->color; + for ( i=0; i<rlelen; ++i ) { + if ( trans[clr] == 0 ) { + alpha = red = green = blue = 0; + } + else { + red = colors[clr].y; /* red */ + green = colors[clr].cr; /* green */ + blue = 
colors[clr].cb; /* blue */ + alpha = trans[clr]*255/15; + } + *rgba = (alpha<<24) | (red<<16) | (green<<8) | blue; + rgba++; + ++pos; + } + ++rle; + --num_rle; + } + uint32_t pitch = ovl->ovl_w*4; + VdpRect dest = { 0, 0, ovl->ovl_w, ovl->ovl_h }; + VdpStatus st = vdp_bitmap_put_bits( ovl->ovl_bitmap, &buf, &pitch, &dest); + if ( st != VDP_STATUS_OK ) { + printf( "vdpau_process_ovl: vdp_bitmap_put_bits failed : %s\n", vdp_get_error_string(st) ); + } + free(buf); + return 1; +} + + + +static void vdpau_overlay_begin (vo_driver_t *this_gen, vo_frame_t *frame_gen, int changed) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + + if ( !changed ) + return; + + this->has_overlay = this->has_unscaled = 0; + this->has_argb_overlay = 0; + this->argb_ovl_count = 0; + ++this->ovl_changed; +} + + + +static void vdpau_overlay_blend (vo_driver_t *this_gen, vo_frame_t *frame_gen, vo_overlay_t *overlay) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + vdpau_frame_t *frame = (vdpau_frame_t *) frame_gen; + + if (!this->ovl_changed) + return; + + if (overlay->rle) { + if (this->ovl_changed >= XINE_VORAW_MAX_OVL) + return; + if (!overlay->rgb_clut || !overlay->hili_rgb_clut) + vdpau_overlay_clut_yuv2rgb (this, overlay, frame); + if ( vdpau_process_ovl( this, overlay ) ) + ++this->ovl_changed; + } + + if (overlay->argb_layer) { + if (this->argb_ovl_count >= XINE_VORAW_MAX_OVL) + return; + this->argb_ovl[this->argb_ovl_count++] = overlay; + } +} + + + +static void vdpau_overlay_end (vo_driver_t *this_gen, vo_frame_t *frame) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + int i; + VdpStatus st; + + if ( !this->ovl_changed ) + return; + + if (this->argb_ovl_count || this->argb_ovl_data_count) + vdpau_process_argb_ovls(this, frame); + + if ( !(this->ovl_changed-1) ) { + this->ovl_changed = 0; + this->has_overlay = 0; + this->has_unscaled = 0; + return; + } + + int w=0, h=0; + int scaler = 0; + for ( i=0; i<this->ovl_changed-1; ++i ) { + if ( 
this->overlays[i].unscaled ) + continue; + if ( w < (this->overlays[i].ovl_x+this->overlays[i].ovl_w) ) + w = this->overlays[i].ovl_x+this->overlays[i].ovl_w; + if ( h < (this->overlays[i].ovl_y+this->overlays[i].ovl_h) ) + h = this->overlays[i].ovl_y+this->overlays[i].ovl_h; + if ( this->overlays[i].expected_overlay_width ) + scaler = 1; + if ( this->overlays[i].expected_overlay_height ) + scaler = 1; + } + + if ( scaler ) { + w = this->video_mixer_width; + h = this->video_mixer_height; + } + + int out_w = (w>frame->width) ? w : frame->width; + int out_h = (h>frame->height) ? h : frame->height; + + if ( (this->overlay_output_width!=out_w || this->overlay_output_height!=out_h) && this->overlay_output != VDP_INVALID_HANDLE ) { + st = vdp_output_surface_destroy( this->overlay_output ); + if ( st != VDP_STATUS_OK ) { + printf( "vdpau_overlay_end: vdp_output_surface_destroy failed : %s\n", vdp_get_error_string(st) ); + } + this->overlay_output = VDP_INVALID_HANDLE; + } + + this->overlay_output_width = out_w; + this->overlay_output_height = out_h; + + w = 64; h = 64; + for ( i=0; i<this->ovl_changed-1; ++i ) { + if ( !this->overlays[i].unscaled ) + continue; + if ( w < (this->overlays[i].ovl_x+this->overlays[i].ovl_w) ) + w = this->overlays[i].ovl_x+this->overlays[i].ovl_w; + if ( h < (this->overlays[i].ovl_y+this->overlays[i].ovl_h) ) + h = this->overlays[i].ovl_y+this->overlays[i].ovl_h; + } + + if ( (this->overlay_unscaled_width!=w || this->overlay_unscaled_height!=h) && this->overlay_unscaled != VDP_INVALID_HANDLE ) { + st = vdp_output_surface_destroy( this->overlay_unscaled ); + if ( st != VDP_STATUS_OK ) { + printf( "vdpau_overlay_end: vdp_output_surface_destroy failed : %s\n", vdp_get_error_string(st) ); + } + this->overlay_unscaled = VDP_INVALID_HANDLE; + } + + this->overlay_unscaled_width = w; + this->overlay_unscaled_height = h; + + if ( this->overlay_unscaled == VDP_INVALID_HANDLE ) { + st = vdp_output_surface_create( vdp_device, VDP_RGBA_FORMAT_B8G8R8A8, 
this->overlay_unscaled_width, this->overlay_unscaled_height, &this->overlay_unscaled ); + if ( st != VDP_STATUS_OK ) + printf( "vdpau_overlay_end: vdp_output_surface_create failed : %s\n", vdp_get_error_string(st) ); + } + + if ( this->overlay_output == VDP_INVALID_HANDLE ) { + st = vdp_output_surface_create( vdp_device, VDP_RGBA_FORMAT_B8G8R8A8, this->overlay_output_width, this->overlay_output_height, &this->overlay_output ); + if ( st != VDP_STATUS_OK ) + printf( "vdpau_overlay_end: vdp_output_surface_create failed : %s\n", vdp_get_error_string(st) ); + } + + w = (this->overlay_unscaled_width>this->overlay_output_width) ? this->overlay_unscaled_width : this->overlay_output_width; + h = (this->overlay_unscaled_height>this->overlay_output_height) ? this->overlay_unscaled_height : this->overlay_output_height; + + uint32_t *buf = (uint32_t*)calloc(w*4,h); + uint32_t pitch = w*4; + VdpRect clear = { 0, 0, this->overlay_output_width, this->overlay_output_height }; + st = vdp_output_surface_put_bits( this->overlay_output, &buf, &pitch, &clear ); + if ( st != VDP_STATUS_OK ) { + printf( "vdpau_overlay_end: vdp_output_surface_put_bits (clear) failed : %s\n", vdp_get_error_string(st) ); + } + clear.x1 = this->overlay_unscaled_width; clear.y1 = this->overlay_unscaled_height; + st = vdp_output_surface_put_bits( this->overlay_unscaled, &buf, &pitch, &clear ); + if ( st != VDP_STATUS_OK ) { + printf( "vdpau_overlay_end: vdp_output_surface_put_bits (clear) failed : %s\n", vdp_get_error_string(st) ); + } + free(buf); + + VdpOutputSurface *surface; + for ( i=0; i<this->ovl_changed-1; ++i ) { + VdpRect dest = { this->overlays[i].ovl_x, this->overlays[i].ovl_y, this->overlays[i].ovl_x+this->overlays[i].ovl_w, this->overlays[i].ovl_y+this->overlays[i].ovl_h }; + if ( this->overlays[i].expected_overlay_width ) { + double rx = (double)this->overlay_output_width/(double)this->overlays[i].expected_overlay_width; + double ry = 
(double)this->overlay_output_height/(double)this->overlays[i].expected_overlay_height; + dest.x0 *= rx; dest.y0 *= ry; dest.x1 *=rx; dest.y1 *= ry; + lprintf( "vdpau_overlay_end: overlay_width=%d overlay_height=%d rx=%f ry=%f\n", this->overlay_output_width, this->overlay_output_height, rx, ry ); + } + VdpRect src = { 0, 0, this->overlays[i].ovl_w, this->overlays[i].ovl_h }; + surface = (this->overlays[i].unscaled) ? &this->overlay_unscaled : &this->overlay_output; + st = vdp_output_surface_render_bitmap_surface( *surface, &dest, this->overlays[i].ovl_bitmap, &src, 0, &blend, 0 ); + if ( st != VDP_STATUS_OK ) { + printf( "vdpau_overlay_end: vdp_output_surface_render_bitmap_surface failed : %s\n", vdp_get_error_string(st) ); + } + } + this->has_overlay = 1; + this->ovl_changed = 0; +} + + + +static void vdpau_frame_proc_slice (vo_frame_t *vo_img, uint8_t **src) +{ + vdpau_frame_t *frame = (vdpau_frame_t *) vo_img ; + + vo_img->proc_called = 1; +} + + + +static void vdpau_frame_field (vo_frame_t *vo_img, int which_field) +{ +} + + + +static void vdpau_frame_dispose (vo_frame_t *vo_img) +{ + vdpau_frame_t *frame = (vdpau_frame_t *) vo_img ; + + if ( frame->chunk[0] ) + free (frame->chunk[0]); + if ( frame->chunk[1] ) + free (frame->chunk[1]); + if ( frame->chunk[2] ) + free (frame->chunk[2]); + if ( frame->vdpau_accel_data.surface != VDP_INVALID_HANDLE ) + vdp_video_surface_destroy( frame->vdpau_accel_data.surface ); + free (frame); +} + + + +static vo_frame_t *vdpau_alloc_frame (vo_driver_t *this_gen) +{ + vdpau_frame_t *frame; + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + + lprintf( "vo_vdpau: vdpau_alloc_frame\n" ); + + frame = (vdpau_frame_t *) calloc(1, sizeof(vdpau_frame_t)); + + if (!frame) + return NULL; + + frame->chunk[0] = frame->chunk[1] = frame->chunk[2] = NULL; + frame->width = frame->height = frame->format = frame->flags = 0; + + frame->vo_frame.accel_data = &frame->vdpau_accel_data; + + pthread_mutex_init (&frame->vo_frame.mutex, NULL); + + /* 
 * supply required functions/fields
 */
  frame->vo_frame.proc_duplicate_frame_data = NULL;
  frame->vo_frame.proc_slice = vdpau_frame_proc_slice;
  frame->vo_frame.proc_frame = NULL;
  frame->vo_frame.field = vdpau_frame_field;
  frame->vo_frame.dispose = vdpau_frame_dispose;
  frame->vo_frame.driver = this_gen;

  /* accel descriptor handed to VDPAU-capable decoders: device handle,
   * decoder entry points and the (not yet allocated) video surface */
  frame->vdpau_accel_data.vo_frame = &frame->vo_frame;
  frame->vdpau_accel_data.vdp_device = vdp_device;
  frame->vdpau_accel_data.surface = VDP_INVALID_HANDLE;
  frame->vdpau_accel_data.chroma = VDP_CHROMA_TYPE_420;
  frame->vdpau_accel_data.color_standard = this->color_standard;
  frame->vdpau_accel_data.vdp_decoder_create = vdp_decoder_create;
  frame->vdpau_accel_data.vdp_decoder_destroy = vdp_decoder_destroy;
  frame->vdpau_accel_data.vdp_decoder_render = vdp_decoder_render;
  frame->vdpau_accel_data.vdp_get_error_string = vdp_get_error_string;
  frame->vdpau_accel_data.vdp_runtime_nr = this->vdp_runtime_nr;
  frame->vdpau_accel_data.current_vdp_runtime_nr = &this->vdp_runtime_nr;

  return (vo_frame_t *) frame;
}



/*
 * Copy the frame's video surface back to main memory in a standard xine
 * format: YV12 for 4:2:0 frames, YUY2 for 4:2:2 (VO_CHROMA_422) frames.
 * Always fills data->format and data->img_size; pixels are only fetched
 * when the caller supplied a destination buffer in data->img.
 */
static void vdpau_provide_standard_frame_data (vo_frame_t *this_gen, xine_current_frame_data_t *data)
{
  vdpau_frame_t *this = (vdpau_frame_t *)this_gen;
  VdpStatus st;
  VdpYCbCrFormat format;

  if (this->vo_frame.format != XINE_IMGFMT_VDPAU) {
    fprintf(stderr, "vdpau_provide_standard_frame_data: unexpected frame format 0x%08x!\n", this->vo_frame.format);
    return;
  }

  if (!(this->flags & VO_CHROMA_422)) {
    data->format = XINE_IMGFMT_YV12;
    /* one full-size luma plane plus two quarter-size chroma planes */
    data->img_size = this->vo_frame.width * this->vo_frame.height
      + ((this->vo_frame.width + 1) / 2) * ((this->vo_frame.height + 1) / 2)
      + ((this->vo_frame.width + 1) / 2) * ((this->vo_frame.height + 1) / 2);
    if (data->img) {
      /* temporary planes for the surface read-back; pitches rounded up to
       * a multiple of 8 bytes */
      this->vo_frame.pitches[0] = 8*((this->vo_frame.width + 7) / 8);
      this->vo_frame.pitches[1] = 8*((this->vo_frame.width + 15) / 16);
      this->vo_frame.pitches[2] = 8*((this->vo_frame.width + 15) / 16);
      this->vo_frame.base[0] = xine_xmalloc_aligned(16,
this->vo_frame.pitches[0] * this->vo_frame.height, (void **)&this->chunk[0]); + this->vo_frame.base[1] = xine_xmalloc_aligned(16, this->vo_frame.pitches[1] * ((this->vo_frame.height+1)/2), (void **)&this->chunk[1]); + this->vo_frame.base[2] = xine_xmalloc_aligned(16, this->vo_frame.pitches[2] * ((this->vo_frame.height+1)/2), (void **)&this->chunk[2]); + format = VDP_YCBCR_FORMAT_YV12; + } + } else { + data->format = XINE_IMGFMT_YUY2; + data->img_size = this->vo_frame.width * this->vo_frame.height + + ((this->vo_frame.width + 1) / 2) * this->vo_frame.height + + ((this->vo_frame.width + 1) / 2) * this->vo_frame.height; + if (data->img) { + this->vo_frame.pitches[0] = 8*((this->vo_frame.width + 3) / 4); + this->vo_frame.base[0] = xine_xmalloc_aligned(16, this->vo_frame.pitches[0] * this->vo_frame.height, (void **)&this->chunk[0]); + format = VDP_YCBCR_FORMAT_YUYV; + } + } + + if (data->img) { + st = vdp_video_surface_getbits_ycbcr(this->vdpau_accel_data.surface, format, this->vo_frame.base, this->vo_frame.pitches); + if (st != VDP_STATUS_OK) + printf("vo_vdpau: failed to get surface bits !! 
%s\n", vdp_get_error_string(st)); + + if (format == VDP_YCBCR_FORMAT_YV12) { + yv12_to_yv12( + /* Y */ + this->vo_frame.base[0], this->vo_frame.pitches[0], + data->img, this->vo_frame.width, + /* U */ + this->vo_frame.base[2], this->vo_frame.pitches[2], + data->img+this->vo_frame.width*this->vo_frame.height, this->vo_frame.width/2, + /* V */ + this->vo_frame.base[1], this->vo_frame.pitches[1], + data->img+this->vo_frame.width*this->vo_frame.height+this->vo_frame.width*this->vo_frame.height/4, this->vo_frame.width/2, + /* width x height */ + this->vo_frame.width, this->vo_frame.height); + } else { + yuy2_to_yuy2( + /* src */ + this->vo_frame.base[0], this->vo_frame.pitches[0], + /* dst */ + data->img, this->vo_frame.width*2, + /* width x height */ + this->vo_frame.width, this->vo_frame.height); + } + + if (this->chunk[0]) + free(this->chunk[0]); + if (this->chunk[1]) + free(this->chunk[1]); + if (this->chunk[2]) + free(this->chunk[2]); + this->chunk[0] = this->chunk[1] = this->chunk[2] = NULL; + } +} + + + +static void vdpau_duplicate_frame_data (vo_frame_t *this_gen, vo_frame_t *original) +{ + vdpau_frame_t *this = (vdpau_frame_t *)this_gen; + vdpau_frame_t *orig = (vdpau_frame_t *)original; + VdpStatus st; + VdpYCbCrFormat format; + + if (orig->vo_frame.format != XINE_IMGFMT_VDPAU) { + fprintf(stderr, "vdpau_duplicate_frame_data: unexpected frame format 0x%08x!\n", orig->vo_frame.format); + return; + } + + if(orig->vdpau_accel_data.vdp_runtime_nr != this->vdpau_accel_data.vdp_runtime_nr) { + fprintf(stderr, "vdpau_duplicate_frame_data: called with invalid frame\n"); + return; + } + + if (!(orig->flags & VO_CHROMA_422)) { + this->vo_frame.pitches[0] = 8*((orig->vo_frame.width + 7) / 8); + this->vo_frame.pitches[1] = 8*((orig->vo_frame.width + 15) / 16); + this->vo_frame.pitches[2] = 8*((orig->vo_frame.width + 15) / 16); + this->vo_frame.base[0] = xine_xmalloc_aligned(16, this->vo_frame.pitches[0] * orig->vo_frame.height, (void **)&this->chunk[0]); + 
    this->vo_frame.base[1] = xine_xmalloc_aligned(16, this->vo_frame.pitches[1] * ((orig->vo_frame.height+1)/2), (void **)&this->chunk[1]);
    this->vo_frame.base[2] = xine_xmalloc_aligned(16, this->vo_frame.pitches[2] * ((orig->vo_frame.height+1)/2), (void **)&this->chunk[2]);
    format = VDP_YCBCR_FORMAT_YV12;
  } else {
    /* 4:2:2 frames round-trip through a single packed YUYV plane */
    this->vo_frame.pitches[0] = 8*((orig->vo_frame.width + 3) / 4);
    this->vo_frame.base[0] = xine_xmalloc_aligned(16, this->vo_frame.pitches[0] * orig->vo_frame.height, (void **)&this->chunk[0]);
    format = VDP_YCBCR_FORMAT_YUYV;
  }

  /* copy pixels: read them out of the source surface, write into ours */
  st = vdp_video_surface_getbits_ycbcr(orig->vdpau_accel_data.surface, format, this->vo_frame.base, this->vo_frame.pitches);
  if (st != VDP_STATUS_OK)
    printf("vo_vdpau: failed to get surface bits !! %s\n", vdp_get_error_string(st));

  st = vdp_video_surface_putbits_ycbcr(this->vdpau_accel_data.surface, format, this->vo_frame.base, this->vo_frame.pitches);
  if (st != VDP_STATUS_OK)
    printf("vo_vdpau: failed to put surface bits !! %s\n", vdp_get_error_string(st));

  this->vdpau_accel_data.color_standard = orig->vdpau_accel_data.color_standard;

  /* release the temporary planes used for the surface round-trip */
  if (this->chunk[0])
    free(this->chunk[0]);
  if (this->chunk[1])
    free(this->chunk[1]);
  if (this->chunk[2])
    free(this->chunk[2]);
  this->chunk[0] = this->chunk[1] = this->chunk[2] = NULL;
}



/*
 * (Re)configure a frame for the given size/format/flags: (re)allocates the
 * software planes and, for XINE_IMGFMT_VDPAU, the hardware video surface.
 */
static void vdpau_update_frame_format (vo_driver_t *this_gen, vo_frame_t *frame_gen,
      uint32_t width, uint32_t height, double ratio, int format, int flags)
{
  vdpau_driver_t *this = (vdpau_driver_t *) this_gen;
  vdpau_frame_t *frame = VDPAU_FRAME(frame_gen);

  /* 4:2:2 content needs a 422 chroma type surface, everything else 420 */
  VdpChromaType chroma = (flags & VO_CHROMA_422) ?
VDP_CHROMA_TYPE_422 : VDP_CHROMA_TYPE_420; + + vo_frame_t orig_frame_content; + if (format == XINE_IMGFMT_VDPAU) { + if (frame_gen != &frame->vo_frame) { + /* this is an intercepted frame, so we need to detect and propagate any + * changes on the original vo_frame to all the intercepted frames */ + xine_fast_memcpy(&orig_frame_content, &frame->vo_frame, sizeof (vo_frame_t)); + } + } + + /* Check frame size and format and reallocate if necessary */ + if ( (frame->width != width) || (frame->height != height) || (frame->format != format) || (frame->format==XINE_IMGFMT_VDPAU && frame->vdpau_accel_data.chroma!=chroma) || + (frame->vdpau_accel_data.vdp_runtime_nr != this->vdp_runtime_nr)) { + + /* (re-) allocate render space */ + if ( frame->chunk[0] ) + free (frame->chunk[0]); + if ( frame->chunk[1] ) + free (frame->chunk[1]); + if ( frame->chunk[2] ) + free (frame->chunk[2]); + frame->chunk[0] = frame->chunk[1] = frame->chunk[2] = NULL; + + if (format == XINE_IMGFMT_YV12) { + frame->vo_frame.pitches[0] = 8*((width + 7) / 8); + frame->vo_frame.pitches[1] = 8*((width + 15) / 16); + frame->vo_frame.pitches[2] = 8*((width + 15) / 16); + frame->vo_frame.base[0] = xine_xmalloc_aligned (16, frame->vo_frame.pitches[0] * height, (void **) &frame->chunk[0]); + frame->vo_frame.base[1] = xine_xmalloc_aligned (16, frame->vo_frame.pitches[1] * ((height+1)/2), (void **) &frame->chunk[1]); + frame->vo_frame.base[2] = xine_xmalloc_aligned (16, frame->vo_frame.pitches[2] * ((height+1)/2), (void **) &frame->chunk[2]); + } else if (format == XINE_IMGFMT_YUY2){ + frame->vo_frame.pitches[0] = 8*((width + 3) / 4); + frame->vo_frame.base[0] = xine_xmalloc_aligned (16, frame->vo_frame.pitches[0] * height, (void **) &frame->chunk[0]); + frame->chunk[1] = NULL; + frame->chunk[2] = NULL; + } + + if ( frame->vdpau_accel_data.vdp_runtime_nr != this->vdp_runtime_nr ) { + frame->vdpau_accel_data.surface = VDP_INVALID_HANDLE; + frame->vdpau_accel_data.vdp_runtime_nr = this->vdp_runtime_nr; + 
      frame->vdpau_accel_data.vdp_device = vdp_device;
      frame->vo_frame.proc_duplicate_frame_data = NULL;
      frame->vo_frame.proc_provide_standard_frame_data = NULL;
    }

    /* destroy a surface that no longer matches size/format/chroma */
    if ( frame->vdpau_accel_data.surface != VDP_INVALID_HANDLE ) {
      if ( (frame->width != width) || (frame->height != height) || (format != XINE_IMGFMT_VDPAU) || frame->vdpau_accel_data.chroma != chroma ) {
        lprintf("vo_vdpau: update_frame - destroy surface\n");
        vdp_video_surface_destroy( frame->vdpau_accel_data.surface );
        frame->vdpau_accel_data.surface = VDP_INVALID_HANDLE;
        --this->allocated_surfaces;
        frame->vo_frame.proc_duplicate_frame_data = NULL;
        frame->vo_frame.proc_provide_standard_frame_data = NULL;
      }
    }

    /* (re)create the hardware surface for VDPAU frames */
    if ( (format == XINE_IMGFMT_VDPAU) && (frame->vdpau_accel_data.surface == VDP_INVALID_HANDLE) ) {
      VdpStatus st = vdp_video_surface_create( vdp_device, chroma, width, height, &frame->vdpau_accel_data.surface );
      if ( st!=VDP_STATUS_OK )
        printf( "vo_vdpau: failed to create surface !! %s\n", vdp_get_error_string( st ) );
      else {
        frame->vdpau_accel_data.chroma = chroma;
        ++this->allocated_surfaces;
        /* the copy helpers only make sense once a surface exists */
        frame->vo_frame.proc_duplicate_frame_data = vdpau_duplicate_frame_data;
        frame->vo_frame.proc_provide_standard_frame_data = vdpau_provide_standard_frame_data;
      }
    }

    frame->width = width;
    frame->height = height;
    frame->format = format;
    frame->flags = flags;

    vdpau_frame_field ((vo_frame_t *)frame, flags);
  }

  frame->vdpau_accel_data.color_standard = VDP_COLOR_STANDARD_ITUR_BT_601;
  frame->ratio = ratio;
  frame->vo_frame.future_frame = NULL;

  if (format == XINE_IMGFMT_VDPAU) {
    if (frame_gen != &frame->vo_frame) {
      /* this is an intercepted frame, so we need to detect and propagate any
       * changes on the original vo_frame to all the intercepted frames */
      unsigned char *p0 = (unsigned char *)&orig_frame_content;
      unsigned char *p1 = (unsigned char *)&frame->vo_frame;
      int i;
      /* bytewise compare the pre-update snapshot against the current frame */
      for (i = 0; i < sizeof (vo_frame_t); i++) {
        if (*p0 != *p1) {
          /*
propagate the change */ + vo_frame_t *f = frame_gen; + while (f->next) { + /* serveral restrictions apply when intercepting VDPAU frames. So let's check + * the intercepted frames before modifing them and fail otherwise. */ + unsigned char *p = (unsigned char *)f + i; + if (*p != *p0) { + xprintf(this->xine, XINE_VERBOSITY_DEBUG, "vdpau_update_frame_format: a post plugin violates the restrictions on intercepting VDPAU frames\n"); + _x_abort(); + } + + *p = *p1; + f = f->next; + } + } + p0++; + p1++; + } + } + } +} + + + +static int vdpau_redraw_needed (vo_driver_t *this_gen) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + + _x_vo_scale_compute_ideal_size( &this->sc ); + if ( _x_vo_scale_redraw_needed( &this->sc ) ) { + _x_vo_scale_compute_output_size( &this->sc ); + return 1; + } + return 0; +} + + + +static void vdpau_release_back_frames( vo_driver_t *this_gen ) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + int i; + + for ( i=0; i<NUM_FRAMES_BACK; ++i ) { + if ( this->back_frame[ i ]) + this->back_frame[ i ]->vo_frame.free( &this->back_frame[ i ]->vo_frame ); + this->back_frame[ i ] = NULL; + } +} + + + +static void vdpau_backup_frame( vo_driver_t *this_gen, vo_frame_t *frame_gen ) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + vdpau_frame_t *frame = (vdpau_frame_t *) frame_gen; + + int i; + if ( this->back_frame[NUM_FRAMES_BACK-1]) { + this->back_frame[NUM_FRAMES_BACK-1]->vo_frame.free (&this->back_frame[NUM_FRAMES_BACK-1]->vo_frame); + } + for ( i=NUM_FRAMES_BACK-1; i>0; i-- ) + this->back_frame[i] = this->back_frame[i-1]; + this->back_frame[0] = frame; +} + + + +static void vdpau_set_deinterlace( vo_driver_t *this_gen ) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + + VdpVideoMixerFeature features[2]; + VdpBool feature_enables[2]; + int features_count = 0; + if ( this->temporal_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL; + ++features_count; + } + if ( 
this->temporal_spatial_is_supported ) {
    features[features_count] = VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL_SPATIAL;
    ++features_count;
  }

  /* nothing to toggle if the mixer supports neither temporal feature */
  if ( !features_count )
    return;

  if ( this->deinterlace ) {
    if ( this->video_mixer_width<800 ) {
      /* narrow video (<800 px): enable every advanced deinterlacer collected */
      feature_enables[0] = feature_enables[1] = 1;
      if ( this->temporal_is_supported ) {
        if ( this->temporal_spatial_is_supported )
          printf("vo_vdpau: deinterlace: temporal_spatial\n" );
        else
          printf("vo_vdpau: deinterlace: temporal\n" );
      }
      else
        printf("vo_vdpau: deinterlace: bob\n" );
    }
    else {
      /* wider video: honour the user-selected deinterlacer */
      switch ( this->deinterlacers_method[this->deinterlace_method] ) {
        case DEINT_BOB:
          feature_enables[0] = feature_enables[1] = 0;
          printf("vo_vdpau: deinterlace: bob\n" );
          break;
        case DEINT_HALF_TEMPORAL:
          feature_enables[0] = 1; feature_enables[1] = 0;
          printf("vo_vdpau: deinterlace: half_temporal\n" );
          break;
        case DEINT_TEMPORAL:
          /* NOTE(review): same enable mask as DEINT_HALF_TEMPORAL — the
           * half/full-rate distinction is presumably applied elsewhere at
           * display time; confirm this is intentional */
          feature_enables[0] = 1; feature_enables[1] = 0;
          printf("vo_vdpau: deinterlace: temporal\n" );
          break;
        case DEINT_HALF_TEMPORAL_SPATIAL:
          feature_enables[0] = feature_enables[1] = 1;
          printf("vo_vdpau: deinterlace: half_temporal_spatial\n" );
          break;
        case DEINT_TEMPORAL_SPATIAL:
          feature_enables[0] = feature_enables[1] = 1;
          printf("vo_vdpau: deinterlace: temporal_spatial\n" );
          break;
      }
    }
  }
  else {
    /* deinterlacing disabled: turn all collected features off */
    feature_enables[0] = feature_enables[1] = 0;
    printf("vo_vdpau: deinterlace: none\n" );
  }

  vdp_video_mixer_set_feature_enables( this->video_mixer, features_count, features, feature_enables );
}



/*
 * Toggle the mixer's inverse telecine feature: only enabled while
 * deinterlacing is active AND the user option allows it.
 */
static void vdpau_set_inverse_telecine( vo_driver_t *this_gen )
{
  vdpau_driver_t *this = (vdpau_driver_t *) this_gen;

  if ( !this->inverse_telecine_is_supported )
    return;

  VdpVideoMixerFeature features[] = { VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE };
  VdpBool feature_enables[1];
  if ( this->deinterlace && this->enable_inverse_telecine )
    feature_enables[0] = 1;
  else
    feature_enables[0] = 0;

  vdp_video_mixer_set_feature_enables(
this->video_mixer, 1, features, feature_enables ); + vdp_video_mixer_get_feature_enables( this->video_mixer, 1, features, feature_enables ); + printf("vo_vdpau: enabled features: inverse_telecine=%d\n", feature_enables[0] ); +} + + + +static void vdpau_update_deinterlace_method( void *this_gen, xine_cfg_entry_t *entry ) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + + this->deinterlace_method = entry->num_value; + printf( "vo_vdpau: deinterlace_method=%d\n", this->deinterlace_method ); + vdpau_set_deinterlace( (vo_driver_t*)this_gen ); +} + + + +static void vdpau_set_scaling_level( vo_driver_t *this_gen ) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + int i; + VdpVideoMixerFeature features[9]; + VdpBool feature_enables[9]; +#ifdef VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 + for ( i=0; i<this->scaling_level_max; ++i ) { + features[i] = VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 + i; + feature_enables[i] = 0; + } + vdp_video_mixer_set_feature_enables( this->video_mixer, this->scaling_level_max, features, feature_enables ); + + if ( this->scaling_level_current ) { + features[0] = VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 - 1 + this->scaling_level_current; + feature_enables[0] = 1; + vdp_video_mixer_set_feature_enables( this->video_mixer, 1, features, feature_enables ); + } + + printf( "vo_vdpau: set_scaling_level=%d\n", this->scaling_level_current ); +#endif +} + + + +static void vdpau_update_scaling_level( void *this_gen, xine_cfg_entry_t *entry ) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + + this->scaling_level_current = entry->num_value; + printf( "vo_vdpau: scaling_quality=%d\n", this->scaling_level_current ); + vdpau_set_scaling_level( (vo_driver_t*)this_gen ); +} + + + +static void vdpau_update_enable_inverse_telecine( void *this_gen, xine_cfg_entry_t *entry ) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + + this->enable_inverse_telecine = entry->num_value; + printf( "vo_vdpau: enable 
inverse_telecine=%d\n", this->enable_inverse_telecine ); + vdpau_set_inverse_telecine( (vo_driver_t*)this_gen ); +} + + + +static void vdpau_honor_progressive_flag( void *this_gen, xine_cfg_entry_t *entry ) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + + this->honor_progressive = entry->num_value; + printf( "vo_vdpau: honor_progressive=%d\n", this->honor_progressive ); +} + + + +static void vdpau_update_noise( vdpau_driver_t *this_gen ) +{ + if ( !this_gen->noise_reduction_is_supported ) + return; + + float value = this_gen->noise/100.0; + if ( value==0 ) { + VdpVideoMixerFeature features[] = { VDP_VIDEO_MIXER_FEATURE_NOISE_REDUCTION }; + VdpBool feature_enables[] = { 0 }; + vdp_video_mixer_set_feature_enables( this_gen->video_mixer, 1, features, feature_enables ); + printf( "vo_vdpau: disable noise reduction.\n" ); + return; + } + else { + VdpVideoMixerFeature features[] = { VDP_VIDEO_MIXER_FEATURE_NOISE_REDUCTION }; + VdpBool feature_enables[] = { 1 }; + vdp_video_mixer_set_feature_enables( this_gen->video_mixer, 1, features, feature_enables ); + printf( "vo_vdpau: enable noise reduction.\n" ); + } + + VdpVideoMixerAttribute attributes [] = { VDP_VIDEO_MIXER_ATTRIBUTE_NOISE_REDUCTION_LEVEL }; + void* attribute_values[] = { &value }; + VdpStatus st = vdp_video_mixer_set_attribute_values( this_gen->video_mixer, 1, attributes, attribute_values ); + if ( st != VDP_STATUS_OK ) + printf( "vo_vdpau: error, can't set noise reduction level !!\n" ); +} + + + +static void vdpau_update_sharpness( vdpau_driver_t *this_gen ) +{ + if ( !this_gen->sharpness_is_supported ) + return; + + float value = this_gen->sharpness/100.0; + if ( value==0 ) { + VdpVideoMixerFeature features[] = { VDP_VIDEO_MIXER_FEATURE_SHARPNESS }; + VdpBool feature_enables[] = { 0 }; + vdp_video_mixer_set_feature_enables( this_gen->video_mixer, 1, features, feature_enables ); + printf( "vo_vdpau: disable sharpness.\n" ); + return; + } + else { + VdpVideoMixerFeature features[] = { 
VDP_VIDEO_MIXER_FEATURE_SHARPNESS }; + VdpBool feature_enables[] = { 1 }; + vdp_video_mixer_set_feature_enables( this_gen->video_mixer, 1, features, feature_enables ); + printf( "vo_vdpau: enable sharpness.\n" ); + } + + VdpVideoMixerAttribute attributes [] = { VDP_VIDEO_MIXER_ATTRIBUTE_SHARPNESS_LEVEL }; + void* attribute_values[] = { &value }; + VdpStatus st = vdp_video_mixer_set_attribute_values( this_gen->video_mixer, 1, attributes, attribute_values ); + if ( st != VDP_STATUS_OK ) + printf( "vo_vdpau: error, can't set sharpness level !!\n" ); +} + + + +static void vdpau_update_csc( vdpau_driver_t *this_gen ) +{ + float hue = this_gen->hue/100.0; + float saturation = this_gen->saturation/100.0; + float contrast = this_gen->contrast/100.0; + float brightness = this_gen->brightness/100.0; + + printf( "vo_vdpau: vdpau_update_csc: hue=%f, saturation=%f, contrast=%f, brightness=%f, color_standard=%d\n", hue, saturation, contrast, brightness, this_gen->color_standard ); + + VdpCSCMatrix matrix; + VdpProcamp procamp = { VDP_PROCAMP_VERSION, brightness, contrast, saturation, hue }; + + VdpStatus st = vdp_generate_csc_matrix( &procamp, this_gen->color_standard, &matrix ); + if ( st != VDP_STATUS_OK ) { + printf( "vo_vdpau: error, can't generate csc matrix !!\n" ); + return; + } + VdpVideoMixerAttribute attributes [] = { VDP_VIDEO_MIXER_ATTRIBUTE_CSC_MATRIX }; + void* attribute_values[] = { &matrix }; + st = vdp_video_mixer_set_attribute_values( this_gen->video_mixer, 1, attributes, attribute_values ); + if ( st != VDP_STATUS_OK ) + printf( "vo_vdpau: error, can't set csc matrix !!\n" ); +} + + + +static void vdpau_update_skip_chroma( vdpau_driver_t *this_gen ) +{ + if ( !this_gen->skip_chroma_is_supported ) + return; + + VdpVideoMixerAttribute attributes [] = { VDP_VIDEO_MIXER_ATTRIBUTE_SKIP_CHROMA_DEINTERLACE }; + void* attribute_values[] = { &(this_gen->skip_chroma) }; + VdpStatus st = vdp_video_mixer_set_attribute_values( this_gen->video_mixer, 1, attributes, 
      attribute_values );
  if ( st != VDP_STATUS_OK )
    printf( "vo_vdpau: error, can't set skip_chroma !!\n" );
  else
    printf( "vo_vdpau: skip_chroma = %d\n", this_gen->skip_chroma );
}



/* config callback: store the new skip_chroma value and push it to the mixer */
static void vdpau_set_skip_chroma( void *this_gen, xine_cfg_entry_t *entry )
{
  vdpau_driver_t *this = (vdpau_driver_t *) this_gen;
  this->skip_chroma = entry->num_value;
  vdpau_update_skip_chroma( this );
}



/* advance to the next output surface, wrapping around after NOUTPUTSURFACE;
 * init_queue counts the first frames until the queue is primed (capped at 2) */
static void vdpau_shift_queue( vo_driver_t *this_gen )
{
  vdpau_driver_t *this = (vdpau_driver_t *) this_gen;

  if ( this->init_queue<2 )
    ++this->init_queue;
  ++this->current_output_surface;
  if ( this->current_output_surface > (NOUTPUTSURFACE-1) )
    this->current_output_surface = 0;
}



/* grow the current output surface when the window became larger than it;
 * surfaces are never shrunk here, only recreated on growth */
static void vdpau_check_output_size( vo_driver_t *this_gen )
{
  vdpau_driver_t *this = (vdpau_driver_t *) this_gen;

  if ( (this->sc.gui_width > this->output_surface_width[this->current_output_surface]) || (this->sc.gui_height > this->output_surface_height[this->current_output_surface]) ) {
    /* recreate output surface to match window size */
    lprintf( "vo_vdpau: output_surface size update\n" );
    this->output_surface_width[this->current_output_surface] = this->sc.gui_width;
    this->output_surface_height[this->current_output_surface] = this->sc.gui_height;

    vdp_output_surface_destroy( this->output_surface[this->current_output_surface] );
    vdp_output_surface_create( vdp_device, VDP_RGBA_FORMAT_B8G8R8A8, this->output_surface_width[this->current_output_surface], this->output_surface_height[this->current_output_surface], &this->output_surface[this->current_output_surface] );
  }
}



/*
 * Display a frame: upload software (YV12/YUY2) frames into the soft
 * surface, or pick up the decoder-rendered surface for VDPAU frames,
 * then run it through the video mixer and present it.
 */
static void vdpau_display_frame (vo_driver_t *this_gen, vo_frame_t *frame_gen)
{
  vdpau_driver_t *this = (vdpau_driver_t *) this_gen;
  vdpau_frame_t *frame = (vdpau_frame_t *) frame_gen;
  VdpStatus st;
  VdpVideoSurface surface;
  VdpChromaType chroma = this->video_mixer_chroma;
  VdpColorStandard color_standard = this->color_standard;
  uint32_t mix_w =
this->video_mixer_width; + uint32_t mix_h = this->video_mixer_height; + VdpTime stream_speed; + + + if(this->reinit_needed) + vdpau_reinit(this_gen); + + if ( (frame->width != this->sc.delivered_width) || (frame->height != this->sc.delivered_height) || (frame->ratio != this->sc.delivered_ratio) ) { + this->sc.force_redraw = 1; /* trigger re-calc of output size */ + } + + this->sc.delivered_height = frame->height; + this->sc.delivered_width = frame->width; + this->sc.delivered_ratio = frame->ratio; + this->sc.crop_left = frame->vo_frame.crop_left; + this->sc.crop_right = frame->vo_frame.crop_right; + this->sc.crop_top = frame->vo_frame.crop_top; + this->sc.crop_bottom = frame->vo_frame.crop_bottom; + + vdpau_redraw_needed( this_gen ); + + if ( (frame->format == XINE_IMGFMT_YV12) || (frame->format == XINE_IMGFMT_YUY2) ) { + chroma = ( frame->format==XINE_IMGFMT_YV12 )? VDP_CHROMA_TYPE_420 : VDP_CHROMA_TYPE_422; + if ( (frame->width != this->soft_surface_width) || (frame->height != this->soft_surface_height) || (frame->format != this->soft_surface_format) ) { + lprintf( "vo_vdpau: soft_surface size update\n" ); + /* recreate surface to match frame changes */ + this->soft_surface_width = frame->width; + this->soft_surface_height = frame->height; + this->soft_surface_format = frame->format; + vdp_video_surface_destroy( this->soft_surface ); + this->soft_surface = VDP_INVALID_HANDLE; + vdp_video_surface_create( vdp_device, chroma, this->soft_surface_width, this->soft_surface_height, &this->soft_surface ); + } + /* FIXME: have to swap U and V planes to get correct colors !! 
*/ + uint32_t pitches[] = { frame->vo_frame.pitches[0], frame->vo_frame.pitches[2], frame->vo_frame.pitches[1] }; + void* data[] = { frame->vo_frame.base[0], frame->vo_frame.base[2], frame->vo_frame.base[1] }; + if ( frame->format==XINE_IMGFMT_YV12 ) { + st = vdp_video_surface_putbits_ycbcr( this->soft_surface, VDP_YCBCR_FORMAT_YV12, &data, pitches ); + if ( st != VDP_STATUS_OK ) + printf( "vo_vdpau: vdp_video_surface_putbits_ycbcr YV12 error : %s\n", vdp_get_error_string( st ) ); + } + else { + st = vdp_video_surface_putbits_ycbcr( this->soft_surface, VDP_YCBCR_FORMAT_YUYV, &data, pitches ); + if ( st != VDP_STATUS_OK ) + printf( "vo_vdpau: vdp_video_surface_putbits_ycbcr YUY2 error : %s\n", vdp_get_error_string( st ) ); + } + surface = this->soft_surface; + mix_w = this->soft_surface_width; + mix_h = this->soft_surface_height; + } + else if (frame->format == XINE_IMGFMT_VDPAU) { + surface = frame->vdpau_accel_data.surface; + mix_w = frame->width; + mix_h = frame->height; + chroma = (frame->vo_frame.flags & VO_CHROMA_422) ? 
VDP_CHROMA_TYPE_422 : VDP_CHROMA_TYPE_420; + color_standard = frame->vdpau_accel_data.color_standard; + } + else { + /* unknown format */ + printf( "vo_vdpau: got an unknown image -------------\n" ); + frame->vo_frame.free( &frame->vo_frame ); + return; + } + + if ( (mix_w != this->video_mixer_width) || (mix_h != this->video_mixer_height) || (chroma != this->video_mixer_chroma)) { + vdpau_release_back_frames( this_gen ); /* empty past frames array */ + lprintf("vo_vdpau: recreate mixer to match frames: width=%d, height=%d, chroma=%d\n", mix_w, mix_h, chroma); + vdp_video_mixer_destroy( this->video_mixer ); + this->video_mixer = VDP_INVALID_HANDLE; + VdpVideoMixerFeature features[15]; + int features_count = 0; + if ( this->noise_reduction_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_NOISE_REDUCTION; + ++features_count; + } + if ( this->sharpness_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_SHARPNESS; + ++features_count; + } + if ( this->temporal_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL; + ++features_count; + } + if ( this->temporal_spatial_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL_SPATIAL; + ++features_count; + } + if ( this->inverse_telecine_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE; + ++features_count; + } + int i; +#ifdef VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 + for ( i=0; i<this->scaling_level_max; ++i ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 + i; + ++features_count; + } +#endif + VdpVideoMixerParameter params[] = { VDP_VIDEO_MIXER_PARAMETER_VIDEO_SURFACE_WIDTH, VDP_VIDEO_MIXER_PARAMETER_VIDEO_SURFACE_HEIGHT, + VDP_VIDEO_MIXER_PARAMETER_CHROMA_TYPE, VDP_VIDEO_MIXER_PARAMETER_LAYERS }; + int num_layers = 3; + void const *param_values[] = { &mix_w, &mix_h, &chroma, &num_layers }; + vdp_video_mixer_create( vdp_device, 
features_count, features, 4, params, param_values, &this->video_mixer ); + this->video_mixer_chroma = chroma; + this->video_mixer_width = mix_w; + this->video_mixer_height = mix_h; + vdpau_set_deinterlace( this_gen ); + vdpau_set_scaling_level( this_gen ); + vdpau_set_inverse_telecine( this_gen ); + vdpau_update_noise( this ); + vdpau_update_sharpness( this ); + this->color_standard = color_standard; + vdpau_update_csc( this ); + vdpau_update_skip_chroma( this ); + } + + if (color_standard != this->color_standard) { + lprintf("vo_vdpau: update color_standard: %d\n", color_standard); + this->color_standard = color_standard; + vdpau_update_csc( this ); + } + + VdpRect vid_source, out_dest, vid_dest; + + vdpau_check_output_size( this_gen ); + vid_source.x0 = this->sc.displayed_xoffset; vid_source.y0 = this->sc.displayed_yoffset; + vid_source.x1 = this->sc.displayed_width+this->sc.displayed_xoffset; vid_source.y1 = this->sc.displayed_height+this->sc.displayed_yoffset; + out_dest.x0 = out_dest.y0 = 0; + out_dest.x1 = this->sc.gui_width; out_dest.y1 = this->sc.gui_height; + vid_dest.x0 = this->sc.output_xoffset; vid_dest.y0 = this->sc.output_yoffset; + vid_dest.x1 = this->sc.output_xoffset+this->sc.output_width; vid_dest.y1 = this->sc.output_yoffset+this->sc.output_height; + + /* prepare field delay calculation to not run into a deadlock while display locked */ + stream_speed = frame->vo_frame.stream ? 
xine_get_param(frame->vo_frame.stream, XINE_PARAM_FINE_SPEED) : 0; + if (stream_speed != 0) { + int vo_bufs_in_fifo = 0; + _x_query_buffer_usage(frame->vo_frame.stream, NULL, NULL, &vo_bufs_in_fifo, NULL); + /* fprintf(stderr, "vo_bufs: %d\n", vo_bufs_in_fifo); */ + if (vo_bufs_in_fifo <= 0) + stream_speed = 0; /* still image -> no delay */ + } + + VdpTime last_time; + + if ( this->init_queue>1 ) + vdp_queue_block( vdp_queue, this->output_surface[this->current_output_surface], &last_time ); + + uint32_t layer_count; + VdpLayer layer[3]; + VdpRect unscaledsrc; + if ( this->has_overlay ) { + layer_count = 2; + layer[0].struct_version = VDP_LAYER_VERSION; layer[0].source_surface = this->overlay_output; layer[0].source_rect = &vid_source; layer[0].destination_rect = &vid_dest; + unscaledsrc.x0 = 0; unscaledsrc.y0 = 0; unscaledsrc.x1 = this->overlay_unscaled_width; unscaledsrc.y1 = this->overlay_unscaled_height; + layer[1].struct_version = VDP_LAYER_VERSION; layer[1].source_surface = this->overlay_unscaled; layer[1].source_rect = &unscaledsrc; layer[1].destination_rect = &unscaledsrc; + } + else { + layer_count = 0; + } + + VdpRect argb_dest; + VdpRect argb_rect = { 0, 0, this->argb_overlay_width, this->argb_overlay_height }; + if( this->has_argb_overlay ) { + layer_count++; + memcpy(&argb_dest, &vid_dest, sizeof (vid_dest)); + layer[layer_count-1].destination_rect = &argb_dest; + layer[layer_count-1].source_rect = &argb_rect; + layer[layer_count-1].source_surface = this->argb_overlay; + layer[layer_count-1].struct_version = VDP_LAYER_VERSION; + /* recalculate video destination window to match osd's specified video window */ + if (this->video_window_width > 0 && this->video_window_height > 0) { + VdpRect win_rect = { this->video_window_x, this->video_window_y, this->video_window_x + this->video_window_width, this->video_window_y + this->video_window_height }; + vid_dest.x0 = ((win_rect.x0 - argb_rect.x0) * (argb_dest.x1 - argb_dest.x0) + argb_dest.x0 * (argb_rect.x1 - 
argb_rect.x0)) / (argb_rect.x1 - argb_rect.x0); + vid_dest.y0 = ((win_rect.y0 - argb_rect.y0) * (argb_dest.y1 - argb_dest.y0) + argb_dest.y0 * (argb_rect.y1 - argb_rect.y0)) / (argb_rect.y1 - argb_rect.y0); + vid_dest.x1 = ((win_rect.x1 - argb_rect.x0) * (argb_dest.x1 - argb_dest.x0) + argb_dest.x0 * (argb_rect.x1 - argb_rect.x0)) / (argb_rect.x1 - argb_rect.x0); + vid_dest.y1 = ((win_rect.y1 - argb_rect.y0) * (argb_dest.y1 - argb_dest.y0) + argb_dest.y0 * (argb_rect.y1 - argb_rect.y0)) / (argb_rect.y1 - argb_rect.y0); + } + } + + /* try to get frame duration from previous img->pts when frame->duration is 0 */ + int frame_duration = frame->vo_frame.duration; + if ( !frame_duration && this->back_frame[0] ) { + int duration = frame->vo_frame.pts - this->back_frame[0]->vo_frame.pts; + if ( duration>0 && duration<4000 ) + frame_duration = duration; + } + int non_progressive = (this->honor_progressive && !frame->vo_frame.progressive_frame) || !this->honor_progressive; + +#ifdef LOCKDISPLAY + XLockDisplay( this->display ); +#endif + + if ( frame->format==XINE_IMGFMT_VDPAU && this->deinterlace && non_progressive /*&& stream_speed*/ && frame_duration>2500 ) { + VdpTime current_time = 0; + VdpVideoSurface past[2]; + VdpVideoSurface future[1]; + VdpVideoMixerPictureStructure picture_structure; + + past[1] = past[0] = (this->back_frame[0] && (this->back_frame[0]->format==XINE_IMGFMT_VDPAU)) ? this->back_frame[0]->vdpau_accel_data.surface : VDP_INVALID_HANDLE; + future[0] = surface; + picture_structure = ( frame->vo_frame.top_field_first ) ? 
VDP_VIDEO_MIXER_PICTURE_STRUCTURE_TOP_FIELD : VDP_VIDEO_MIXER_PICTURE_STRUCTURE_BOTTOM_FIELD; + + st = vdp_video_mixer_render( this->video_mixer, VDP_INVALID_HANDLE, 0, picture_structure, + 2, past, surface, 1, future, &vid_source, this->output_surface[this->current_output_surface], &out_dest, &vid_dest, layer_count, layer_count?layer:NULL ); + if ( st != VDP_STATUS_OK ) + printf( "vo_vdpau: vdp_video_mixer_render error : %s\n", vdp_get_error_string( st ) ); + + vdp_queue_get_time( vdp_queue, ¤t_time ); + vdp_queue_display( vdp_queue, this->output_surface[this->current_output_surface], 0, 0, 0 ); /* display _now_ */ + vdpau_shift_queue( this_gen ); + + int dm = this->deinterlacers_method[this->deinterlace_method]; + if ( (dm != DEINT_HALF_TEMPORAL) && (dm != DEINT_HALF_TEMPORAL_SPATIAL) && frame->vo_frame.future_frame ) { /* process second field */ + if ( this->init_queue>1 ) { +#ifdef LOCKDISPLAY + XUnlockDisplay(this->display); +#endif + vdp_queue_block( vdp_queue, this->output_surface[this->current_output_surface], &last_time ); +#ifdef LOCKDISPLAY + XLockDisplay(this->display); +#endif + } + + vdpau_check_output_size( this_gen ); + + picture_structure = ( frame->vo_frame.top_field_first ) ? 
VDP_VIDEO_MIXER_PICTURE_STRUCTURE_BOTTOM_FIELD : VDP_VIDEO_MIXER_PICTURE_STRUCTURE_TOP_FIELD; + past[0] = surface; + if ( frame->vo_frame.future_frame!=NULL && ((vdpau_frame_t*)(frame->vo_frame.future_frame))->format==XINE_IMGFMT_VDPAU ) + future[0] = ((vdpau_frame_t*)(frame->vo_frame.future_frame))->vdpau_accel_data.surface; + else + future[0] = VDP_INVALID_HANDLE; + + st = vdp_video_mixer_render( this->video_mixer, VDP_INVALID_HANDLE, 0, picture_structure, + 2, past, surface, 1, future, &vid_source, this->output_surface[this->current_output_surface], &out_dest, &vid_dest, layer_count, layer_count?layer:NULL ); + if ( st != VDP_STATUS_OK ) + printf( "vo_vdpau: vdp_video_mixer_render error : %s\n", vdp_get_error_string( st ) ); + + /* calculate delay for second field: there should be no delay for still images otherwise, take replay speed into account */ + if (stream_speed > 0) + current_time += frame->vo_frame.duration * 1000000ull * XINE_FINE_SPEED_NORMAL / (180 * stream_speed); + else + current_time = 0; /* immediately i. e. 
no delay */ + + //current_time = 0; + //printf( "vo_vdpau: deint delay = %d\n", frame_duration *1ull * XINE_FINE_SPEED_NORMAL / (180 * stream_speed) ); + vdp_queue_display( vdp_queue, this->output_surface[this->current_output_surface], 0, 0, current_time ); + vdpau_shift_queue( this_gen ); + } + } + else { + st = vdp_video_mixer_render( this->video_mixer, VDP_INVALID_HANDLE, 0, VDP_VIDEO_MIXER_PICTURE_STRUCTURE_FRAME, + 0, 0, surface, 0, 0, &vid_source, this->output_surface[this->current_output_surface], &out_dest, &vid_dest, layer_count, layer_count?layer:NULL ); + if ( st != VDP_STATUS_OK ) + printf( "vo_vdpau: vdp_video_mixer_render error : %s\n", vdp_get_error_string( st ) ); + + vdp_queue_display( vdp_queue, this->output_surface[this->current_output_surface], 0, 0, 0 ); + vdpau_shift_queue( this_gen ); + } + +#ifdef LOCKDISPLAY + XUnlockDisplay( this->display ); +#endif + + if ( stream_speed ) /* do not release past frame if paused, it will be used for redrawing */ + vdpau_backup_frame( this_gen, frame_gen ); + else + frame->vo_frame.free( &frame->vo_frame ); +} + + + +static int vdpau_get_property (vo_driver_t *this_gen, int property) +{ + vdpau_driver_t *this = (vdpau_driver_t*)this_gen; + + switch (property) { + case VO_PROP_MAX_NUM_FRAMES: + return 30; + case VO_PROP_WINDOW_WIDTH: + return this->sc.gui_width; + case VO_PROP_WINDOW_HEIGHT: + return this->sc.gui_height; + case VO_PROP_OUTPUT_WIDTH: + return this->sc.output_width; + case VO_PROP_OUTPUT_HEIGHT: + return this->sc.output_height; + case VO_PROP_OUTPUT_XOFFSET: + return this->sc.output_xoffset; + case VO_PROP_OUTPUT_YOFFSET: + return this->sc.output_yoffset; + case VO_PROP_HUE: + return this->hue; + case VO_PROP_SATURATION: + return this->saturation; + case VO_PROP_CONTRAST: + return this->contrast; + case VO_PROP_BRIGHTNESS: + return this->brightness; + case VO_PROP_SHARPNESS: + return this->sharpness; + case VO_PROP_NOISE_REDUCTION: + return this->noise; + case VO_PROP_ZOOM_X: + return 
this->zoom_x; + case VO_PROP_ZOOM_Y: + return this->zoom_y; + case VO_PROP_ASPECT_RATIO: + return this->sc.user_ratio; + } + + return -1; +} + + + +static int vdpau_set_property (vo_driver_t *this_gen, int property, int value) +{ + vdpau_driver_t *this = (vdpau_driver_t*)this_gen; + + printf("vdpau_set_property: property=%d, value=%d\n", property, value ); + + switch (property) { + case VO_PROP_INTERLACED: + this->deinterlace = value; + vdpau_set_deinterlace( this_gen ); + break; + case VO_PROP_ZOOM_X: + if ((value >= XINE_VO_ZOOM_MIN) && (value <= XINE_VO_ZOOM_MAX)) { + this->zoom_x = value; + this->sc.zoom_factor_x = (double)value / (double)XINE_VO_ZOOM_STEP; + _x_vo_scale_compute_ideal_size( &this->sc ); + this->sc.force_redraw = 1; /* trigger re-calc of output size */ + } + break; + case VO_PROP_ZOOM_Y: + if ((value >= XINE_VO_ZOOM_MIN) && (value <= XINE_VO_ZOOM_MAX)) { + this->zoom_y = value; + this->sc.zoom_factor_y = (double)value / (double)XINE_VO_ZOOM_STEP; + _x_vo_scale_compute_ideal_size( &this->sc ); + this->sc.force_redraw = 1; /* trigger re-calc of output size */ + } + break; + case VO_PROP_ASPECT_RATIO: + if ( value>=XINE_VO_ASPECT_NUM_RATIOS ) + value = XINE_VO_ASPECT_AUTO; + this->sc.user_ratio = value; + this->sc.force_redraw = 1; /* trigger re-calc of output size */ + break; + case VO_PROP_HUE: this->hue = value; vdpau_update_csc( this ); break; + case VO_PROP_SATURATION: this->saturation = value; vdpau_update_csc( this ); break; + case VO_PROP_CONTRAST: this->contrast = value; vdpau_update_csc( this ); break; + case VO_PROP_BRIGHTNESS: this->brightness = value; vdpau_update_csc( this ); break; + case VO_PROP_SHARPNESS: this->sharpness = value; vdpau_update_sharpness( this ); break; + case VO_PROP_NOISE_REDUCTION: this->noise = value; vdpau_update_noise( this ); break; + } + + return value; +} + + + +static void vdpau_get_property_min_max (vo_driver_t *this_gen, int property, int *min, int *max) +{ + switch ( property ) { + case VO_PROP_HUE: + 
*max = 314; *min = -314; break; + case VO_PROP_SATURATION: + *max = 1000; *min = 0; break; + case VO_PROP_CONTRAST: + *max = 1000; *min = 0; break; + case VO_PROP_BRIGHTNESS: + *max = 100; *min = -100; break; + case VO_PROP_SHARPNESS: + *max = 100; *min = -100; break; + case VO_PROP_NOISE_REDUCTION: + *max = 100; *min = 0; break; + default: + *max = 0; *min = 0; + } +} + + + +static int vdpau_gui_data_exchange (vo_driver_t *this_gen, int data_type, void *data) +{ + vdpau_driver_t *this = (vdpau_driver_t*)this_gen; + + switch (data_type) { +#ifndef XINE_DISABLE_DEPRECATED_FEATURES + case XINE_GUI_SEND_COMPLETION_EVENT: + break; +#endif + + case XINE_GUI_SEND_EXPOSE_EVENT: { + if ( this->init_queue ) { +#ifdef LOCKDISPLAY + XLockDisplay( this->display ); +#endif + int previous = this->current_output_surface - 1; + if ( previous < 0 ) + previous = NOUTPUTSURFACE - 1; + vdp_queue_display( vdp_queue, this->output_surface[previous], 0, 0, 0 ); +#ifdef LOCKDISPLAY + XUnlockDisplay( this->display ); +#endif + } + break; + } + + case XINE_GUI_SEND_DRAWABLE_CHANGED: { + VdpStatus st; +#ifdef LOCKDISPLAY + XLockDisplay( this->display ); +#endif + this->drawable = (Drawable) data; + vdp_queue_destroy( vdp_queue ); + vdp_queue_target_destroy( vdp_queue_target ); + st = vdp_queue_target_create_x11( vdp_device, this->drawable, &vdp_queue_target ); + if ( st != VDP_STATUS_OK ) { + printf( "vo_vdpau: FATAL !! Can't recreate presentation queue target after drawable change !!\n" ); +#ifdef LOCKDISPLAY + XUnlockDisplay( this->display ); +#endif + break; + } + st = vdp_queue_create( vdp_device, vdp_queue_target, &vdp_queue ); + if ( st != VDP_STATUS_OK ) { + printf( "vo_vdpau: FATAL !! 
Can't recreate presentation queue after drawable change !!\n" ); +#ifdef LOCKDISPLAY + XUnlockDisplay( this->display ); +#endif + break; + } + vdp_queue_set_background_color( vdp_queue, &this->back_color ); +#ifdef LOCKDISPLAY + XUnlockDisplay( this->display ); +#endif + this->sc.force_redraw = 1; + break; + } + + case XINE_GUI_SEND_TRANSLATE_GUI_TO_VIDEO: { + int x1, y1, x2, y2; + x11_rectangle_t *rect = data; + + _x_vo_scale_translate_gui2video(&this->sc, rect->x, rect->y, &x1, &y1); + _x_vo_scale_translate_gui2video(&this->sc, rect->x + rect->w, rect->y + rect->h, &x2, &y2); + rect->x = x1; + rect->y = y1; + rect->w = x2-x1; + rect->h = y2-y1; + break; + } + + default: + return -1; + } + + return 0; +} + + + +static uint32_t vdpau_get_capabilities (vo_driver_t *this_gen) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + + return this->capabilities; +} + + + +static void vdpau_dispose (vo_driver_t *this_gen) +{ + vdpau_driver_t *this = (vdpau_driver_t *) this_gen; + int i; + + this->ovl_yuv2rgb->dispose(this->ovl_yuv2rgb); + this->yuv2rgb_factory->dispose (this->yuv2rgb_factory); + + for ( i=0; i<XINE_VORAW_MAX_OVL; ++i ) { + if ( this->overlays[i].ovl_bitmap != VDP_INVALID_HANDLE ) + vdp_bitmap_destroy( this->overlays[i].ovl_bitmap ); + } + + if ( this->video_mixer!=VDP_INVALID_HANDLE ) + vdp_video_mixer_destroy( this->video_mixer ); + if ( this->soft_surface != VDP_INVALID_HANDLE ) + vdp_video_surface_destroy( this->soft_surface ); + + if ( vdp_output_surface_destroy ) { + if (this->argb_overlay != VDP_INVALID_HANDLE) + vdp_output_surface_destroy(this->argb_overlay); + if ( this->overlay_unscaled!=VDP_INVALID_HANDLE ) + vdp_output_surface_destroy( this->overlay_unscaled ); + if ( this->overlay_output!=VDP_INVALID_HANDLE ) + vdp_output_surface_destroy( this->overlay_output ); + for ( i=0; i<NOUTPUTSURFACE; ++i ) { + if ( this->output_surface[i]!=VDP_INVALID_HANDLE ) + vdp_output_surface_destroy( this->output_surface[i] ); + } + } + + if ( vdp_queue != 
VDP_INVALID_HANDLE ) + vdp_queue_destroy( vdp_queue ); + if ( vdp_queue_target != VDP_INVALID_HANDLE ) + vdp_queue_target_destroy( vdp_queue_target ); + + for ( i=0; i<NUM_FRAMES_BACK; i++ ) + if ( this->back_frame[i] ) + this->back_frame[i]->vo_frame.dispose( &this->back_frame[i]->vo_frame ); + + if ( (vdp_device != VDP_INVALID_HANDLE) && vdp_device_destroy ) + vdp_device_destroy( vdp_device ); + + free (this); +} + + + +static int vdpau_reinit_error( VdpStatus st, const char *msg ) +{ + if ( st != VDP_STATUS_OK ) { + printf( "vo_vdpau: %s : %s\n", msg, vdp_get_error_string( st ) ); + return 1; + } + return 0; +} + + + +static void vdpau_reinit( vo_driver_t *this_gen ) +{ + printf("vo_vdpau: VDPAU was pre-empted. Reinit.\n"); + vdpau_driver_t *this = (vdpau_driver_t *)this_gen; + +#ifdef LOCKDISPLAY + XLockDisplay(guarded_display); +#endif + vdpau_release_back_frames(this_gen); + + VdpStatus st = vdp_device_create_x11( this->display, this->screen, &vdp_device, &vdp_get_proc_address ); + + if ( st != VDP_STATUS_OK ) { + printf( "vo_vdpau: Can't create vdp device : " ); + if ( st == VDP_STATUS_NO_IMPLEMENTATION ) + printf( "No vdpau implementation.\n" ); + else + printf( "unsupported GPU?\n" ); + return; + } + + st = vdp_queue_target_create_x11( vdp_device, this->drawable, &vdp_queue_target ); + if ( vdpau_reinit_error( st, "Can't create presentation queue target !!" ) ) + return; + st = vdp_queue_create( vdp_device, vdp_queue_target, &vdp_queue ); + if ( vdpau_reinit_error( st, "Can't create presentation queue !!" ) ) + return; + vdp_queue_set_background_color( vdp_queue, &this->back_color ); + + + VdpChromaType chroma = VDP_CHROMA_TYPE_420; + st = orig_vdp_video_surface_create( vdp_device, chroma, this->soft_surface_width, this->soft_surface_height, &this->soft_surface ); + if ( vdpau_reinit_error( st, "Can't create video surface !!" 
) ) + return; + + this->current_output_surface = 0; + this->init_queue = 0; + int i; + for ( i=0; i<NOUTPUTSURFACE; ++i ) { + st = vdp_output_surface_create( vdp_device, VDP_RGBA_FORMAT_B8G8R8A8, this->output_surface_width[i], this->output_surface_height[i], &this->output_surface[i] ); + if ( vdpau_reinit_error( st, "Can't create output surface !!" ) ) { + int j; + for ( j=0; j<i; ++j ) + vdp_output_surface_destroy( this->output_surface[j] ); + vdp_video_surface_destroy( this->soft_surface ); + return; + } + } + + /* osd overlays need to be recreated */ + for ( i=0; i<XINE_VORAW_MAX_OVL; ++i ) { + this->overlays[i].ovl_bitmap = VDP_INVALID_HANDLE; + this->overlays[i].bitmap_width = 0; + this->overlays[i].bitmap_height = 0; + } + this->overlay_output = VDP_INVALID_HANDLE; + this->overlay_output_width = this->overlay_output_height = 0; + this->overlay_unscaled = VDP_INVALID_HANDLE; + this->overlay_unscaled_width = this->overlay_unscaled_height = 0; + this->ovl_changed = 0; + this->has_overlay = 0; + this->has_unscaled = 0; + + this->argb_overlay = VDP_INVALID_HANDLE; + this->argb_overlay_width = this->argb_overlay_height = 0; + this->has_argb_overlay = 0; + + VdpVideoMixerFeature features[15]; + int features_count = 0; + if ( this->noise_reduction_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_NOISE_REDUCTION; + ++features_count; + } + if ( this->sharpness_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_SHARPNESS; + ++features_count; + } + if ( this->temporal_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL; + ++features_count; + } + if ( this->temporal_spatial_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL_SPATIAL; + ++features_count; + } + if ( this->inverse_telecine_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE; + ++features_count; + } +#ifdef VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 + 
for ( i=0; i<this->scaling_level_max; ++i ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 + i; + ++features_count; + } +#endif + VdpVideoMixerParameter params[] = { VDP_VIDEO_MIXER_PARAMETER_VIDEO_SURFACE_WIDTH, VDP_VIDEO_MIXER_PARAMETER_VIDEO_SURFACE_HEIGHT, VDP_VIDEO_MIXER_PARAMETER_CHROMA_TYPE, VDP_VIDEO_MIXER_PARAMETER_LAYERS }; + int num_layers = 3; + void const *param_values[] = { &this->video_mixer_width, &this->video_mixer_height, &chroma, &num_layers }; + st = vdp_video_mixer_create( vdp_device, features_count, features, 4, params, param_values, &this->video_mixer ); + if ( vdpau_reinit_error( st, "Can't create video mixer !!" ) ) { + orig_vdp_video_surface_destroy( this->soft_surface ); + for ( i=0; i<NOUTPUTSURFACE; ++i ) + vdp_output_surface_destroy( this->output_surface[i] ); + return; + } + this->video_mixer_chroma = chroma; + vdpau_set_deinterlace( this_gen ); + vdpau_set_scaling_level( this_gen ); + vdpau_set_inverse_telecine( this_gen ); + vdpau_update_noise( this ); + vdpau_update_sharpness( this ); + vdpau_update_csc( this ); + vdpau_update_skip_chroma( this ); + + vdp_preemption_callback_register(vdp_device, &vdp_preemption_callback, (void*)this); + + this->vdp_runtime_nr++; + this->reinit_needed = 0; +#ifdef LOCKDISPLAY + XUnlockDisplay(guarded_display); +#endif + printf("vo_vdpau: Reinit done.\n"); +} + + + +static void vdp_preemption_callback(VdpDevice device, void *context) +{ + printf("vo_vdpau: VDPAU preemption callback\n"); + vdpau_driver_t *this = (vdpau_driver_t *)context; + this->reinit_needed = 1; +} + + + +static int vdpau_init_error( VdpStatus st, const char *msg, vo_driver_t *driver, int error_string ) +{ + if ( st != VDP_STATUS_OK ) { + if ( error_string ) + printf( "vo_vdpau: %s : %s\n", msg, vdp_get_error_string( st ) ); + else + printf( "vo_vdpau: %s\n", msg ); + vdpau_dispose( driver ); + return 1; + } + return 0; +} + + + +static vo_driver_t *vdpau_open_plugin (video_driver_class_t *class_gen, 
const void *visual_gen) +{ + vdpau_class_t *class = (vdpau_class_t *) class_gen; + x11_visual_t *visual = (x11_visual_t *) visual_gen; + vdpau_driver_t *this; + config_values_t *config = class->xine->config; + int i; + + this = (vdpau_driver_t *) calloc(1, sizeof(vdpau_driver_t)); + + if (!this) + return NULL; + + guarded_display = visual->display; + this->display = visual->display; + this->screen = visual->screen; + this->drawable = visual->d; + + _x_vo_scale_init(&this->sc, 1, 0, config); + this->sc.frame_output_cb = visual->frame_output_cb; + this->sc.dest_size_cb = visual->dest_size_cb; + this->sc.user_data = visual->user_data; + this->sc.user_ratio = XINE_VO_ASPECT_AUTO; + + this->zoom_x = 100; + this->zoom_y = 100; + + this->xine = class->xine; + this->config = config; + + this->vo_driver.get_capabilities = vdpau_get_capabilities; + this->vo_driver.alloc_frame = vdpau_alloc_frame; + this->vo_driver.update_frame_format = vdpau_update_frame_format; + this->vo_driver.overlay_begin = vdpau_overlay_begin; + this->vo_driver.overlay_blend = vdpau_overlay_blend; + this->vo_driver.overlay_end = vdpau_overlay_end; + this->vo_driver.display_frame = vdpau_display_frame; + this->vo_driver.get_property = vdpau_get_property; + this->vo_driver.set_property = vdpau_set_property; + this->vo_driver.get_property_min_max = vdpau_get_property_min_max; + this->vo_driver.gui_data_exchange = vdpau_gui_data_exchange; + this->vo_driver.dispose = vdpau_dispose; + this->vo_driver.redraw_needed = vdpau_redraw_needed; + + this->video_mixer = VDP_INVALID_HANDLE; + for ( i=0; i<NOUTPUTSURFACE; ++i ) + this->output_surface[i] = VDP_INVALID_HANDLE; + this->soft_surface = VDP_INVALID_HANDLE; + vdp_queue = VDP_INVALID_HANDLE; + vdp_queue_target = VDP_INVALID_HANDLE; + vdp_device = VDP_INVALID_HANDLE; + + vdp_output_surface_destroy = NULL; + vdp_device_destroy = NULL; + + this->sharpness_is_supported = 0; + this->noise_reduction_is_supported = 0; + this->temporal_is_supported = 0; + 
this->temporal_spatial_is_supported = 0; + this->inverse_telecine_is_supported = 0; + this->skip_chroma_is_supported = 0; + + for ( i=0; i<XINE_VORAW_MAX_OVL; ++i ) { + this->overlays[i].ovl_w = this->overlays[i].ovl_h = 0; + this->overlays[i].bitmap_width = this->overlays[i].bitmap_height = 0; + this->overlays[i].ovl_bitmap = VDP_INVALID_HANDLE; + this->overlays[i].ovl_x = this->overlays[i].ovl_y = 0; + } + this->overlay_output = VDP_INVALID_HANDLE; + this->overlay_output_width = this->overlay_output_height = 0; + this->overlay_unscaled = VDP_INVALID_HANDLE; + this->overlay_unscaled_width = this->overlay_unscaled_height = 0; + this->ovl_changed = 0; + this->has_overlay = 0; + this->has_unscaled = 0; + + this->argb_overlay = VDP_INVALID_HANDLE; + this->argb_overlay_width = this->argb_overlay_height = 0; + this->has_argb_overlay = 0; + + /* overlay converter */ + this->yuv2rgb_factory = yuv2rgb_factory_init (MODE_24_BGR, 0, NULL); + this->ovl_yuv2rgb = this->yuv2rgb_factory->create_converter( this->yuv2rgb_factory ); + + VdpStatus st = vdp_device_create_x11( visual->display, visual->screen, &vdp_device, &vdp_get_proc_address ); + if ( st != VDP_STATUS_OK ) { + printf( "vo_vdpau: Can't create vdp device : " ); + if ( st == VDP_STATUS_NO_IMPLEMENTATION ) + printf( "No vdpau implementation.\n" ); + else + printf( "unsupported GPU?\n" ); + vdpau_dispose( &this->vo_driver ); + return NULL; + } + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_GET_ERROR_STRING , (void*)&vdp_get_error_string ); + if ( vdpau_init_error( st, "Can't get GET_ERROR_STRING proc address !!", &this->vo_driver, 0 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_GET_API_VERSION , (void*)&vdp_get_api_version ); + if ( vdpau_init_error( st, "Can't get GET_API_VERSION proc address !!", &this->vo_driver, 1 ) ) + return NULL; + uint32_t tmp; + vdp_get_api_version( &tmp ); + printf( "vo_vdpau: vdpau API version : %d\n", tmp ); + st = vdp_get_proc_address( vdp_device, 
VDP_FUNC_ID_GET_INFORMATION_STRING , (void*)&vdp_get_information_string ); + if ( vdpau_init_error( st, "Can't get GET_INFORMATION_STRING proc address !!", &this->vo_driver, 1 ) ) + return NULL; + const char *s; + st = vdp_get_information_string( &s ); + printf( "vo_vdpau: vdpau implementation description : %s\n", s ); + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_SURFACE_QUERY_GET_PUT_BITS_Y_CB_CR_CAPABILITIES , (void*)&vdp_video_surface_query_get_put_bits_ycbcr_capabilities ); + if ( vdpau_init_error( st, "Can't get VIDEO_SURFACE_QUERY_GET_PUT_BITS_Y_CB_CR_CAPABILITIES proc address !!", &this->vo_driver, 1 ) ) + return NULL; + VdpBool ok; + st = vdp_video_surface_query_get_put_bits_ycbcr_capabilities( vdp_device, VDP_CHROMA_TYPE_422, VDP_YCBCR_FORMAT_YUYV, &ok ); + if ( vdpau_init_error( st, "Failed to check vdpau yuy2 capability", &this->vo_driver, 1 ) ) + return NULL; + if ( !ok ) { + printf( "vo_vdpau: VideoSurface doesn't support yuy2, sorry.\n"); + vdpau_dispose( &this->vo_driver ); + return NULL; + } + st = vdp_video_surface_query_get_put_bits_ycbcr_capabilities( vdp_device, VDP_CHROMA_TYPE_420, VDP_YCBCR_FORMAT_YV12, &ok ); + if ( vdpau_init_error( st, "Failed to check vdpau yv12 capability", &this->vo_driver, 1 ) ) + return NULL; + if ( !ok ) { + printf( "vo_vdpau: VideoSurface doesn't support yv12, sorry.\n"); + vdpau_dispose( &this->vo_driver ); + return NULL; + } + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_DEVICE_DESTROY , (void*)&vdp_device_destroy ); + if ( vdpau_init_error( st, "Can't get DEVICE_DESTROY proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_SURFACE_CREATE , (void*)&orig_vdp_video_surface_create ); vdp_video_surface_create = guarded_vdp_video_surface_create; + if ( vdpau_init_error( st, "Can't get VIDEO_SURFACE_CREATE proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_SURFACE_DESTROY , 
(void*)&orig_vdp_video_surface_destroy ); vdp_video_surface_destroy = guarded_vdp_video_surface_destroy; + if ( vdpau_init_error( st, "Can't get VIDEO_SURFACE_DESTROY proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_SURFACE_PUT_BITS_Y_CB_CR , (void*)&vdp_video_surface_putbits_ycbcr ); + if ( vdpau_init_error( st, "Can't get VIDEO_SURFACE_PUT_BITS_Y_CB_CR proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_SURFACE_GET_BITS_Y_CB_CR , (void*)&vdp_video_surface_getbits_ycbcr ); + if ( vdpau_init_error( st, "Can't get VIDEO_SURFACE_GET_BITS_Y_CB_CR proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_OUTPUT_SURFACE_CREATE , (void*)&vdp_output_surface_create ); + if ( vdpau_init_error( st, "Can't get OUTPUT_SURFACE_CREATE proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_OUTPUT_SURFACE_DESTROY , (void*)&vdp_output_surface_destroy ); + if ( vdpau_init_error( st, "Can't get OUTPUT_SURFACE_DESTROY proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_BITMAP_SURFACE , (void*)&vdp_output_surface_render_bitmap_surface ); + if ( vdpau_init_error( st, "Can't get OUTPUT_SURFACE_RENDER_BITMAP_SURFACE proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_NATIVE , (void*)&vdp_output_surface_put_bits ); + if ( vdpau_init_error( st, "Can't get VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_NATIVE proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_MIXER_CREATE , (void*)&vdp_video_mixer_create ); + if ( vdpau_init_error( st, "Can't get VIDEO_MIXER_CREATE proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = 
vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_MIXER_DESTROY , (void*)&vdp_video_mixer_destroy ); + if ( vdpau_init_error( st, "Can't get VIDEO_MIXER_DESTROY proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_MIXER_RENDER , (void*)&vdp_video_mixer_render ); + if ( vdpau_init_error( st, "Can't get VIDEO_MIXER_RENDER proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_MIXER_SET_ATTRIBUTE_VALUES , (void*)&vdp_video_mixer_set_attribute_values ); + if ( vdpau_init_error( st, "Can't get VIDEO_MIXER_SET_ATTRIBUTE_VALUES proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_MIXER_SET_FEATURE_ENABLES , (void*)&vdp_video_mixer_set_feature_enables ); + if ( vdpau_init_error( st, "Can't get VIDEO_MIXER_SET_FEATURE_ENABLES proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_MIXER_GET_FEATURE_ENABLES , (void*)&vdp_video_mixer_get_feature_enables ); + if ( vdpau_init_error( st, "Can't get VIDEO_MIXER_GET_FEATURE_ENABLES proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_MIXER_QUERY_FEATURE_SUPPORT , (void*)&vdp_video_mixer_query_feature_support ); + if ( vdpau_init_error( st, "Can't get VIDEO_MIXER_QUERY_FEATURE_SUPPORT proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_MIXER_QUERY_PARAMETER_SUPPORT , (void*)&vdp_video_mixer_query_parameter_support ); + if ( vdpau_init_error( st, "Can't get VIDEO_MIXER_QUERY_PARAMETER_SUPPORT proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_MIXER_QUERY_ATTRIBUTE_SUPPORT , (void*)&vdp_video_mixer_query_attribute_support ); + if ( vdpau_init_error( st, "Can't get VIDEO_MIXER_QUERY_ATTRIBUTE_SUPPORT 
proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_MIXER_QUERY_PARAMETER_VALUE_RANGE , (void*)&vdp_video_mixer_query_parameter_value_range ); + if ( vdpau_init_error( st, "Can't get VIDEO_MIXER_QUERY_PARAMETER_VALUE_RANGE proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_VIDEO_MIXER_QUERY_ATTRIBUTE_VALUE_RANGE , (void*)&vdp_video_mixer_query_attribute_value_range ); + if ( vdpau_init_error( st, "Can't get VIDEO_MIXER_QUERY_ATTRIBUTE_VALUE_RANGE proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_GENERATE_CSC_MATRIX , (void*)&vdp_generate_csc_matrix ); + if ( vdpau_init_error( st, "Can't get GENERATE_CSC_MATRIX proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_CREATE_X11 , (void*)&vdp_queue_target_create_x11 ); + if ( vdpau_init_error( st, "Can't get PRESENTATION_QUEUE_TARGET_CREATE_X11 proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_DESTROY , (void*)&vdp_queue_target_destroy ); + if ( vdpau_init_error( st, "Can't get PRESENTATION_QUEUE_TARGET_DESTROY proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_PRESENTATION_QUEUE_CREATE , (void*)&vdp_queue_create ); + if ( vdpau_init_error( st, "Can't get PRESENTATION_QUEUE_CREATE proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_PRESENTATION_QUEUE_DESTROY , (void*)&vdp_queue_destroy ); + if ( vdpau_init_error( st, "Can't get PRESENTATION_QUEUE_DESTROY proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_PRESENTATION_QUEUE_DISPLAY , (void*)&vdp_queue_display ); + if ( vdpau_init_error( st, "Can't get 
PRESENTATION_QUEUE_DISPLAY proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_PRESENTATION_QUEUE_BLOCK_UNTIL_SURFACE_IDLE , (void*)&vdp_queue_block ); + if ( vdpau_init_error( st, "Can't get PRESENTATION_QUEUE_BLOCK_UNTIL_SURFACE_IDLE proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_PRESENTATION_QUEUE_SET_BACKGROUND_COLOR , (void*)&vdp_queue_set_background_color ); + if ( vdpau_init_error( st, "Can't get PRESENTATION_QUEUE_SET_BACKGROUND_COLOR proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_PRESENTATION_QUEUE_GET_TIME , (void*)&vdp_queue_get_time ); + if ( vdpau_init_error( st, "Can't get PRESENTATION_QUEUE_GET_TIME proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_PRESENTATION_QUEUE_QUERY_SURFACE_STATUS , (void*)&vdp_queue_query_surface_status ); + if ( vdpau_init_error( st, "Can't get PRESENTATION_QUEUE_QUERY_SURFACE_STATUS proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_DECODER_QUERY_CAPABILITIES , (void*)&vdp_decoder_query_capabilities ); + if ( vdpau_init_error( st, "Can't get DECODER_QUERY_CAPABILITIES proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_DECODER_CREATE , (void*)&orig_vdp_decoder_create ); vdp_decoder_create = guarded_vdp_decoder_create; + if ( vdpau_init_error( st, "Can't get DECODER_CREATE proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_DECODER_DESTROY , (void*)&orig_vdp_decoder_destroy ); vdp_decoder_destroy = guarded_vdp_decoder_destroy; + if ( vdpau_init_error( st, "Can't get DECODER_DESTROY proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_DECODER_RENDER , 
(void*)&orig_vdp_decoder_render ); vdp_decoder_render = guarded_vdp_decoder_render; + if ( vdpau_init_error( st, "Can't get DECODER_RENDER proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_BITMAP_SURFACE_CREATE , (void*)&vdp_bitmap_create ); + if ( vdpau_init_error( st, "Can't get BITMAP_SURFACE_CREATE proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_BITMAP_SURFACE_DESTROY , (void*)&vdp_bitmap_destroy ); + if ( vdpau_init_error( st, "Can't get BITMAP_SURFACE_DESTROY proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_BITMAP_SURFACE_PUT_BITS_NATIVE , (void*)&vdp_bitmap_put_bits ); + if ( vdpau_init_error( st, "Can't get BITMAP_SURFACE_PUT_BITS_NATIVE proc address !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_get_proc_address( vdp_device, VDP_FUNC_ID_PREEMPTION_CALLBACK_REGISTER, (void*)&vdp_preemption_callback_register ); + if ( vdpau_init_error( st, "Can't get PREEMPTION_CALLBACK_REGISTER proc address !!", &this->vo_driver, 1 ) ) + return NULL; + + st = vdp_preemption_callback_register(vdp_device, &vdp_preemption_callback, (void*)this); + if ( vdpau_init_error( st, "Can't register preemption callback !!", &this->vo_driver, 1 ) ) + return NULL; + + st = vdp_queue_target_create_x11( vdp_device, this->drawable, &vdp_queue_target ); + if ( vdpau_init_error( st, "Can't create presentation queue target !!", &this->vo_driver, 1 ) ) + return NULL; + st = vdp_queue_create( vdp_device, vdp_queue_target, &vdp_queue ); + if ( vdpau_init_error( st, "Can't create presentation queue !!", &this->vo_driver, 1 ) ) + return NULL; + + /* choose almost black as backcolor for color keying */ + this->back_color.red = 0.02; + this->back_color.green = 0.01; + this->back_color.blue = 0.03; + this->back_color.alpha = 1; + vdp_queue_set_background_color( vdp_queue, &this->back_color ); + + 
this->soft_surface_width = 320; + this->soft_surface_height = 240; + this->soft_surface_format = XINE_IMGFMT_YV12; + VdpChromaType chroma = VDP_CHROMA_TYPE_420; + st = vdp_video_surface_create( vdp_device, chroma, this->soft_surface_width, this->soft_surface_height, &this->soft_surface ); + if ( vdpau_init_error( st, "Can't create video surface !!", &this->vo_driver, 1 ) ) + return NULL; + + for ( i=0; i<NOUTPUTSURFACE; ++i ) { + this->output_surface_width[i] = 320; + this->output_surface_height[i] = 240; + } + this->current_output_surface = 0; + this->init_queue = 0; + for ( i=0; i<NOUTPUTSURFACE; ++i ) { + st = vdp_output_surface_create( vdp_device, VDP_RGBA_FORMAT_B8G8R8A8, this->output_surface_width[i], this->output_surface_height[i], &this->output_surface[i] ); + if ( vdpau_init_error( st, "Can't create output surface !!", &this->vo_driver, 1 ) ) { + int j; + for ( j=0; j<i; ++j ) + vdp_output_surface_destroy( this->output_surface[j] ); + vdp_video_surface_destroy( this->soft_surface ); + return NULL; + } + } + + this->scaling_level_max = this->scaling_level_current = 0; +#ifdef VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 + VdpBool hqscaling; + for ( i=0; i<9; ++i ) { + st = vdp_video_mixer_query_feature_support( vdp_device, VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 + i, &hqscaling ); + if ( ( st != VDP_STATUS_OK ) || !hqscaling ) { + //printf("unsupported scaling quality=%d\n", i); + break; + } + else { + //printf("supported scaling quality=%d\n", i); + ++this->scaling_level_max; + } + } +#endif + + vdp_video_mixer_query_feature_support( vdp_device, VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL, &this->temporal_is_supported ); + vdp_video_mixer_query_feature_support( vdp_device, VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL_SPATIAL, &this->temporal_spatial_is_supported ); + vdp_video_mixer_query_feature_support( vdp_device, VDP_VIDEO_MIXER_FEATURE_NOISE_REDUCTION, &this->noise_reduction_is_supported ); + vdp_video_mixer_query_feature_support( 
vdp_device, VDP_VIDEO_MIXER_FEATURE_SHARPNESS, &this->sharpness_is_supported ); + vdp_video_mixer_query_feature_support( vdp_device, VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE, &this->inverse_telecine_is_supported ); + vdp_video_mixer_query_attribute_support( vdp_device, VDP_VIDEO_MIXER_ATTRIBUTE_SKIP_CHROMA_DEINTERLACE, &this->skip_chroma_is_supported ); + + this->color_standard = VDP_COLOR_STANDARD_ITUR_BT_601; + this->video_mixer_chroma = chroma; + this->video_mixer_width = this->soft_surface_width; + this->video_mixer_height = this->soft_surface_height; + VdpVideoMixerFeature features[15]; + int features_count = 0; + if ( this->noise_reduction_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_NOISE_REDUCTION; + ++features_count; + } + if ( this->sharpness_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_SHARPNESS; + ++features_count; + } + if ( this->temporal_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL; + ++features_count; + } + if ( this->temporal_spatial_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL_SPATIAL; + ++features_count; + } + if ( this->inverse_telecine_is_supported ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE; + ++features_count; + } +#ifdef VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 + for ( i=0; i<this->scaling_level_max; ++i ) { + features[features_count] = VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 + i; + ++features_count; + } +#endif + VdpVideoMixerParameter params[] = { VDP_VIDEO_MIXER_PARAMETER_VIDEO_SURFACE_WIDTH, VDP_VIDEO_MIXER_PARAMETER_VIDEO_SURFACE_HEIGHT, + VDP_VIDEO_MIXER_PARAMETER_CHROMA_TYPE, VDP_VIDEO_MIXER_PARAMETER_LAYERS }; + int num_layers = 3; + void const *param_values[] = { &this->video_mixer_width, &this->video_mixer_height, &chroma, &num_layers }; + st = vdp_video_mixer_create( vdp_device, features_count, features, 4, params, param_values, &this->video_mixer 
); + if ( vdpau_init_error( st, "Can't create video mixer !!", &this->vo_driver, 1 ) ) { + vdp_video_surface_destroy( this->soft_surface ); + for ( i=0; i<NOUTPUTSURFACE; ++i ) + vdp_output_surface_destroy( this->output_surface[i] ); + return NULL; + } + + char deinterlacers_description[1024]; + memset( deinterlacers_description, 0, 1024 ); + int deint_count = 0; + int deint_default = 0; + this->deinterlacers_name[deint_count] = vdpau_deinterlacer_name[0]; + this->deinterlacers_method[deint_count] = DEINT_BOB; + strcat( deinterlacers_description, vdpau_deinterlacer_description[0] ); + ++deint_count; + if ( this->temporal_is_supported ) { + this->deinterlacers_name[deint_count] = vdpau_deinterlacer_name[1]; + this->deinterlacers_method[deint_count] = DEINT_HALF_TEMPORAL; + strcat( deinterlacers_description, vdpau_deinterlacer_description[1] ); + ++deint_count; + } + if ( this->temporal_spatial_is_supported ) { + this->deinterlacers_name[deint_count] = vdpau_deinterlacer_name[2]; + this->deinterlacers_method[deint_count] = DEINT_HALF_TEMPORAL_SPATIAL; + strcat( deinterlacers_description, vdpau_deinterlacer_description[2] ); + ++deint_count; + } + if ( this->temporal_is_supported ) { + this->deinterlacers_name[deint_count] = vdpau_deinterlacer_name[3]; + this->deinterlacers_method[deint_count] = DEINT_TEMPORAL; + strcat( deinterlacers_description, vdpau_deinterlacer_description[3] ); + deint_default = deint_count; + ++deint_count; + } + if ( this->temporal_spatial_is_supported ) { + this->deinterlacers_name[deint_count] = vdpau_deinterlacer_name[4]; + this->deinterlacers_method[deint_count] = DEINT_TEMPORAL_SPATIAL; + strcat( deinterlacers_description, vdpau_deinterlacer_description[4] ); + ++deint_count; + } + this->deinterlacers_name[deint_count] = NULL; + + if ( this->scaling_level_max ) { + this->scaling_level_current = config->register_range( config, "video.output.vdpau_scaling_quality", 0, + 0, this->scaling_level_max, _("vdpau: Scaling Quality"), + _("Scaling 
Quality Level"), + 10, vdpau_update_scaling_level, this ); + } + + this->deinterlace_method = config->register_enum( config, "video.output.vdpau_deinterlace_method", deint_default, + this->deinterlacers_name, _("vdpau: HD deinterlace method"), + deinterlacers_description, + 10, vdpau_update_deinterlace_method, this ); + + if ( this->inverse_telecine_is_supported ) { + this->enable_inverse_telecine = config->register_bool( config, "video.output.vdpau_enable_inverse_telecine", 1, + _("vdpau: Try to recreate progressive frames from pulldown material"), + _("Enable this to detect bad-flagged progressive content to which\n" + "a 2:2 or 3:2 pulldown was applied.\n\n"), + 10, vdpau_update_enable_inverse_telecine, this ); + } + + this->honor_progressive = config->register_bool( config, "video.output.vdpau_honor_progressive", 0, + _("vdpau: disable deinterlacing when progressive_frame flag is set"), + _("Set to true if you want to trust the progressive_frame stream's flag.\n" + "This flag is not always reliable.\n\n"), + 10, vdpau_honor_progressive_flag, this ); + + if ( this->skip_chroma_is_supported ) { + this->skip_chroma = config->register_bool( config, "video.output.vdpau_skip_chroma_deinterlace", 0, + _("vdpau: disable advanced deinterlacers chroma filter"), + _("Setting to true may help if your video card isn't able to run advanced deinterlacers.\n\n"), + 10, vdpau_set_skip_chroma, this ); + } + + /* number of video frames from config - register it with the default value. */ + int frame_num = config->register_num (config, "engine.buffers.video_num_frames", 15, /* default */ + _("default number of video frames"), + _("The default number of video frames to request " + "from xine video out driver. 
Some drivers will " + "override this setting with their own values."), + 20, NULL, this); + + /* now make sure we have at least 22 frames, to prevent + * locks with vdpau_h264 */ + if(frame_num < 22) + config->update_num(config,"engine.buffers.video_num_frames",22); + + this->capabilities = VO_CAP_YV12 | VO_CAP_YUY2 | VO_CAP_CROP | VO_CAP_UNSCALED_OVERLAY | VO_CAP_CUSTOM_EXTENT_OVERLAY | VO_CAP_ARGB_LAYER_OVERLAY | VO_CAP_VIDEO_WINDOW_OVERLAY; + ok = 0; + uint32_t mw, mh, ml, mr; + st = vdp_decoder_query_capabilities( vdp_device, VDP_DECODER_PROFILE_H264_MAIN, &ok, &ml, &mr, &mw, &mh ); + if ( st != VDP_STATUS_OK ) + printf( "vo_vdpau: getting h264_supported failed! : %s\n", vdp_get_error_string( st ) ); + else if ( !ok ) + printf( "vo_vdpau: this hardware doesn't support h264.\n" ); + else + this->capabilities |= VO_CAP_VDPAU_H264; + + st = vdp_decoder_query_capabilities( vdp_device, VDP_DECODER_PROFILE_VC1_MAIN, &ok, &ml, &mr, &mw, &mh ); + if ( st != VDP_STATUS_OK ) + printf( "vo_vdpau: getting vc1_supported failed! : %s\n", vdp_get_error_string( st ) ); + else if ( !ok ) + printf( "vo_vdpau: this hardware doesn't support vc1.\n" ); + else + this->capabilities |= VO_CAP_VDPAU_VC1; + + st = vdp_decoder_query_capabilities( vdp_device, VDP_DECODER_PROFILE_MPEG2_MAIN, &ok, &ml, &mr, &mw, &mh ); + if ( st != VDP_STATUS_OK ) + printf( "vo_vdpau: getting mpeg12_supported failed! 
: %s\n", vdp_get_error_string( st ) ); + else if ( !ok ) + printf( "vo_vdpau: this hardware doesn't support mpeg1/2.\n" ); + else + this->capabilities |= VO_CAP_VDPAU_MPEG12; + + for ( i=0; i<NUM_FRAMES_BACK; i++) + this->back_frame[i] = NULL; + + this->hue = 0; + this->saturation = 100; + this->contrast = 100; + this->brightness = 0; + this->sharpness = 0; + this->noise = 0; + this->deinterlace = 0; + + this->allocated_surfaces = 0; + + this->vdp_runtime_nr = 1; + + return &this->vo_driver; +} + +/* + * class functions + */ + +static char* vdpau_get_identifier (video_driver_class_t *this_gen) +{ + return "vdpau"; +} + + + +static char* vdpau_get_description (video_driver_class_t *this_gen) +{ + return _("xine video output plugin using VDPAU hardware acceleration"); +} + + + +static void vdpau_dispose_class (video_driver_class_t *this_gen) +{ + vdpau_class_t *this = (vdpau_class_t *) this_gen; + free (this); +} + + + +static void *vdpau_init_class (xine_t *xine, void *visual_gen) +{ + vdpau_class_t *this = (vdpau_class_t *) calloc(1, sizeof(vdpau_class_t)); + + this->driver_class.open_plugin = vdpau_open_plugin; + this->driver_class.get_identifier = vdpau_get_identifier; + this->driver_class.get_description = vdpau_get_description; + this->driver_class.dispose = vdpau_dispose_class; + this->xine = xine; + + return this; +} + + + +static const vo_info_t vo_info_vdpau = { + 11, /* priority */ + XINE_VISUAL_TYPE_X11 /* visual type */ +}; + + +/* + * exported plugin catalog entry + */ + +const plugin_info_t xine_plugin_info[] EXPORTED = { + /* type, API, "name", version, special_info, init_function */ + { PLUGIN_VIDEO_OUT, 21, "vdpau", XINE_VERSION_CODE, &vo_info_vdpau, vdpau_init_class }, + { PLUGIN_NONE, 0, "", 0, NULL, NULL } +};
\ No newline at end of file diff --git a/src/video_out/video_out_xv.c b/src/video_out/video_out_xv.c index f2c47ef7e..3fb088d59 100644 --- a/src/video_out/video_out_xv.c +++ b/src/video_out/video_out_xv.c @@ -912,6 +912,8 @@ static int xv_set_property (vo_driver_t *this_gen, int property, int value) { xv_driver_t *this = (xv_driver_t *) this_gen; + printf("xv_set_property: property=%d, value=%d\n", property, value ); + if (this->props[property].atom != None) { /* value is out of bound */ diff --git a/src/xine-engine/accel_vdpau.h b/src/xine-engine/accel_vdpau.h new file mode 100644 index 000000000..7ddc5e21f --- /dev/null +++ b/src/xine-engine/accel_vdpau.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2008 the xine project + * + * This file is part of xine, a free video player. + * + * xine is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * xine is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110, USA + * + * + * Common acceleration definitions for vdpau + * + * + */ + +#ifndef HAVE_XINE_ACCEL_VDPAU_H +#define HAVE_XINE_ACCEL_VDPAU_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <vdpau/vdpau.h> + + +typedef struct { + vo_frame_t *vo_frame; + + VdpDevice vdp_device; + + VdpGetErrorString *vdp_get_error_string; + VdpDecoderCreate *vdp_decoder_create; + VdpDecoderDestroy *vdp_decoder_destroy; + VdpDecoderRender *vdp_decoder_render; + + VdpVideoSurface surface; + VdpChromaType chroma; + + VdpColorStandard color_standard; + + int vdp_runtime_nr; /* this is used to keep in sync on preemptions */ + int *current_vdp_runtime_nr; + +} vdpau_accel_t; + +#define VDPAU_DATA(frame_gen) ((frame_gen) ? (vdpau_accel_t *)(frame_gen)->accel_data : (vdpau_accel_t *)0) +#define VDPAU_FRAME(frame_gen) ((frame_gen) ? (vdpau_accel_t *)VDPAU_DATA(frame_gen)->vo_frame : (vdpau_frame_t *)0) + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/xine-engine/load_plugins.c b/src/xine-engine/load_plugins.c index 5128644da..04e0b4461 100644 --- a/src/xine-engine/load_plugins.c +++ b/src/xine-engine/load_plugins.c @@ -1980,7 +1980,13 @@ video_decoder_t *_x_get_video_decoder (xine_stream_t *stream, uint8_t stream_typ vd = ((video_decoder_class_t *)node->plugin_class)->open_plugin(node->plugin_class, stream); - if (vd) { + if (vd == 1) { + /* HACK: plugin failed to instantiate because required resources are unavailable at that time, + but may be available later, so don't remove this plugin from catalog. 
*/ + xprintf(stream->xine, XINE_VERBOSITY_DEBUG, + "load_plugins: plugin %s failed to instantiate, resources temporarily unavailable.\n", node->info->id); + } + else if (vd) { inc_node_ref(node); vd->node = node; xprintf(stream->xine, XINE_VERBOSITY_DEBUG, diff --git a/src/xine-engine/osd.c b/src/xine-engine/osd.c index af8286831..0a66efaab 100644 --- a/src/xine-engine/osd.c +++ b/src/xine-engine/osd.c @@ -157,14 +157,23 @@ static osd_object_t *XINE_MALLOC osd_new_object (osd_renderer_t *this, int width osd->next = this->osds; this->osds = osd; + osd->video_window_x = 0; + osd->video_window_y = 0; + osd->video_window_width = 0; + osd->video_window_height = 0; + osd->extent_width = 0; + osd->extent_height = 0; osd->width = width; osd->height = height; osd->area = calloc(width, height); + osd->area_touched = 0; - osd->x1 = width; - osd->y1 = height; - osd->x2 = 0; - osd->y2 = 0; + osd->x1 = osd->argb_layer.x1 = width; + osd->y1 = osd->argb_layer.y1 = height; + osd->x2 = osd->argb_layer.x2 = 0; + osd->y2 = osd->argb_layer.y2 = 0; + + pthread_mutex_init(&osd->argb_layer.mutex, NULL); memcpy(osd->color, textpalettes_color[0], sizeof(textpalettes_color[0])); memcpy(osd->trans, textpalettes_trans[0], sizeof(textpalettes_trans[0])); @@ -183,6 +192,31 @@ static osd_object_t *XINE_MALLOC osd_new_object (osd_renderer_t *this, int width return osd; } +/* + * osd extent must be set to achive video resolution independent osds + * both sizes must be > 0 to take effect. otherwise, video resolution + * will still be used. the extent defines the reference coordinate + * system which is matched to the video output area. + */ +static void osd_set_extent (osd_object_t *osd, int extent_width, int extent_height) { + + osd->extent_width = extent_width; + osd->extent_height = extent_height; +} + +/* + * osd video window defines an area withing osd extent where the + * video shall be scaled to while an osd is displayed on screen. + * both width and height must be > 0 to take effect. 
+ */ +static void osd_set_video_window (osd_object_t *osd, int window_x, int window_y, int window_width, int window_height) { + + osd->video_window_x = window_x; + osd->video_window_y = window_y; + osd->video_window_width = window_width; + osd->video_window_height = window_height; +} + /* @@ -242,12 +276,23 @@ static int _osd_show (osd_object_t *osd, int64_t vpts, int unscaled ) { this->event.object.handle = osd->handle; memset( this->event.object.overlay, 0, sizeof(*this->event.object.overlay) ); + + this->event.object.overlay->argb_layer = &osd->argb_layer; + this->event.object.overlay->unscaled = unscaled; this->event.object.overlay->x = osd->display_x + osd->x1; this->event.object.overlay->y = osd->display_y + osd->y1; this->event.object.overlay->width = osd->x2 - osd->x1; this->event.object.overlay->height = osd->y2 - osd->y1; + this->event.object.overlay->video_window_x = osd->video_window_x; + this->event.object.overlay->video_window_y = osd->video_window_y; + this->event.object.overlay->video_window_width = osd->video_window_width; + this->event.object.overlay->video_window_height = osd->video_window_height; + + this->event.object.overlay->extent_width = osd->extent_width; + this->event.object.overlay->extent_height = osd->extent_height; + this->event.object.overlay->hili_top = 0; this->event.object.overlay->hili_bottom = this->event.object.overlay->height; this->event.object.overlay->hili_left = 0; @@ -255,53 +300,59 @@ static int _osd_show (osd_object_t *osd, int64_t vpts, int unscaled ) { /* there will be at least that many rle objects (one for each row) */ this->event.object.overlay->num_rle = 0; - /* We will never need more rle objects than columns in any row - Rely on lazy page allocation to avoid us actually taking up - this much RAM */ - this->event.object.overlay->data_size = osd->width * osd->height; - rle_p = this->event.object.overlay->rle = - malloc(this->event.object.overlay->data_size * sizeof(rle_elem_t) ); + if (!osd->area_touched) { + /* 
avoid rle encoding when only argb_layer is modified */ + this->event.object.overlay->data_size = 0; + rle_p = this->event.object.overlay->rle = NULL; + } else { + /* We will never need more rle objects than columns in any row + Rely on lazy page allocation to avoid us actually taking up + this much RAM */ + this->event.object.overlay->data_size = osd->width * osd->height; + rle_p = this->event.object.overlay->rle = + malloc(this->event.object.overlay->data_size * sizeof(rle_elem_t) ); - for( y = osd->y1; y < osd->y2; y++ ) { + for( y = osd->y1; y < osd->y2; y++ ) { #ifdef DEBUG_RLE - lprintf("osd_show %p y = %d: ", osd, y); + lprintf("osd_show %p y = %d: ", osd, y); #endif - c = osd->area + y * osd->width + osd->x1; + c = osd->area + y * osd->width + osd->x1; - /* initialize a rle object with the first pixel's color */ - rle.len = 1; - rle.color = *c++; + /* initialize a rle object with the first pixel's color */ + rle.len = 1; + rle.color = *c++; - /* loop over the remaining pixels in the row */ - for( x = osd->x1 + rle.len; x < osd->x2; x++, c++ ) { - if( rle.color != *c ) { + /* loop over the remaining pixels in the row */ + for( x = osd->x1 + rle.len; x < osd->x2; x++, c++ ) { + if( rle.color != *c ) { #ifdef DEBUG_RLE - lprintf("(%d, %d), ", rle.len, rle.color); + lprintf("(%d, %d), ", rle.len, rle.color); #endif - *rle_p++ = rle; - this->event.object.overlay->num_rle++; + *rle_p++ = rle; + this->event.object.overlay->num_rle++; - rle.color = *c; - rle.len = 1; - } else { - rle.len++; + rle.color = *c; + rle.len = 1; + } else { + rle.len++; + } } - } #ifdef DEBUG_RLE - lprintf("(%d, %d)\n", rle.len, rle.color); + lprintf("(%d, %d)\n", rle.len, rle.color); #endif - *rle_p++ = rle; - this->event.object.overlay->num_rle++; - } + *rle_p++ = rle; + this->event.object.overlay->num_rle++; + } #ifdef DEBUG_RLE - lprintf("osd_show %p rle ends\n", osd); + lprintf("osd_show %p rle ends\n", osd); #endif - lprintf("num_rle = %d\n", this->event.object.overlay->num_rle); - - 
memcpy(this->event.object.overlay->hili_color, osd->color, sizeof(osd->color)); - memcpy(this->event.object.overlay->hili_trans, osd->trans, sizeof(osd->trans)); - memcpy(this->event.object.overlay->color, osd->color, sizeof(osd->color)); - memcpy(this->event.object.overlay->trans, osd->trans, sizeof(osd->trans)); + lprintf("num_rle = %d\n", this->event.object.overlay->num_rle); + + memcpy(this->event.object.overlay->hili_color, osd->color, sizeof(osd->color)); + memcpy(this->event.object.overlay->hili_trans, osd->trans, sizeof(osd->trans)); + memcpy(this->event.object.overlay->color, osd->color, sizeof(osd->color)); + memcpy(this->event.object.overlay->trans, osd->trans, sizeof(osd->trans)); + } this->event.event_type = OVERLAY_EVENT_SHOW; this->event.vpts = vpts; @@ -385,11 +436,14 @@ static int osd_hide (osd_object_t *osd, int64_t vpts) { static void osd_clear (osd_object_t *osd) { lprintf("osd=%p\n",osd); - memset(osd->area, 0, osd->width * osd->height); - osd->x1 = osd->width; - osd->y1 = osd->height; - osd->x2 = 0; - osd->y2 = 0; + if (osd->area_touched) { + osd->area_touched = 0; + memset(osd->area, 0, osd->width * osd->height); + } + osd->x1 = osd->argb_layer.x1 = osd->width; + osd->y1 = osd->argb_layer.y1 = osd->height; + osd->x2 = osd->argb_layer.x2 = 0; + osd->y2 = osd->argb_layer.y2 = 0; } /* @@ -411,6 +465,7 @@ static void osd_point (osd_object_t *osd, int x, int y, int color) { osd->x2 = MAX(osd->x2, (x + 1)); osd->y1 = MIN(osd->y1, y); osd->y2 = MAX(osd->y2, (y + 1)); + osd->area_touched = 1; c = osd->area + y * osd->width + x; *c = color; @@ -470,6 +525,7 @@ static void osd_line (osd_object_t *osd, osd->x2 = MAX( osd->x2, x2 ); osd->y1 = MIN( osd->y1, y1 ); osd->y2 = MAX( osd->y2, y2 ); + osd->area_touched = 1; dx = x2 - x1; dy = y2 - y1; @@ -583,6 +639,7 @@ static void osd_filled_rect (osd_object_t *osd, osd->x2 = MAX( osd->x2, dx ); osd->y1 = MIN( osd->y1, y ); osd->y2 = MAX( osd->y2, dy ); + osd->area_touched = 1; dx -= x; dy -= y; @@ -1143,6 
+1200,7 @@ static int osd_render_text (osd_object_t *osd, int x1, int y1, if( x1 < osd->x1 ) osd->x1 = x1; if( y1 < osd->y1 ) osd->y1 = y1; + osd->area_touched = 1; inbuf = text; inbytesleft = strlen(text); @@ -1483,6 +1541,7 @@ static void osd_free_object (osd_object_t *osd_to_close) { else this->osds = osd->next; + pthread_mutex_destroy(&osd->argb_layer.mutex); free( osd ); break; } @@ -1528,6 +1587,7 @@ static void osd_draw_bitmap(osd_object_t *osd, uint8_t *bitmap, osd->x2 = MAX( osd->x2, x1+width ); osd->y1 = MIN( osd->y1, y1 ); osd->y2 = MAX( osd->y2, y1+height ); + osd->area_touched = 1; for( y=0; y<height; y++ ) { if ( palette_map ) { @@ -1546,21 +1606,61 @@ static void osd_draw_bitmap(osd_object_t *osd, uint8_t *bitmap, } } +static void osd_set_argb_buffer(osd_object_t *osd, uint32_t *argb_buffer, + int dirty_x, int dirty_y, int dirty_width, int dirty_height) +{ + if (osd->argb_layer.buffer != argb_buffer) { + dirty_x = 0; + dirty_y = 0; + dirty_width = osd->width; + dirty_height = osd->height; + } + + /* keep osd_object clipping behavior */ + osd->x1 = MIN( osd->x1, dirty_x ); + osd->x2 = MAX( osd->x2, dirty_x + dirty_width ); + osd->y1 = MIN( osd->y1, dirty_y ); + osd->y2 = MAX( osd->y2, dirty_y + dirty_height ); + + pthread_mutex_lock(&osd->argb_layer.mutex); + + /* argb layer update area accumulation */ + osd->argb_layer.x1 = MIN( osd->argb_layer.x1, dirty_x ); + osd->argb_layer.x2 = MAX( osd->argb_layer.x2, dirty_x + dirty_width ); + osd->argb_layer.y1 = MIN( osd->argb_layer.y1, dirty_y ); + osd->argb_layer.y2 = MAX( osd->argb_layer.y2, dirty_y + dirty_height ); + + osd->argb_layer.buffer = argb_buffer; + + pthread_mutex_unlock(&osd->argb_layer.mutex); +} + static uint32_t osd_get_capabilities (osd_object_t *osd) { osd_renderer_t *this = osd->renderer; uint32_t capabilities = 0; + uint32_t vo_capabilities; #ifdef HAVE_FT2 capabilities |= XINE_OSD_CAP_FREETYPE2; #endif this->stream->xine->port_ticket->acquire(this->stream->xine->port_ticket, 1); - if( 
this->stream->video_out->get_capabilities(this->stream->video_out) & - VO_CAP_UNSCALED_OVERLAY) - capabilities |= XINE_OSD_CAP_UNSCALED; + vo_capabilities = this->stream->video_out->get_capabilities(this->stream->video_out); this->stream->xine->port_ticket->release(this->stream->xine->port_ticket, 1); + + if (vo_capabilities & VO_CAP_UNSCALED_OVERLAY) + capabilities |= XINE_OSD_CAP_UNSCALED; + + if (vo_capabilities & VO_CAP_CUSTOM_EXTENT_OVERLAY) + capabilities |= XINE_OSD_CAP_CUSTOM_EXTENT; + + if (vo_capabilities & VO_CAP_ARGB_LAYER_OVERLAY) + capabilities |= XINE_OSD_CAP_ARGB_LAYER; + if (vo_capabilities & VO_CAP_VIDEO_WINDOW_OVERLAY) + capabilities |= XINE_OSD_CAP_VIDEO_WINDOW; + return capabilities; } @@ -1621,8 +1721,11 @@ osd_renderer_t *_x_osd_renderer_init( xine_stream_t *stream ) { this->get_text_size = osd_get_text_size; this->close = osd_renderer_close; this->draw_bitmap = osd_draw_bitmap; + this->set_argb_buffer = osd_set_argb_buffer; this->show_unscaled = osd_show_unscaled; this->get_capabilities = osd_get_capabilities; + this->set_extent = osd_set_extent; + this->set_video_window = osd_set_video_window; return this; } diff --git a/src/xine-engine/osd.h b/src/xine-engine/osd.h index 70193a2ea..be5e2218c 100644 --- a/src/xine-engine/osd.h +++ b/src/xine-engine/osd.h @@ -47,8 +47,16 @@ struct osd_object_s { int width, height; /* work area dimentions */ uint8_t *area; /* work area */ + int area_touched; /* work area was used for painting */ int display_x,display_y; /* where to display it in screen */ + /* video output area within osd extent */ + int video_window_x, video_window_y; + int video_window_width, video_window_height; + + /* extent of reference coordinate system */ + int extent_width, extent_height; + /* clipping box inside work area */ int x1, y1; int x2, y2; @@ -65,6 +73,13 @@ struct osd_object_s { osd_font_t *font; osd_ft2context_t *ft2; + + + /* this holds an optional ARGB overlay, which + * is only be used by supported video_out modules. 
+ * right now this is only vdpau */ + argb_layer_t argb_layer; + }; /* this one is public */ @@ -211,6 +226,35 @@ struct osd_renderer_s { */ uint32_t (*get_capabilities) (osd_object_t *osd); + /* + * define extent of reference coordinate system for video + * resolution independent osds. both sizes must be > 0 to + * take effect. otherwise, video resolution will be used. + */ + void (*set_extent) (osd_object_t *osd, int extent_width, int extent_height); + + /* + * set an argb buffer to be blended into video + * the buffer must exactly match the osd dimensions + * and stay valid while the osd is on screen. pass + * a NULL pointer to safely remove the buffer from + * the osd layer. only the dirty area will be + * updated on screen. for convinience the whole + * osd object will be considered dirty when setting + * a different buffer pointer. + * see also XINE_OSD_CAP_ARGB_LAYER + */ + void (*set_argb_buffer) (osd_object_t *osd, uint32_t *argb_buffer, + int dirty_x, int dirty_y, int dirty_width, int dirty_height); + + /* + * osd video window defines an area withing osd extent where the + * video shall be scaled to while an osd is displayed on screen. + * both width and height must be > 0 to take effect. 
+ */ + void (*set_video_window) (osd_object_t *osd, + int window_x, int window_y, int window_width, int window_height); + /* private stuff */ pthread_mutex_t osd_mutex; diff --git a/src/xine-engine/video_out.c b/src/xine-engine/video_out.c index be062cc5d..171190bc5 100644 --- a/src/xine-engine/video_out.c +++ b/src/xine-engine/video_out.c @@ -930,8 +930,8 @@ static vo_frame_t *get_next_frame (vos_t *this, int64_t cur_vpts, img->vpts = cur_vpts; /* extra info of the backup is thrown away, because it is not up to date */ _x_extra_info_reset(img->extra_info); + img->future_frame = NULL; } - return img; } else { @@ -989,6 +989,13 @@ static vo_frame_t *get_next_frame (vos_t *this, int64_t cur_vpts, /* * remove frame from display queue and show it */ + + if ( img ) { + if ( img->next ) + img->future_frame = img->next; + else + img->future_frame = NULL; + } img = vo_remove_from_img_buf_queue_int (this->display_img_buf_queue, 1, 0, 0, 0, 0, 0); pthread_mutex_unlock(&this->display_img_buf_queue->mutex); @@ -1427,6 +1434,8 @@ static int vo_get_property (xine_video_port_t *this_gen, int property) { ret = this->crop_bottom; break; + case XINE_PARAM_VO_SHARPNESS: + case XINE_PARAM_VO_NOISE_REDUCTION: case XINE_PARAM_VO_HUE: case XINE_PARAM_VO_SATURATION: case XINE_PARAM_VO_CONTRAST: @@ -1516,6 +1525,8 @@ static int vo_set_property (xine_video_port_t *this_gen, int property, int value ret = this->crop_bottom = value; break; + case XINE_PARAM_VO_SHARPNESS: + case XINE_PARAM_VO_NOISE_REDUCTION: case XINE_PARAM_VO_HUE: case XINE_PARAM_VO_SATURATION: case XINE_PARAM_VO_CONTRAST: diff --git a/src/xine-engine/video_out.h b/src/xine-engine/video_out.h index 9a2ba06c6..ae0f61407 100644 --- a/src/xine-engine/video_out.h +++ b/src/xine-engine/video_out.h @@ -64,6 +64,14 @@ struct vo_frame_s { * member functions */ + /* Provide a copy of the frame's image in an image format already known to xine. 
data's member */ + /* have already been intialized to frame's content on entry, so it's usually only necessary to */ + /* change format and img_size. In case img is set, it will point to a memory block of suitable */ + /* size (size has been determined by a previous call with img == NULL). img content and img_size */ + /* must adhere to the specification of _x_get_current_frame_data(). */ + /* Currently this is needed for all image formats except XINE_IMGFMT_YV12 and XINE_IMGFMT_YUY2. */ + void (*proc_provide_standard_frame_data) (vo_frame_t *vo_img, xine_current_frame_data_t *data); + /* Duplicate picture data and acceleration specific data of a frame. */ /* if the image format isn't already known by Xine. Currently this is needed */ /* For all image formats except XINE_IMGFMT_YV12 and XINE_IMGFMT_YUY2 */ @@ -145,6 +153,9 @@ struct vo_frame_s { /* displacement for overlays */ int overlay_offset_x, overlay_offset_y; + + /* pointer to the next frame in display order, used by some vo deint */ + struct vo_frame_s *future_frame; /* * that part is used only by video_out.c for frame management @@ -245,7 +256,9 @@ struct xine_video_port_s { #define VO_PROP_OUTPUT_HEIGHT 20 /* read-only */ #define VO_PROP_OUTPUT_XOFFSET 21 /* read-only */ #define VO_PROP_OUTPUT_YOFFSET 22 /* read-only */ -#define VO_NUM_PROPERTIES 23 +#define VO_PROP_SHARPNESS 24 +#define VO_PROP_NOISE_REDUCTION 25 +#define VO_NUM_PROPERTIES 26 /* number of colors in the overlay palette. 
Currently limited to 256 at most, because some alphablend functions use an 8-bit index into @@ -267,6 +280,7 @@ struct xine_video_port_s { #define VO_PAN_SCAN_FLAG 4 #define VO_INTERLACED_FLAG 8 #define VO_NEW_SEQUENCE_FLAG 16 /* set after MPEG2 Sequence Header Code (used by XvMC) */ +#define VO_CHROMA_422 32 /* used by VDPAU, default is chroma_420 */ /* video driver capabilities */ #define VO_CAP_YV12 0x00000001 /* driver can handle YUV 4:2:0 pictures */ @@ -276,6 +290,12 @@ struct xine_video_port_s { #define VO_CAP_UNSCALED_OVERLAY 0x00000010 /* driver can blend overlay at output resolution */ #define VO_CAP_CROP 0x00000020 /* driver can crop */ #define VO_CAP_XXMC 0x00000040 /* driver can use extended XvMC */ +#define VO_CAP_VDPAU_H264 0x00000080 /* driver can use VDPAU for H264 */ +#define VO_CAP_VDPAU_MPEG12 0x00000100 /* driver can use VDPAU for mpeg1/2 */ +#define VO_CAP_VDPAU_VC1 0x00000200 /* driver can use VDPAU for mpeg1/2 */ +#define VO_CAP_CUSTOM_EXTENT_OVERLAY 0x01000000 /* driver can blend custom extent overlay to output extent */ +#define VO_CAP_ARGB_LAYER_OVERLAY 0x02000000 /* driver supports true color overlay */ +#define VO_CAP_VIDEO_WINDOW_OVERLAY 0x04000000 /* driver can scale video to an area within overlay */ /* @@ -388,6 +408,14 @@ typedef struct rle_elem_s { uint16_t color; } rle_elem_t; +typedef struct argb_layer_s { + pthread_mutex_t mutex; + uint32_t *buffer; + /* dirty area */ + int x1, y1; + int x2, y2; +} argb_layer_t; + struct vo_overlay_s { rle_elem_t *rle; /* rle code buffer */ @@ -398,6 +426,16 @@ struct vo_overlay_s { int width; /* width of subpicture area */ int height; /* height of subpicture area */ + /* area within osd extent to scale video to */ + int video_window_x; + int video_window_y; + int video_window_width; + int video_window_height; + + /* extent of reference coordinate system */ + int extent_width; + int extent_height; + uint32_t color[OVL_PALETTE_SIZE]; /* color lookup table */ uint8_t trans[OVL_PALETTE_SIZE]; /* 
mixer key table */ int rgb_clut; /* true if clut was converted to rgb */ @@ -412,6 +450,8 @@ struct vo_overlay_s { int hili_rgb_clut; /* true if clut was converted to rgb */ int unscaled; /* true if it should be blended unscaled */ + + argb_layer_t *argb_layer; }; diff --git a/src/xine-engine/xine.c b/src/xine-engine/xine.c index aebbffb39..1154a4602 100644 --- a/src/xine-engine/xine.c +++ b/src/xine-engine/xine.c @@ -1972,6 +1972,8 @@ static int _x_get_current_frame_data (xine_stream_t *stream, stream->xine->port_ticket->acquire(stream->xine->port_ticket, 0); frame = stream->video_out->get_last_frame (stream->video_out); + if (frame) + frame->lock(frame); stream->xine->port_ticket->release(stream->xine->port_ticket, 0); if (!frame) { @@ -2003,6 +2005,30 @@ static int _x_get_current_frame_data (xine_stream_t *stream, switch (frame->format) { + default: + if (frame->proc_provide_standard_frame_data) { + uint8_t *img = data->img; + size_t img_size = data->img_size; + data->img = 0; + data->img_size = 0; + + /* ask frame implementation for required img buffer size */ + frame->proc_provide_standard_frame_data(frame, data); + required_size = data->img_size; + + data->img = img; + data->img_size = img_size; + break; + } + + if (!data->img && !(flags & XINE_FRAME_DATA_ALLOCATE_IMG)) + break; /* not interested in image data */ + + xprintf (stream->xine, XINE_VERBOSITY_DEBUG, + "xine: error, snapshot function not implemented for format 0x%x\n", frame->format); + /* fall though and provide "green" YV12 image */ + data->format = XINE_IMGFMT_YV12; + case XINE_IMGFMT_YV12: required_size = frame->width * frame->height + ((frame->width + 1) / 2) * ((frame->height + 1) / 2) @@ -2015,26 +2041,21 @@ static int _x_get_current_frame_data (xine_stream_t *stream, + ((frame->width + 1) / 2) * frame->height; break; - default: - if (data->img || (flags & XINE_FRAME_DATA_ALLOCATE_IMG)) { - xprintf (stream->xine, XINE_VERBOSITY_DEBUG, - "xine: error, snapshot function not implemented for 
format 0x%x\n", frame->format); - _x_abort (); - } - - required_size = 0; } if (flags & XINE_FRAME_DATA_ALLOCATE_IMG) { /* return allocated buffer size */ data->img_size = required_size; /* allocate img or fail */ - if (!(data->img = calloc(1, required_size))) + if (!(data->img = calloc(1, required_size))) { + frame->free(frame); return 0; + } } else { /* fail if supplied buffer is to small */ if (data->img && !img_size_unknown && data->img_size < required_size) { data->img_size = required_size; + frame->free(frame); return 0; } /* return used buffer size */ @@ -2070,11 +2091,14 @@ static int _x_get_current_frame_data (xine_stream_t *stream, break; default: - xprintf (stream->xine, XINE_VERBOSITY_DEBUG, - "xine: error, snapshot function not implemented for format 0x%x\n", frame->format); - _x_abort (); + if (frame->proc_provide_standard_frame_data) + frame->proc_provide_standard_frame_data(frame, data); + else if (!(flags & XINE_FRAME_DATA_ALLOCATE_IMG)) + memset(data->img, 0, data->img_size); } } + + frame->free(frame); return 1; } diff --git a/src/xine-engine/xine_interface.c b/src/xine-engine/xine_interface.c index 0438aedfa..c9481ab94 100644 --- a/src/xine-engine/xine_interface.c +++ b/src/xine-engine/xine_interface.c @@ -474,6 +474,8 @@ void xine_set_param (xine_stream_t *stream, int param, int value) { stream->xine->verbosity = value; break; + case XINE_PARAM_VO_SHARPNESS: + case XINE_PARAM_VO_NOISE_REDUCTION: case XINE_PARAM_VO_HUE: case XINE_PARAM_VO_SATURATION: case XINE_PARAM_VO_CONTRAST: @@ -638,6 +640,8 @@ int xine_get_param (xine_stream_t *stream, int param) { ret = stream->xine->verbosity; break; + case XINE_PARAM_VO_SHARPNESS: + case XINE_PARAM_VO_NOISE_REDUCTION: case XINE_PARAM_VO_HUE: case XINE_PARAM_VO_SATURATION: case XINE_PARAM_VO_CONTRAST: @@ -858,6 +862,20 @@ void xine_osd_draw_bitmap(xine_osd_t *this, uint8_t *bitmap, this->osd.renderer->draw_bitmap(&this->osd, bitmap, x1, y1, width, height, palette_map); } +void 
xine_osd_set_argb_buffer(xine_osd_t *this, uint32_t *argb_buffer, + int dirty_x, int dirty_y, int dirty_width, int dirty_height) { + this->osd.renderer->set_argb_buffer(&this->osd, argb_buffer, dirty_x, dirty_y, dirty_width, dirty_height); +} + +void xine_osd_set_extent(xine_osd_t *this, int extent_width, int extent_height) { + this->osd.renderer->set_extent(&this->osd, extent_width, extent_height); +} + +void xine_osd_set_video_window(xine_osd_t *this, int window_x, int window_y, int window_width, int window_height) { + this->osd.renderer->set_video_window(&this->osd, window_x, window_y, window_width, window_height); +} + + const char *const *xine_post_list_inputs(xine_post_t *this_gen) { post_plugin_t *this = (post_plugin_t *)this_gen; return this->input_ids; |