/// /// @file video.c @brief Video module /// /// Copyright (c) 2009 - 2012 by Johns. All Rights Reserved. /// /// Contributor(s): /// /// License: AGPLv3 /// /// This program is free software: you can redistribute it and/or modify /// it under the terms of the GNU Affero General Public License as /// published by the Free Software Foundation, either version 3 of the /// License. /// /// This program is distributed in the hope that it will be useful, /// but WITHOUT ANY WARRANTY; without even the implied warranty of /// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the /// GNU Affero General Public License for more details. /// /// $Id$ ////////////////////////////////////////////////////////////////////////////// /// /// @defgroup Video The video module. /// /// This module contains all video rendering functions. /// /// @todo disable screen saver support /// /// Uses Xlib where it is needed for VA-API or vdpau. XCB is used for /// everything else. /// /// - X11 /// - OpenGL rendering /// - OpenGL rendering with GLX texture-from-pixmap /// - Xrender rendering /// #define USE_XLIB_XCB ///< use xlib/xcb backend #define USE_SCREENSAVER ///< support disable screensaver #define USE_AUTOCROP ///< compile auto-crop support #define USE_GRAB ///< experimental grab code #define noUSE_GLX ///< outdated GLX code #define noUSE_DOUBLEBUFFER ///< use GLX double buffers //#define USE_VAAPI ///< enable vaapi support //#define USE_VDPAU ///< enable vdpau support #define noUSE_BITMAP ///< use vdpau bitmap surface //#define AV_INFO ///< log a/v sync informations #ifndef AV_INFO_TIME #define AV_INFO_TIME (50 * 60) ///< a/v info every minute #endif #define USE_VIDEO_THREAD ///< run decoder in an own thread #include #include #include #include #include #include #include #include #include #define _(str) gettext(str) ///< gettext shortcut #define _N(str) str ///< gettext_noop shortcut #include // portable atomic_t #ifdef USE_VIDEO_THREAD #ifndef __USE_GNU #define __USE_GNU 
#endif #include #include #include #ifndef HAVE_PTHREAD_NAME /// only available with newer glibc #define pthread_setname_np(thread, name) #endif #endif #ifdef USE_XLIB_XCB #include #include #include #include #include //#include //#include //#include #ifdef USE_SCREENSAVER #include #include #endif //#include //#include //#include //#include #include #include #ifdef XCB_ICCCM_NUM_WM_SIZE_HINTS_ELEMENTS #include #else // compatibility hack for old xcb-util /** * @brief Action on the _NET_WM_STATE property */ typedef enum { /* Remove/unset property */ XCB_EWMH_WM_STATE_REMOVE = 0, /* Add/set property */ XCB_EWMH_WM_STATE_ADD = 1, /* Toggle property */ XCB_EWMH_WM_STATE_TOGGLE = 2 } xcb_ewmh_wm_state_action_t; #endif #endif #ifdef USE_GLX #include // For GL_COLOR_BUFFER_BIT #include // only for gluErrorString #include #endif #ifdef USE_VAAPI #include #ifdef USE_GLX #include #endif #ifndef VA_SURFACE_ATTRIB_SETTABLE /// make source compatible with old libva #define vaCreateSurfaces(d, f, w, h, s, ns, a, na) \ vaCreateSurfaces(d, w, h, f, ns, s) #endif #endif #ifdef USE_VDPAU #include #include #endif #include #include #include #include "misc.h" #include "video.h" #include "audio.h" #ifdef USE_XLIB_XCB //---------------------------------------------------------------------------- // Declarations //---------------------------------------------------------------------------- /// /// Video resolutions selector. /// typedef enum _video_resolutions_ { VideoResolution576i, ///< ...x576 interlaced VideoResolution720p, ///< ...x720 progressive VideoResolutionFake1080i, ///< 1280x1080 1440x1080 interlaced VideoResolution1080i, ///< 1920x1080 interlaced VideoResolutionMax ///< number of resolution indexs } VideoResolutions; /// /// Video deinterlace modes. 
/// typedef enum _video_deinterlace_modes_ { VideoDeinterlaceBob, ///< bob deinterlace VideoDeinterlaceWeave, ///< weave deinterlace VideoDeinterlaceTemporal, ///< temporal deinterlace VideoDeinterlaceTemporalSpatial, ///< temporal spatial deinterlace VideoDeinterlaceSoftBob, ///< software bob deinterlace VideoDeinterlaceSoftSpatial, ///< software spatial deinterlace } VideoDeinterlaceModes; /// /// Video scaleing modes. /// typedef enum _video_scaling_modes_ { VideoScalingNormal, ///< normal scaling VideoScalingFast, ///< fastest scaling VideoScalingHQ, ///< high quality scaling VideoScalingAnamorphic, ///< anamorphic scaling } VideoScalingModes; /// /// Video zoom modes. /// typedef enum _video_zoom_modes_ { VideoNormal, ///< normal VideoStretch, ///< stretch to all edges VideoCenterCutOut, ///< center and cut out VideoAnamorphic, ///< anamorphic scaled (unsupported) } VideoZoomModes; /// /// Video color space conversions. /// typedef enum _video_color_space_ { VideoColorSpaceNone, ///< no conversion VideoColorSpaceBt601, ///< ITU.BT-601 Y'CbCr VideoColorSpaceBt709, ///< ITU.BT-709 HDTV Y'CbCr VideoColorSpaceSmpte240 ///< SMPTE-240M Y'PbPr } VideoColorSpace; /// /// Video output module structure and typedef. 
///
///	Table of entry points implemented by one video output backend.
///	All function pointers are const; the active table is referenced
///	through VideoUsedModule.
///
typedef struct _video_module_
{
    const char *Name;			///< video output module name
    char Enabled;			///< flag output module enabled

    /// allocate new video hw decoder
    VideoHwDecoder *(*const NewHwDecoder)(void);
    void (*const DelHwDecoder) (VideoHwDecoder *);
    unsigned (*const GetSurface) (VideoHwDecoder *);
    void (*const ReleaseSurface) (VideoHwDecoder *, unsigned);
    enum PixelFormat (*const get_format) (VideoHwDecoder *, AVCodecContext *,
	const enum PixelFormat *);
    void (*const RenderFrame) (VideoHwDecoder *, const AVCodecContext *,
	const AVFrame *);
    void (*const SetClock) (VideoHwDecoder *, int64_t);
     int64_t(*const GetClock) (const VideoHwDecoder *);
    void (*const SetTrickSpeed) (const VideoHwDecoder *, int);
    uint8_t *(*const GrabOutput)(int *, int *, int *);
    void (*const SetBackground) (uint32_t);
    void (*const SetVideoMode) (void);
    void (*const ResetAutoCrop) (void);

    /// module display handler thread
    void (*const DisplayHandlerThread) (void);

    void (*const OsdClear) (void);	///< clear OSD
    /// draw OSD ARGB area
    void (*const OsdDrawARGB) (int, int, int, int, const uint8_t *);
    void (*const OsdInit) (int, int);	///< initialize OSD
    void (*const OsdExit) (void);	///< cleanup OSD

    int (*const Init) (const char *);	///< initialize video output module
    void (*const Exit) (void);		///< cleanup video output module
} VideoModule;

//----------------------------------------------------------------------------
//	Defines
//----------------------------------------------------------------------------

#define CODEC_SURFACES_MAX	31	///< maximum number of surfaces

#define CODEC_SURFACES_DEFAULT	(21+4)	///< default number of surfaces
// FIXME: video-xvba only supports 14
#define xCODEC_SURFACES_DEFAULT	14	///< default number of surfaces

#define CODEC_SURFACES_MPEG2	3	///< 1 decode, up to 2 references
#define CODEC_SURFACES_MPEG4	3	///< 1 decode, up to 2 references
#define CODEC_SURFACES_H264	21	///< 1 decode, up to 20 references
#define CODEC_SURFACES_VC1	3	///< 1 decode, up to 2 references

#define VIDEO_SURFACES_MAX	4	///< video output surfaces for queue
#define OUTPUT_SURFACES_MAX	4	///< output surfaces for flip page

//----------------------------------------------------------------------------
//	Variables
//----------------------------------------------------------------------------

char VideoIgnoreRepeatPict;		///< disable repeat pict warning

static const char *VideoDevice;		///< video output device
static Display *XlibDisplay;		///< Xlib X11 display
static xcb_connection_t *Connection;	///< xcb connection
static xcb_colormap_t VideoColormap;	///< video colormap
static xcb_window_t VideoWindow;	///< video window
static uint32_t VideoBlankTick;		///< blank cursor timer
static xcb_cursor_t VideoBlankCursor;	///< empty invisible cursor

static int VideoWindowX;		///< video output window x coordinate
static int VideoWindowY;		///< video output window y coordinate
static unsigned VideoWindowWidth;	///< video output window width
static unsigned VideoWindowHeight;	///< video output window height

static const VideoModule NoopModule;	///< forward definition of noop module

    /// selected video module
static const VideoModule *VideoUsedModule = &NoopModule;

static char VideoHardwareDecoder;	///< flag use hardware decoder

static char VideoSurfaceModesChanged;	///< flag surface modes changed

    /// flag use transparent OSD.
static const char VideoTransparentOsd = 1;

static uint32_t VideoBackground;	///< video background color
static char VideoStudioLevels;		///< flag use studio levels

    /// Default deinterlace mode.
static VideoDeinterlaceModes VideoDeinterlace[VideoResolutionMax];

    /// Default number of deinterlace surfaces
static const int VideoDeinterlaceSurfaces = 4;

    /// Default skip chroma deinterlace flag (VDPAU only).
static char VideoSkipChromaDeinterlace[VideoResolutionMax];

    /// Default inverse telecine flag (VDPAU only).
static char VideoInverseTelecine[VideoResolutionMax];

    /// Default amount of noise reduction algorithm to apply (0 .. 1000).
static int VideoDenoise[VideoResolutionMax];

    /// Default amount of sharpening, or blurring, to apply (-1000 .. 1000).
static int VideoSharpen[VideoResolutionMax];

    /// Default cut top and bottom in pixels
static int VideoCutTopBottom[VideoResolutionMax];

    /// Default cut left and right in pixels
static int VideoCutLeftRight[VideoResolutionMax];

    /// Color space ITU-R BT.601, ITU-R BT.709, ...
    /// One entry per resolution group, in VideoResolutions order.
static const VideoColorSpace VideoColorSpaces[VideoResolutionMax] = {
    VideoColorSpaceBt601, VideoColorSpaceBt709, VideoColorSpaceBt709,
    VideoColorSpaceBt709
};

    /// Default scaling mode
static VideoScalingModes VideoScaling[VideoResolutionMax];

    /// Default audio/video delay
int VideoAudioDelay;

    /// Default zoom mode
static VideoZoomModes Video4to3ZoomMode;

static char Video60HzMode;		///< handle 60hz displays
static char VideoSoftStartSync;		///< soft start sync audio/video
static const int VideoSoftStartFrames = 120;	///< soft start frames
static char VideoShowBlackPicture;	///< flag show black picture

static xcb_atom_t WmDeleteWindowAtom;	///< WM delete message atom
static xcb_atom_t NetWmState;		///< wm-state message atom
static xcb_atom_t NetWmStateFullscreen;	///< fullscreen wm-state message atom

#ifdef DEBUG
extern uint32_t VideoSwitch;		///< ticks for channel switch
#endif
extern void AudioVideoReady(int64_t);	///< tell audio video is ready

#ifdef USE_VIDEO_THREAD

static pthread_t VideoThread;		///< video decode thread
static pthread_cond_t VideoWakeupCond;	///< wakeup condition variable
static pthread_mutex_t VideoMutex;	///< video condition mutex
static pthread_mutex_t VideoLockMutex;	///< video lock mutex

#endif

static char OsdShown;			///< flag show osd
static int OsdWidth;			///< osd width
static int OsdHeight;			///< osd height
static int OsdDirtyX;			///< osd dirty area x
static int OsdDirtyY;			///< osd dirty area y
static int OsdDirtyWidth;		///< osd dirty area width
static int OsdDirtyHeight;		///< osd dirty area height

    /// largest ignored backward pts jump seen so far (see VideoSetPts)
static int64_t VideoDeltaPTS;		///< FIXME: fix pts
//---------------------------------------------------------------------------- // Common Functions //---------------------------------------------------------------------------- static void VideoThreadLock(void); ///< lock video thread static void VideoThreadUnlock(void); ///< unlock video thread static void VideoThreadExit(void); ///< exit/kill video thread /// /// Update video pts. /// /// @param pts_p pointer to pts /// @param interlaced interlaced flag (frame isn't right) /// @param frame frame to display /// /// @note frame->interlaced_frame can't be used for interlace detection /// static void VideoSetPts(int64_t * pts_p, int interlaced, const AVFrame * frame) { int64_t pts; // update video clock if (*pts_p != (int64_t) AV_NOPTS_VALUE) { *pts_p += interlaced ? 40 * 90 : 20 * 90; //Info("video: %s +pts\n", Timestamp2String(*pts_p)); } //av_opt_ptr(avcodec_get_frame_class(), frame, "best_effort_timestamp"); //pts = frame->best_effort_timestamp; pts = frame->pkt_pts; if (pts == (int64_t) AV_NOPTS_VALUE || !pts) { // libav: 0.8pre didn't set pts pts = frame->pkt_dts; } // libav: sets only pkt_dts which can be 0 if (pts && pts != (int64_t) AV_NOPTS_VALUE) { // build a monotonic pts if (*pts_p != (int64_t) AV_NOPTS_VALUE) { int64_t delta; delta = pts - *pts_p; // ignore negative jumps if (delta > -600 * 90 && delta <= -40 * 90) { if (-delta > VideoDeltaPTS) { VideoDeltaPTS = -delta; Debug(4, "video: %#012" PRIx64 "->%#012" PRIx64 " delta+%4" PRId64 " pts\n", *pts_p, pts, pts - *pts_p); } return; } } else { // first new clock value AudioVideoReady(pts); } if (*pts_p != pts) { Debug(4, "video: %#012" PRIx64 "->%#012" PRIx64 " delta=%4" PRId64 " pts\n", *pts_p, pts, pts - *pts_p); *pts_p = pts; } } } /// /// Update output for new size or aspect ratio. 
/// /// @param input_aspect_ratio video stream aspect /// static void VideoUpdateOutput(AVRational input_aspect_ratio, int input_width, int input_height, VideoResolutions resolution, int *output_x, int *output_y, int *output_width, int *output_height, int *crop_x, int *crop_y, int *crop_width, int *crop_height) { AVRational display_aspect_ratio; if (!input_aspect_ratio.num || !input_aspect_ratio.den) { input_aspect_ratio.num = 1; input_aspect_ratio.den = 1; Debug(3, "video: aspect defaults to %d:%d\n", input_aspect_ratio.num, input_aspect_ratio.den); } av_reduce(&display_aspect_ratio.num, &display_aspect_ratio.den, input_width * input_aspect_ratio.num, input_height * input_aspect_ratio.den, 1024 * 1024); // InputWidth/Height can be zero = uninitialized if (!display_aspect_ratio.num || !display_aspect_ratio.den) { display_aspect_ratio.num = 1; display_aspect_ratio.den = 1; } Debug(3, "video: aspect %d:%d\n", display_aspect_ratio.num, display_aspect_ratio.den); *crop_x = VideoCutLeftRight[resolution]; *crop_y = VideoCutTopBottom[resolution]; *crop_width = input_width - VideoCutLeftRight[resolution] * 2; *crop_height = input_height - VideoCutTopBottom[resolution] * 2; // FIXME: store different positions for the ratios if (display_aspect_ratio.num == 4 && display_aspect_ratio.den == 3) { switch (Video4to3ZoomMode) { case VideoNormal: goto normal; case VideoStretch: goto stretch; case VideoCenterCutOut: goto center_cut_out; case VideoAnamorphic: // FIXME: rest should be done by hardware goto stretch; } } // FIXME: this overwrites user choosen output position normal: *output_x = 0; *output_y = 0; *output_width = (VideoWindowHeight * display_aspect_ratio.num) / display_aspect_ratio.den; *output_height = (VideoWindowWidth * display_aspect_ratio.den) / display_aspect_ratio.num; if ((unsigned)*output_width > VideoWindowWidth) { *output_width = VideoWindowWidth; *output_y = (VideoWindowHeight - *output_height) / 2; } else if ((unsigned)*output_height > VideoWindowHeight) { 
*output_height = VideoWindowHeight; *output_x = (VideoWindowWidth - *output_width) / 2; } Debug(3, "video: aspect output %dx%d+%d+%d\n", *output_width, *output_height, *output_x, *output_y); return; stretch: *output_x = 0; *output_y = 0; *output_width = VideoWindowWidth; *output_height = VideoWindowHeight; return; center_cut_out: *output_x = 0; *output_y = 0; *output_height = VideoWindowHeight; *output_width = VideoWindowWidth; *crop_width = (VideoWindowHeight * display_aspect_ratio.num) / display_aspect_ratio.den; *crop_height = (VideoWindowWidth * display_aspect_ratio.den) / display_aspect_ratio.num; // look which side must be cut if ((unsigned)*crop_width > VideoWindowWidth) { *crop_height = input_height; // adjust scaling *crop_x = ((*crop_width - (signed)VideoWindowWidth) * input_width) / (2 * VideoWindowWidth); *crop_width = input_width - *crop_x * 2; } else if ((unsigned)*crop_height > VideoWindowHeight) { *crop_width = input_width; // adjust scaling *crop_y = ((*crop_height - (signed)VideoWindowHeight) * input_height) / (2 * VideoWindowHeight); *crop_height = input_height - *crop_y * 2; } else { *crop_width = input_width; *crop_height = input_height; } Debug(3, "video: aspect crop %dx%d+%d+%d\n", *crop_width, *crop_height, *crop_x, *crop_y); return; } //---------------------------------------------------------------------------- // GLX //---------------------------------------------------------------------------- #ifdef USE_GLX static int GlxEnabled = 1; ///< use GLX static int GlxVSyncEnabled = 0; ///< enable/disable v-sync static GLXContext GlxSharedContext; ///< shared gl context static GLXContext GlxContext; ///< our gl context static XVisualInfo *GlxVisualInfo; ///< our gl visual static GLuint OsdGlTextures[2]; ///< gl texture for OSD static int OsdIndex; ///< index into OsdGlTextures /// /// GLX extension functions ///@{ #ifdef GLX_MESA_swap_control static PFNGLXSWAPINTERVALMESAPROC GlxSwapIntervalMESA; #endif #ifdef GLX_SGI_video_sync static 
PFNGLXGETVIDEOSYNCSGIPROC GlxGetVideoSyncSGI; #endif #ifdef GLX_SGI_swap_control static PFNGLXSWAPINTERVALSGIPROC GlxSwapIntervalSGI; #endif ///@} /// /// GLX check error. /// static void GlxCheck(void) { GLenum err; if ((err = glGetError()) != GL_NO_ERROR) { Debug(3, "video/glx: error %d '%s'\n", err, gluErrorString(err)); } } /// /// GLX check if a GLX extension is supported. /// /// @param ext extension to query /// @returns true if supported, false otherwise /// static int GlxIsExtensionSupported(const char *ext) { const char *extensions; if ((extensions = glXQueryExtensionsString(XlibDisplay, DefaultScreen(XlibDisplay)))) { const char *s; int l; s = strstr(extensions, ext); l = strlen(ext); return s && (s[l] == ' ' || s[l] == '\0'); } return 0; } #if 0 /// /// Setup GLX decoder /// /// @param decoder VA-API decoder /// void GlxSetupDecoder(VaapiDecoder * decoder) { int width; int height; int i; width = decoder->InputWidth; height = decoder->InputHeight; glEnable(GL_TEXTURE_2D); // create 2d texture glGenTextures(2, decoder->GlTexture); GlxCheck(); for (i = 0; i < 2; ++i) { glBindTexture(GL_TEXTURE_2D, decoder->GlTexture[i]); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); glPixelStorei(GL_UNPACK_ALIGNMENT, 4); glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, width, height, 0, GL_BGRA, GL_UNSIGNED_BYTE, NULL); glBindTexture(GL_TEXTURE_2D, 0); } glDisable(GL_TEXTURE_2D); GlxCheck(); } #endif /// /// Render texture. 
/// /// @param texture 2d texture /// static inline void GlxRenderTexture(GLuint texture, int x, int y, int width, int height) { glEnable(GL_TEXTURE_2D); glBindTexture(GL_TEXTURE_2D, texture); glColor4f(1.0f, 1.0f, 1.0f, 1.0f); // no color glBegin(GL_QUADS); { glTexCoord2f(1.0f, 1.0f); glVertex2i(x + width, y + height); glTexCoord2f(0.0f, 1.0f); glVertex2i(x, y + height); glTexCoord2f(0.0f, 0.0f); glVertex2i(x, y); glTexCoord2f(1.0f, 0.0f); glVertex2i(x + width, y); #if 0 glTexCoord2f(0.0f, 0.0f); glVertex2i(x, y); glTexCoord2f(0.0f, 1.0f); glVertex2i(x, y + height); glTexCoord2f(1.0f, 1.0f); glVertex2i(x + width, y + height); glTexCoord2f(1.0f, 0.0f); glVertex2i(x + width, y); #endif } glEnd(); glBindTexture(GL_TEXTURE_2D, 0); glDisable(GL_TEXTURE_2D); } /// /// Upload texture. /// static void GlxUploadTexture(int x, int y, int width, int height, const uint8_t * argb) { // FIXME: use other / faster uploads // ARB_pixelbuffer_object GL_PIXEL_UNPACK_BUFFER glBindBufferARB() // glMapBuffer() glUnmapBuffer() // glTexSubImage2D glEnable(GL_TEXTURE_2D); // upload 2d texture glBindTexture(GL_TEXTURE_2D, OsdGlTextures[OsdIndex]); glTexSubImage2D(GL_TEXTURE_2D, 0, x, y, width, height, GL_BGRA, GL_UNSIGNED_BYTE, argb); glBindTexture(GL_TEXTURE_2D, 0); glDisable(GL_TEXTURE_2D); } /// /// Render to glx texture. 
/// static void GlxRender(int osd_width, int osd_height) { static uint8_t *image; static uint8_t cycle; int x; int y; if (!OsdGlTextures[0] || !OsdGlTextures[1]) { return; } // render each frame kills performance // osd 1920 * 1080 * 4 (RGBA) * 50 (HZ) = 396 Mb/s // too big for alloca if (!image) { image = malloc(4 * osd_width * osd_height); memset(image, 0x00, 4 * osd_width * osd_height); } for (y = 0; y < osd_height; ++y) { for (x = 0; x < osd_width; ++x) { ((uint32_t *) image)[x + y * osd_width] = 0x00FFFFFF | (cycle++) << 24; } } cycle++; // FIXME: convert is for GLX texture unneeded // convert internal osd to image //GfxConvert(image, 0, 4 * osd_width); // GlxUploadTexture(0, 0, osd_width, osd_height, image); } /// /// Setup GLX window. /// static void GlxSetupWindow(xcb_window_t window, int width, int height) { uint32_t start; uint32_t end; int i; unsigned count; Debug(3, "video/glx: %s\n %x %dx%d", __FUNCTION__, window, width, height); // set glx context if (!glXMakeCurrent(XlibDisplay, window, GlxContext)) { Fatal(_("video/glx: can't make glx context current\n")); // FIXME: disable glx return; } Debug(3, "video/glx: ok\n"); #ifdef DEBUG // check if v-sync is working correct end = GetMsTicks(); for (i = 0; i < 10; ++i) { start = end; glClear(GL_COLOR_BUFFER_BIT); glXSwapBuffers(XlibDisplay, window); end = GetMsTicks(); GlxGetVideoSyncSGI(&count); Debug(3, "video/glx: %5d frame rate %dms\n", count, end - start); // nvidia can queue 5 swaps if (i > 5 && (end - start) < 15) { Warning(_("video/glx: no v-sync\n")); } } #endif // viewpoint GlxCheck(); glViewport(0, 0, width, height); glDepthRange(-1.0, 1.0); glClearColor(0.0f, 0.0f, 0.0f, 0.0f); glColor3f(1.0f, 1.0f, 1.0f); glClearDepth(1.0); GlxCheck(); glMatrixMode(GL_PROJECTION); glLoadIdentity(); glOrtho(0.0, width, height, 0.0, -1.0, 1.0); GlxCheck(); glMatrixMode(GL_MODELVIEW); glLoadIdentity(); glDisable(GL_DEPTH_TEST); // setup 2d drawing glDepthMask(GL_FALSE); glDisable(GL_CULL_FACE); #ifdef 
USE_DOUBLEBUFFER glDrawBuffer(GL_BACK); #else glDrawBuffer(GL_FRONT); #endif glTexEnvi(GL_TEXTURE_ENV, GL_TEXTURE_ENV_MODE, GL_MODULATE); glEnable(GL_BLEND); glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA); #ifdef DEBUG #ifdef USE_DOUBLEBUFFER glDrawBuffer(GL_FRONT); glClearColor(1.0f, 0.0f, 1.0f, 1.0f); glClear(GL_COLOR_BUFFER_BIT); glDrawBuffer(GL_BACK); #endif #endif // clear glClearColor(0.0f, 0.0f, 0.0f, 1.0f); // intial background color glClear(GL_COLOR_BUFFER_BIT); #ifdef DEBUG glClearColor(1.0f, 1.0f, 0.0f, 1.0f); // background color #endif GlxCheck(); } /// /// Initialize GLX. /// static void GlxInit(void) { static GLint visual_attr[] = { GLX_RGBA, GLX_RED_SIZE, 8, GLX_GREEN_SIZE, 8, GLX_BLUE_SIZE, 8, #ifdef USE_DOUBLEBUFFER GLX_DOUBLEBUFFER, #endif None }; XVisualInfo *vi; GLXContext context; int major; int minor; int glx_GLX_EXT_swap_control; int glx_GLX_MESA_swap_control; int glx_GLX_SGI_swap_control; int glx_GLX_SGI_video_sync; if (!glXQueryVersion(XlibDisplay, &major, &minor)) { Error(_("video/glx: no GLX support\n")); GlxEnabled = 0; return; } Info(_("video/glx: glx version %d.%d\n"), major, minor); // // check which extension are supported // glx_GLX_EXT_swap_control = GlxIsExtensionSupported("GLX_EXT_swap_control"); glx_GLX_MESA_swap_control = GlxIsExtensionSupported("GLX_MESA_swap_control"); glx_GLX_SGI_swap_control = GlxIsExtensionSupported("GLX_SGI_swap_control"); glx_GLX_SGI_video_sync = GlxIsExtensionSupported("GLX_SGI_video_sync"); #ifdef GLX_MESA_swap_control if (glx_GLX_MESA_swap_control) { GlxSwapIntervalMESA = (PFNGLXSWAPINTERVALMESAPROC) glXGetProcAddress((const GLubyte *)"glXSwapIntervalMESA"); } Debug(3, "video/glx: GlxSwapIntervalMESA=%p\n", GlxSwapIntervalMESA); #endif #ifdef GLX_SGI_swap_control if (glx_GLX_SGI_swap_control) { GlxSwapIntervalSGI = (PFNGLXSWAPINTERVALSGIPROC) glXGetProcAddress((const GLubyte *)"glXSwapIntervalSGI"); } Debug(3, "video/glx: GlxSwapIntervalSGI=%p\n", GlxSwapIntervalSGI); #endif #ifdef 
GLX_SGI_video_sync if (glx_GLX_SGI_video_sync) { GlxGetVideoSyncSGI = (PFNGLXGETVIDEOSYNCSGIPROC) glXGetProcAddress((const GLubyte *)"glXGetVideoSyncSGI"); } Debug(3, "video/glx: GlxGetVideoSyncSGI=%p\n", GlxGetVideoSyncSGI); #endif // glXGetVideoSyncSGI glXWaitVideoSyncSGI #if 0 // FIXME: use xcb: xcb_glx_create_context #endif // create glx context glXMakeCurrent(XlibDisplay, None, NULL); vi = glXChooseVisual(XlibDisplay, DefaultScreen(XlibDisplay), visual_attr); if (!vi) { Error(_("video/glx: can't get a RGB visual\n")); GlxEnabled = 0; return; } if (!vi->visual) { Error(_("video/glx: no valid visual found\n")); GlxEnabled = 0; return; } if (vi->bits_per_rgb < 8) { Error(_("video/glx: need atleast 8-bits per RGB\n")); GlxEnabled = 0; return; } context = glXCreateContext(XlibDisplay, vi, NULL, GL_TRUE); if (!context) { Error(_("video/glx: can't create glx context\n")); GlxEnabled = 0; return; } GlxSharedContext = context; context = glXCreateContext(XlibDisplay, vi, GlxSharedContext, GL_TRUE); if (!context) { Error(_("video/glx: can't create glx context\n")); GlxEnabled = 0; // FIXME: destroy GlxSharedContext return; } GlxContext = context; GlxVisualInfo = vi; Debug(3, "video/glx: visual %#02x depth %u\n", (unsigned)vi->visualid, vi->depth); // // query default v-sync state // if (glx_GLX_EXT_swap_control) { unsigned tmp; tmp = -1; glXQueryDrawable(XlibDisplay, DefaultRootWindow(XlibDisplay), GLX_SWAP_INTERVAL_EXT, &tmp); GlxCheck(); Debug(3, "video/glx: default v-sync is %d\n", tmp); } else { Debug(3, "video/glx: default v-sync is unknown\n"); } // // disable wait on v-sync // // FIXME: sleep before swap / busy waiting hardware // FIXME: 60hz lcd panel // FIXME: config: default, on, off #ifdef GLX_SGI_swap_control if (GlxVSyncEnabled < 0 && GlxSwapIntervalSGI) { if (GlxSwapIntervalSGI(0)) { GlxCheck(); Warning(_("video/glx: can't disable v-sync\n")); } else { Info(_("video/glx: v-sync disabled\n")); } } else #endif #ifdef GLX_MESA_swap_control if (GlxVSyncEnabled 
< 0 && GlxSwapIntervalMESA) { if (GlxSwapIntervalMESA(0)) { GlxCheck(); Warning(_("video/glx: can't disable v-sync\n")); } else { Info(_("video/glx: v-sync disabled\n")); } } #endif // // enable wait on v-sync // #ifdef GLX_SGI_swap_control if (GlxVSyncEnabled > 0 && GlxSwapIntervalMESA) { if (GlxSwapIntervalMESA(1)) { GlxCheck(); Warning(_("video/glx: can't enable v-sync\n")); } else { Info(_("video/glx: v-sync enabled\n")); } } else #endif #ifdef GLX_MESA_swap_control if (GlxVSyncEnabled > 0 && GlxSwapIntervalSGI) { if (GlxSwapIntervalSGI(1)) { GlxCheck(); Warning(_("video/glx: can't enable v-sync\n")); } else { Info(_("video/glx: v-sync enabled\n")); } } #endif } /// /// Cleanup GLX. /// static void GlxExit(void) { Debug(3, "video/glx: %s\n", __FUNCTION__); glFinish(); // must destroy glx if (glXGetCurrentContext() == GlxContext) { // if currently used, set to none glXMakeCurrent(XlibDisplay, None, NULL); } if (GlxSharedContext) { glXDestroyContext(XlibDisplay, GlxSharedContext); } if (GlxContext) { glXDestroyContext(XlibDisplay, GlxContext); } #if 0 if (GlxThreadContext) { glXDestroyContext(XlibDisplay, GlxThreadContext); } // FIXME: must free GlxVisualInfo #endif } #endif //---------------------------------------------------------------------------- // common functions //---------------------------------------------------------------------------- /// /// Calculate resolution group. /// /// @param width video picture raw width /// @param height video picture raw height /// @param interlace flag interlaced video picture /// /// @note interlace isn't used yet and probably wrong set by caller. 
/// static VideoResolutions VideoResolutionGroup(int width, int height, __attribute__ ((unused)) int interlace) { if (height <= 576) { return VideoResolution576i; } if (height <= 720) { return VideoResolution720p; } if (height < 1080) { return VideoResolutionFake1080i; } if (width < 1920) { return VideoResolutionFake1080i; } return VideoResolution1080i; } //---------------------------------------------------------------------------- // auto-crop //---------------------------------------------------------------------------- /// /// auto-crop context structure and typedef. /// typedef struct _auto_crop_ctx_ { int X1; ///< detected left border int X2; ///< detected right border int Y1; ///< detected top border int Y2; ///< detected bottom border int Count; ///< counter to delay switch int State; ///< auto-crop state (0, 14, 16) } AutoCropCtx; #ifdef USE_AUTOCROP #define YBLACK 0x20 ///< below is black #define UVBLACK 0x80 ///< around is black #define M64 UINT64_C(0x0101010101010101) ///< 64bit multiplicator /// auto-crop percent of video width to ignore logos static const int AutoCropLogoIgnore = 24; static int AutoCropInterval; ///< auto-crop check interval static int AutoCropDelay; ///< auto-crop switch delay static int AutoCropTolerance; ///< auto-crop tolerance /// /// Detect black line Y. /// /// @param data Y plane pixel data /// @param length number of pixel to check /// @param stride offset of pixels /// /// @note 8 pixel are checked at once, all values must be 8 aligned /// static int AutoCropIsBlackLineY(const uint8_t * data, int length, int stride) { int n; int o; uint64_t r; const uint64_t *p; #ifdef DEBUG if ((size_t) data & 0x7 || stride & 0x7) { abort(); } #endif p = (const uint64_t *)data; n = length; // FIXME: can remove n o = stride / 8; r = 0UL; while (--n >= 0) { r |= *p; p += o; } // below YBLACK(0x20) is black return !(r & ~((YBLACK - 1) * M64)); } /// /// Auto detect black borders and crop them. 
/// /// @param autocrop auto-crop variables /// @param width frame width in pixel /// @param height frame height in pixel /// @param data frame planes data (Y, U, V) /// @param pitches frame planes pitches (Y, U, V) /// /// @note FIXME: can reduce the checked range, left, right crop isn't /// used yet. /// /// @note FIXME: only Y is checked, for black. /// static void AutoCropDetect(AutoCropCtx * autocrop, int width, int height, void *data[3], uint32_t pitches[3]) { const void *data_y; unsigned length_y; int x; int y; int x1; int x2; int y1; int y2; int logo_skip; // // ignore top+bottom 6 lines and left+right 8 pixels // #define SKIP_X 8 #define SKIP_Y 6 x1 = width - 1; x2 = 0; y1 = height - 1; y2 = 0; logo_skip = SKIP_X + (((width * AutoCropLogoIgnore) / 100 + 8) / 8) * 8; data_y = data[0]; length_y = pitches[0]; // // search top // for (y = SKIP_Y; y < y1; ++y) { if (!AutoCropIsBlackLineY(data_y + logo_skip + y * length_y, (width - 2 * logo_skip) / 8, 8)) { if (y == SKIP_Y) { y = 0; } y1 = y; break; } } // // search bottom // for (y = height - SKIP_Y - 1; y > y2; --y) { if (!AutoCropIsBlackLineY(data_y + logo_skip + y * length_y, (width - 2 * logo_skip) / 8, 8)) { if (y == height - SKIP_Y - 1) { y = height - 1; } y2 = y; break; } } // // search left // for (x = SKIP_X; x < x1; x += 8) { if (!AutoCropIsBlackLineY(data_y + x + SKIP_Y * length_y, height - 2 * SKIP_Y, length_y)) { if (x == SKIP_X) { x = 0; } x1 = x; break; } } // // search right // for (x = width - SKIP_X - 8; x > x2; x -= 8) { if (!AutoCropIsBlackLineY(data_y + x + SKIP_Y * length_y, height - 2 * SKIP_Y * 8, length_y)) { if (x == width - SKIP_X - 8) { x = width - 1; } x2 = x; break; } } if (0 && (y1 > SKIP_Y || x1 > SKIP_X)) { Debug(3, "video/autocrop: top=%d bottom=%d left=%d right=%d\n", y1, y2, x1, x2); } autocrop->X1 = x1; autocrop->X2 = x2; autocrop->Y1 = y1; autocrop->Y2 = y2; } #endif //---------------------------------------------------------------------------- // software - deinterlace 
//----------------------------------------------------------------------------

// FIXME: move general software deinterlace functions to here.

//----------------------------------------------------------------------------
//      VA-API
//----------------------------------------------------------------------------

#ifdef USE_VAAPI

static int VaapiBuggyVdpau;             ///< fix libva-driver-vdpau bugs
static int VaapiBuggyIntel;             ///< fix libva-driver-intel bugs
static int VaapiNewIntel;               ///< new libva-driver-intel driver

static VADisplay *VaDisplay;            ///< VA-API display

static VAImage VaOsdImage = {
    .image_id = VA_INVALID_ID
};                                      ///< osd VA-API image

static VASubpictureID VaOsdSubpicture = VA_INVALID_ID;  ///< osd VA-API subpicture
static char VaapiUnscaledOsd;           ///< unscaled osd supported

    /// VA-API decoder typedef
typedef struct _vaapi_decoder_ VaapiDecoder;

///
///     VA-API decoder
///
///     Per stream state of the VA-API output: output window geometry,
///     negotiated ffmpeg/VA-API context, the pools of free and used
///     decoder surfaces, the ring buffer of surfaces queued for display
///     and the frame statistics.
///
struct _vaapi_decoder_
{
    VADisplay *VaDisplay;               ///< VA-API display

    xcb_window_t Window;                ///< output window
    int OutputX;                        ///< output window x
    int OutputY;                        ///< output window y
    int OutputWidth;                    ///< output window width
    int OutputHeight;                   ///< output window height

    /// flags for put surface for different resolutions groups
    unsigned SurfaceFlagsTable[VideoResolutionMax];

    enum PixelFormat PixFmt;            ///< ffmpeg frame pixfmt
    int WrongInterlacedWarned;          ///< warning about interlace flag issued
    int Interlaced;                     ///< ffmpeg interlaced flag
    int TopFieldFirst;                  ///< ffmpeg top field displayed first

    VAImage DeintImages[5];             ///< deinterlace image buffers

    int GetPutImage;                    ///< flag get/put image can be used
    VAImage Image[1];                   ///< image buffer to update surface

    struct vaapi_context VaapiContext[1];       ///< ffmpeg VA-API context

    // SurfacesUsed/SurfacesFree are maintained as two disjoint lists:
    // a surface moves from free to used when handed out and back again
    // when it is released.
    int SurfacesNeeded;                 ///< number of surface to request
    int SurfaceUsedN;                   ///< number of used surfaces
    /// used surface ids
    VASurfaceID SurfacesUsed[CODEC_SURFACES_MAX];
    int SurfaceFreeN;                   ///< number of free surfaces
    /// free surface ids
    VASurfaceID SurfacesFree[CODEC_SURFACES_MAX];

    int InputWidth;                     ///< video input width
    int InputHeight;                    ///< video input height
    AVRational InputAspect;             ///< video input aspect ratio
    VideoResolutions Resolution;        ///< resolution group

    int CropX;                          ///< video crop x
    int CropY;                          ///< video crop y
    int CropWidth;                      ///< video crop width
    int CropHeight;                     ///< video crop height
#ifdef USE_AUTOCROP
    AutoCropCtx AutoCrop[1];            ///< auto-crop variables
#endif
#ifdef USE_GLX
    GLuint GlTexture[2];                ///< gl texture for VA-API
    void *GlxSurface[2];                ///< VA-API/GLX surface
#endif
    VASurfaceID BlackSurface;           ///< empty black surface

    /// video surface ring buffer
    VASurfaceID SurfacesRb[VIDEO_SURFACES_MAX];
    int SurfaceWrite;                   ///< write pointer
    int SurfaceRead;                    ///< read pointer
    atomic_t SurfacesFilled;            ///< how many of the buffer is used

    int SurfaceField;                   ///< current displayed field
    int TrickSpeed;                     ///< current trick speed
    int TrickCounter;                   ///< current trick speed counter
    struct timespec FrameTime;          ///< time of last display
    int Closing;                        ///< flag about closing current stream
    int64_t PTS;                        ///< video PTS clock

    int SyncCounter;                    ///< counter to sync frames
    int FramesDuped;                    ///< number of frames duplicated
    int FramesMissed;                   ///< number of frames missed
    int FramesDropped;                  ///< number of frames dropped
    int FrameCounter;                   ///< number of frames decoded
    int FramesDisplayed;                ///< number of frames displayed
};

// only a single decoder slot exists, see the array size below
static VaapiDecoder *VaapiDecoders[1];  ///< open decoder streams
static int VaapiDecoderN;               ///< number of decoder streams

    /// forward definition: display black surface
static void VaapiBlackSurface(VaapiDecoder *);

    /// forward definition: destroy deinterlace images
static void VaapiDestroyDeinterlaceImages(VaapiDecoder *);

    /// forward definition: release surface
static void VaapiReleaseSurface(VaapiDecoder *, VASurfaceID);

//----------------------------------------------------------------------------
//      VA-API Functions
//----------------------------------------------------------------------------

//----------------------------------------------------------------------------

///
///     Output video
messages. /// /// Reduce output. /// /// @param level message level (Error, Warning, Info, Debug, ...) /// @param format printf format string (NULL to flush messages) /// @param ... printf arguments /// /// @returns true, if message shown /// static int VaapiMessage(int level, const char *format, ...) { if (SysLogLevel > level || DebugLevel > level) { static const char *last_format; static char buf[256]; va_list ap; va_start(ap, format); if (format != last_format) { // don't repeat same message last_format = format; if (buf[0]) { // print last repeated message syslog(LOG_ERR, "%s", buf); buf[0] = '\0'; } if (format) { vsyslog(LOG_ERR, format, ap); } va_end(ap); return 1; } vsnprintf(buf, sizeof(buf), format, ap); va_end(ap); } return 0; } // Surfaces ------------------------------------------------------------- /// /// Associate OSD with surface. /// /// @param decoder VA-API decoder /// static void VaapiAssociate(VaapiDecoder * decoder) { int x; int y; int w; int h; if (VaOsdSubpicture == VA_INVALID_ID) { Warning(_("video/vaapi: no osd subpicture yet\n")); return; } x = 0; y = 0; w = VaOsdImage.width; h = VaOsdImage.height; // FIXME: associate only if osd is displayed if (VaapiUnscaledOsd) { if (decoder->SurfaceFreeN && vaAssociateSubpicture(VaDisplay, VaOsdSubpicture, decoder->SurfacesFree, decoder->SurfaceFreeN, x, y, w, h, 0, 0, VideoWindowWidth, VideoWindowHeight, VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't associate subpicture\n")); } if (decoder->SurfaceUsedN && vaAssociateSubpicture(VaDisplay, VaOsdSubpicture, decoder->SurfacesUsed, decoder->SurfaceUsedN, x, y, w, h, 0, 0, VideoWindowWidth, VideoWindowHeight, VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't associate subpicture\n")); } } else { if (decoder->SurfaceFreeN && vaAssociateSubpicture(VaDisplay, VaOsdSubpicture, decoder->SurfacesFree, decoder->SurfaceFreeN, x, y, w, h, decoder->CropX, decoder->CropY / 
2, decoder->CropWidth, decoder->CropHeight, 0) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't associate subpicture\n")); } if (decoder->SurfaceUsedN && vaAssociateSubpicture(VaDisplay, VaOsdSubpicture, decoder->SurfacesUsed, decoder->SurfaceUsedN, x, y, w, h, decoder->CropX, decoder->CropY / 2, decoder->CropWidth, decoder->CropHeight, 0) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't associate subpicture\n")); } } } /// /// Deassociate OSD with surface. /// /// @param decoder VA-API decoder /// static void VaapiDeassociate(VaapiDecoder * decoder) { if (VaOsdSubpicture != VA_INVALID_ID) { if (decoder->SurfaceFreeN && vaDeassociateSubpicture(VaDisplay, VaOsdSubpicture, decoder->SurfacesFree, decoder->SurfaceFreeN) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't deassociate %d surfaces\n"), decoder->SurfaceFreeN); } if (decoder->SurfaceUsedN && vaDeassociateSubpicture(VaDisplay, VaOsdSubpicture, decoder->SurfacesUsed, decoder->SurfaceUsedN) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't deassociate %d surfaces\n"), decoder->SurfaceUsedN); } } } /// /// Create surfaces for VA-API decoder. 
/// /// @param decoder VA-API decoder /// @param width surface source/video width /// @param height surface source/video height /// static void VaapiCreateSurfaces(VaapiDecoder * decoder, int width, int height) { #ifdef DEBUG if (!decoder->SurfacesNeeded) { Error(_("video/vaapi: surface needed not set\n")); decoder->SurfacesNeeded = 3 + VIDEO_SURFACES_MAX; } #endif Debug(3, "video/vaapi: %s: %dx%d * %d\n", __FUNCTION__, width, height, decoder->SurfacesNeeded); decoder->SurfaceFreeN = decoder->SurfacesNeeded; // VA_RT_FORMAT_YUV420 VA_RT_FORMAT_YUV422 VA_RT_FORMAT_YUV444 if (vaCreateSurfaces(decoder->VaDisplay, VA_RT_FORMAT_YUV420, width, height, decoder->SurfacesFree, decoder->SurfaceFreeN, NULL, 0) != VA_STATUS_SUCCESS) { Fatal(_("video/vaapi: can't create %d surfaces\n"), decoder->SurfaceFreeN); // FIXME: write error handler / fallback } } /// /// Destroy surfaces of VA-API decoder. /// /// @param decoder VA-API decoder /// static void VaapiDestroySurfaces(VaapiDecoder * decoder) { Debug(3, "video/vaapi: %s:\n", __FUNCTION__); // // update OSD associate // VaapiDeassociate(decoder); if (vaDestroySurfaces(decoder->VaDisplay, decoder->SurfacesFree, decoder->SurfaceFreeN) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy %d surfaces\n"), decoder->SurfaceFreeN); } decoder->SurfaceFreeN = 0; if (vaDestroySurfaces(decoder->VaDisplay, decoder->SurfacesUsed, decoder->SurfaceUsedN) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy %d surfaces\n"), decoder->SurfaceUsedN); } decoder->SurfaceUsedN = 0; // FIXME surfaces used for output } /// /// Get a free surface. 
/// /// @param decoder VA-API decoder /// /// @returns the oldest free surface /// static VASurfaceID VaapiGetSurface(VaapiDecoder * decoder) { VASurfaceID surface; VASurfaceStatus status; int i; // try to use oldest surface for (i = 0; i < decoder->SurfaceFreeN; ++i) { surface = decoder->SurfacesFree[i]; if (vaQuerySurfaceStatus(decoder->VaDisplay, surface, &status) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaQuerySurface failed\n")); status = VASurfaceReady; } // surface still in use, try next if (status != VASurfaceReady) { Debug(4, "video/vaapi: surface %#010x not ready: %d\n", surface, status); if (!VaapiBuggyVdpau || i < 1) { continue; } usleep(1 * 1000); } // copy remaining surfaces down decoder->SurfaceFreeN--; for (; i < decoder->SurfaceFreeN; ++i) { decoder->SurfacesFree[i] = decoder->SurfacesFree[i + 1]; } decoder->SurfacesFree[i] = VA_INVALID_ID; // save as used decoder->SurfacesUsed[decoder->SurfaceUsedN++] = surface; return surface; } Error(_("video/vaapi: out of surfaces\n")); return VA_INVALID_ID; } /// /// Release a surface. /// /// @param decoder VA-API decoder /// @param surface surface no longer used /// static void VaapiReleaseSurface(VaapiDecoder * decoder, VASurfaceID surface) { int i; for (i = 0; i < decoder->SurfaceUsedN; ++i) { if (decoder->SurfacesUsed[i] == surface) { // no problem, with last used decoder->SurfacesUsed[i] = decoder->SurfacesUsed[--decoder->SurfaceUsedN]; decoder->SurfacesFree[decoder->SurfaceFreeN++] = surface; return; } } Error(_("video/vaapi: release surface %#010x, which is not in use\n"), surface); } // Init/Exit ------------------------------------------------------------ /// /// Debug VA-API decoder frames drop... 
/// /// @param decoder video hardware decoder /// static void VaapiPrintFrames(const VaapiDecoder * decoder) { Debug(3, "video/vaapi: %d missed, %d duped, %d dropped frames of %d,%d\n", decoder->FramesMissed, decoder->FramesDuped, decoder->FramesDropped, decoder->FrameCounter, decoder->FramesDisplayed); #ifndef DEBUG (void)decoder; #endif } /// /// Initialize surface flags. /// /// @param decoder video hardware decoder /// static void VaapiInitSurfaceFlags(VaapiDecoder * decoder) { int i; for (i = 0; i < VideoResolutionMax; ++i) { decoder->SurfaceFlagsTable[i] = VA_CLEAR_DRAWABLE; // color space conversion none, ITU-R BT.601, ITU-R BT.709, ... switch (VideoColorSpaces[i]) { case VideoColorSpaceNone: break; case VideoColorSpaceBt601: decoder->SurfaceFlagsTable[i] |= VA_SRC_BT601; break; case VideoColorSpaceBt709: decoder->SurfaceFlagsTable[i] |= VA_SRC_BT709; break; case VideoColorSpaceSmpte240: decoder->SurfaceFlagsTable[i] |= VA_SRC_SMPTE_240; break; } // scaling flags FAST, HQ, NL_ANAMORPHIC switch (VideoScaling[i]) { case VideoScalingNormal: decoder->SurfaceFlagsTable[i] |= VA_FILTER_SCALING_DEFAULT; break; case VideoScalingFast: decoder->SurfaceFlagsTable[i] |= VA_FILTER_SCALING_FAST; break; case VideoScalingHQ: // vdpau backend supports only VA_FILTER_SCALING_HQ // vdpau backend with advanced deinterlacer and my GT-210 // is too slow decoder->SurfaceFlagsTable[i] |= VA_FILTER_SCALING_HQ; break; case VideoScalingAnamorphic: // intel backend supports only VA_FILTER_SCALING_NL_ANAMORPHIC; // FIXME: Highlevel should display 4:3 as 16:9 to support this decoder->SurfaceFlagsTable[i] |= VA_FILTER_SCALING_NL_ANAMORPHIC; break; } // deinterlace flags (not yet supported by libva) switch (VideoDeinterlace[i]) { case VideoDeinterlaceBob: break; case VideoDeinterlaceWeave: break; case VideoDeinterlaceTemporal: //FIXME: private hack //decoder->SurfaceFlagsTable[i] |= 0x00002000; break; case VideoDeinterlaceTemporalSpatial: //FIXME: private hack 
//decoder->SurfaceFlagsTable[i] |= 0x00006000; break; default: break; } } } /// /// Allocate new VA-API decoder. /// /// @returns a new prepared VA-API hardware decoder. /// static VaapiDecoder *VaapiNewHwDecoder(void) { VaapiDecoder *decoder; int i; if (VaapiDecoderN == 1) { Fatal(_("video/vaapi: out of decoders\n")); } if (!(decoder = calloc(1, sizeof(*decoder)))) { Fatal(_("video/vaapi: out of memory\n")); } decoder->VaDisplay = VaDisplay; decoder->Window = VideoWindow; VaapiInitSurfaceFlags(decoder); decoder->DeintImages[0].image_id = VA_INVALID_ID; decoder->DeintImages[1].image_id = VA_INVALID_ID; decoder->DeintImages[2].image_id = VA_INVALID_ID; decoder->DeintImages[3].image_id = VA_INVALID_ID; decoder->DeintImages[4].image_id = VA_INVALID_ID; decoder->Image->image_id = VA_INVALID_ID; for (i = 0; i < CODEC_SURFACES_MAX; ++i) { decoder->SurfacesUsed[i] = VA_INVALID_ID; decoder->SurfacesFree[i] = VA_INVALID_ID; } // setup video surface ring buffer atomic_set(&decoder->SurfacesFilled, 0); for (i = 0; i < VIDEO_SURFACES_MAX; ++i) { decoder->SurfacesRb[i] = VA_INVALID_ID; } decoder->BlackSurface = VA_INVALID_ID; // // Setup ffmpeg vaapi context // decoder->VaapiContext->display = VaDisplay; decoder->VaapiContext->config_id = VA_INVALID_ID; decoder->VaapiContext->context_id = VA_INVALID_ID; #ifdef USE_GLX decoder->GlxSurface[0] = VA_INVALID_ID; decoder->GlxSurface[1] = VA_INVALID_ID; if (GlxEnabled) { // FIXME: create GLX context here } #endif decoder->OutputWidth = VideoWindowWidth; decoder->OutputHeight = VideoWindowHeight; decoder->PTS = AV_NOPTS_VALUE; // get/put still not working //decoder->GetPutImage = !VaapiBuggyIntel || VaapiNewIntel; decoder->GetPutImage = !VaapiBuggyIntel; VaapiDecoders[VaapiDecoderN++] = decoder; return decoder; } /// /// Cleanup VA-API. 
/// /// @param decoder va-api hw decoder /// static void VaapiCleanup(VaapiDecoder * decoder) { int filled; VASurfaceID surface; int i; // flush output queue, only 1-2 frames buffered, no big loss while ((filled = atomic_read(&decoder->SurfacesFilled))) { decoder->SurfaceRead = (decoder->SurfaceRead + 1) % VIDEO_SURFACES_MAX; atomic_dec(&decoder->SurfacesFilled); surface = decoder->SurfacesRb[decoder->SurfaceRead]; if (surface == VA_INVALID_ID) { Error(_("video/vaapi: invalid surface in ringbuffer\n")); continue; } // can crash and hang if (0 && vaSyncSurface(decoder->VaDisplay, surface) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); } } if (decoder->SurfaceRead != decoder->SurfaceWrite) { abort(); } // clear ring buffer for (i = 0; i < VIDEO_SURFACES_MAX; ++i) { decoder->SurfacesRb[i] = VA_INVALID_ID; } decoder->WrongInterlacedWarned = 0; // cleanup image if (decoder->Image->image_id != VA_INVALID_ID) { if (vaDestroyImage(VaDisplay, decoder->Image->image_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy image!\n")); } decoder->Image->image_id = VA_INVALID_ID; } // cleanup context and config if (decoder->VaapiContext) { if (decoder->VaapiContext->context_id != VA_INVALID_ID) { if (vaDestroyContext(VaDisplay, decoder->VaapiContext->context_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy context!\n")); } decoder->VaapiContext->context_id = VA_INVALID_ID; } if (decoder->VaapiContext->config_id != VA_INVALID_ID) { if (vaDestroyConfig(VaDisplay, decoder->VaapiContext->config_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy config!\n")); } decoder->VaapiContext->config_id = VA_INVALID_ID; } } // cleanup surfaces if (decoder->SurfaceFreeN || decoder->SurfaceUsedN) { VaapiDestroySurfaces(decoder); } // cleanup images if (decoder->DeintImages[0].image_id != VA_INVALID_ID) { VaapiDestroyDeinterlaceImages(decoder); } decoder->SurfaceRead = 0; decoder->SurfaceWrite = 0; decoder->SurfaceField = 0; 
decoder->SyncCounter = 0; decoder->FrameCounter = 0; decoder->FramesDisplayed = 0; decoder->Closing = 0; decoder->PTS = AV_NOPTS_VALUE; VideoDeltaPTS = 0; } /// /// Destroy a VA-API decoder. /// /// @param decoder VA-API decoder /// static void VaapiDelHwDecoder(VaapiDecoder * decoder) { int i; for (i = 0; i < VaapiDecoderN; ++i) { if (VaapiDecoders[i] == decoder) { VaapiDecoders[i] = NULL; VaapiDecoderN--; // FIXME: must copy last slot into empty slot and -- break; } } VaapiCleanup(decoder); if (decoder->BlackSurface != VA_INVALID_ID) { // // update OSD associate // if (VaOsdSubpicture != VA_INVALID_ID) { if (vaDeassociateSubpicture(VaDisplay, VaOsdSubpicture, &decoder->BlackSurface, 1) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't deassociate black surfaces\n")); } } if (vaDestroySurfaces(decoder->VaDisplay, &decoder->BlackSurface, 1) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy a surface\n")); } } #ifdef USE_GLX if (decoder->GlxSurface[0] != VA_INVALID_ID) { if (vaDestroySurfaceGLX(VaDisplay, decoder->GlxSurface[0]) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy glx surface!\n")); } } if (decoder->GlxSurface[1] != VA_INVALID_ID) { if (vaDestroySurfaceGLX(VaDisplay, decoder->GlxSurface[1]) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy glx surface!\n")); } } if (decoder->GlTexture[0]) { glDeleteTextures(2, decoder->GlTexture); } #endif VaapiPrintFrames(decoder); free(decoder); } #ifdef DEBUG // currently unused, keep it for later static VAProfile VaapiFindProfile(const VAProfile * profiles, unsigned n, VAProfile profile); static VAEntrypoint VaapiFindEntrypoint(const VAEntrypoint * entrypoints, unsigned n, VAEntrypoint entrypoint); /// /// 1080i /// static void Vaapi1080i(void) { VAProfile profiles[vaMaxNumProfiles(VaDisplay)]; int profile_n; VAEntrypoint entrypoints[vaMaxNumEntrypoints(VaDisplay)]; int entrypoint_n; int p; int e; VAConfigAttrib attrib; VAConfigID config_id; VAContextID context_id; VASurfaceID 
surfaces[32]; VAImage image[1]; int n; uint32_t start_tick; uint32_t tick; p = -1; e = -1; // prepare va-api profiles if (vaQueryConfigProfiles(VaDisplay, profiles, &profile_n)) { Error(_("codec: vaQueryConfigProfiles failed")); return; } // check profile p = VaapiFindProfile(profiles, profile_n, VAProfileH264High); if (p == -1) { Debug(3, "\tno profile found\n"); return; } // prepare va-api entry points if (vaQueryConfigEntrypoints(VaDisplay, p, entrypoints, &entrypoint_n)) { Error(_("codec: vaQueryConfigEntrypoints failed")); return; } e = VaapiFindEntrypoint(entrypoints, entrypoint_n, VAEntrypointVLD); if (e == -1) { Warning(_("codec: unsupported: slow path\n")); return; } memset(&attrib, 0, sizeof(attrib)); attrib.type = VAConfigAttribRTFormat; attrib.value = VA_RT_FORMAT_YUV420; // create a configuration for the decode pipeline if (vaCreateConfig(VaDisplay, p, e, &attrib, 1, &config_id)) { Error(_("codec: can't create config")); return; } if (vaCreateSurfaces(VaDisplay, VA_RT_FORMAT_YUV420, 1920, 1080, surfaces, 32, NULL, 0) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't create surfaces\n")); return; } // bind surfaces to context if (vaCreateContext(VaDisplay, config_id, 1920, 1080, VA_PROGRESSIVE, surfaces, 32, &context_id)) { Error(_("codec: can't create context")); return; } #if 1 // without this 1080i will crash image->image_id = VA_INVALID_ID; if (vaDeriveImage(VaDisplay, surfaces[0], image) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaDeriveImage failed\n")); } if (image->image_id != VA_INVALID_ID) { if (vaDestroyImage(VaDisplay, image->image_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy image!\n")); } } #else vaBeginPicture(VaDisplay, context_id, surfaces[0]); vaRenderPicture(VaDisplay, context_id, NULL, 0); // aborts without valid buffers upload vaEndPicture(VaDisplay, context_id); #endif start_tick = GetMsTicks(); for (n = 1; n < 2; ++n) { if (vaPutSurface(VaDisplay, surfaces[0], VideoWindow, // decoder src 0, 0, 1920, 1080, 
// video dst 0, 0, 1920, 1080, NULL, 0, VA_TOP_FIELD | VA_CLEAR_DRAWABLE) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaPutSurface failed\n")); } if (vaPutSurface(VaDisplay, surfaces[0], VideoWindow, // decoder src 0, 0, 1920, 1080, // video dst 0, 0, 1920, 1080, NULL, 0, VA_BOTTOM_FIELD | VA_CLEAR_DRAWABLE) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaPutSurface failed\n")); } tick = GetMsTicks(); if (!(n % 10)) { fprintf(stderr, "%dms / frame\n", (tick - start_tick) / n); } } // destory the stuff. if (vaDestroyContext(VaDisplay, context_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy context!\n")); } if (vaDestroySurfaces(VaDisplay, surfaces, 32) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy surfaces\n")); } if (vaDestroyConfig(VaDisplay, config_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy config!\n")); } fprintf(stderr, "done\n"); } #endif /// /// VA-API setup. /// /// @param display_name x11/xcb display name /// /// @returns true if VA-API could be initialized, false otherwise. /// static int VaapiInit(const char *display_name) { int major; int minor; VADisplayAttribute attr; const char *s; VaOsdImage.image_id = VA_INVALID_ID; VaOsdSubpicture = VA_INVALID_ID; #ifdef USE_GLX if (GlxEnabled) { // support glx VaDisplay = vaGetDisplayGLX(XlibDisplay); } else #endif { VaDisplay = vaGetDisplay(XlibDisplay); } if (!VaDisplay) { Error(_("video/vaapi: Can't connect VA-API to X11 server on '%s'\n"), display_name); return 0; } if (vaInitialize(VaDisplay, &major, &minor) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: Can't inititialize VA-API on '%s'\n"), display_name); vaTerminate(VaDisplay); VaDisplay = NULL; return 0; } s = vaQueryVendorString(VaDisplay); Info(_("video/vaapi: libva %d.%d (%s) initialized\n"), major, minor, s); // // Setup fixes for driver bugs. 
// if (strstr(s, "VDPAU")) { Info(_("video/vaapi: use vdpau bug workaround\n")); setenv("VDPAU_VIDEO_PUTSURFACE_FAST", "0", 0); VaapiBuggyVdpau = 1; } if (strstr(s, "Intel i965")) { VaapiBuggyIntel = 1; } if (strstr(s, "Intel i965 driver - 1.0.16.")) { VaapiNewIntel = 1; } // // check which attributes are supported // attr.type = VADisplayAttribBackgroundColor; attr.flags = VA_DISPLAY_ATTRIB_SETTABLE; if (vaGetDisplayAttributes(VaDisplay, &attr, 1) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: Can't get background-color attribute\n")); attr.value = 1; } Info(_("video/vaapi: background-color is %s\n"), attr.value ? _("supported") : _("unsupported")); // FIXME: VaapiSetBackground(VideoBackground); #if 0 // // check the chroma format // attr.type = VAConfigAttribRTFormat attr.flags = VA_DISPLAY_ATTRIB_GETTABLE; Vaapi1080i(); #endif return 1; } /// /// VA-API cleanup /// static void VaapiExit(void) { int i; // FIXME: more VA-API cleanups... // FIXME: can hang with vdpau in pthread_rwlock_wrlock for (i = 0; i < VaapiDecoderN; ++i) { if (VaapiDecoders[i]) { VaapiDelHwDecoder(VaapiDecoders[i]); VaapiDecoders[i] = NULL; } } VaapiDecoderN = 0; if (!VaDisplay) { vaTerminate(VaDisplay); VaDisplay = NULL; } } //---------------------------------------------------------------------------- /// /// Update output for new size or aspect ratio. /// /// @param decoder VA-API decoder /// static void VaapiUpdateOutput(VaapiDecoder * decoder) { VideoUpdateOutput(decoder->InputAspect, decoder->InputWidth, decoder->InputHeight, decoder->Resolution, &decoder->OutputX, &decoder->OutputY, &decoder->OutputWidth, &decoder->OutputHeight, &decoder->CropX, &decoder->CropY, &decoder->CropWidth, &decoder->CropHeight); #ifdef USE_AUTOCROP decoder->AutoCrop->State = 0; decoder->AutoCrop->Count = AutoCropDelay; #endif } /// /// Find VA-API profile. /// /// Check if the requested profile is supported by VA-API. 
/// /// @param profiles a table of all supported profiles /// @param n number of supported profiles /// @param profile requested profile /// /// @returns the profile if supported, -1 if unsupported. /// static VAProfile VaapiFindProfile(const VAProfile * profiles, unsigned n, VAProfile profile) { unsigned u; for (u = 0; u < n; ++u) { if (profiles[u] == profile) { return profile; } } return -1; } /// /// Find VA-API entry point. /// /// Check if the requested entry point is supported by VA-API. /// /// @param entrypoints a table of all supported entrypoints /// @param n number of supported entrypoints /// @param entrypoint requested entrypoint /// /// @returns the entry point if supported, -1 if unsupported. /// static VAEntrypoint VaapiFindEntrypoint(const VAEntrypoint * entrypoints, unsigned n, VAEntrypoint entrypoint) { unsigned u; for (u = 0; u < n; ++u) { if (entrypoints[u] == entrypoint) { return entrypoint; } } return -1; } /// /// Callback to negotiate the PixelFormat. /// /// @param fmt is the list of formats which are supported by the codec, /// it is terminated by -1 as 0 is a valid format, the /// formats are ordered by quality. 
///
///     @note + 2 surface for software deinterlace
///
///     Shows a black surface, cleans up the previous stream and then tries
///     to set up hardware decoding for the codec of @p video_ctx: find a
///     matching VA-API profile and a VLD entry point, create config,
///     surfaces and context and associate the OSD.  Every failure on the
///     way falls back to the "slow_path", which lets ffmpeg choose its
///     default (software) format.
///
///     @param decoder          VA-API decoder
///     @param video_ctx        ffmpeg video codec context
///
///     @returns the selected hardware pixel format, or the result of
///             avcodec_default_get_format() on the slow path.
///
static enum PixelFormat Vaapi_get_format(VaapiDecoder * decoder,
    AVCodecContext * video_ctx, const enum PixelFormat *fmt)
{
    const enum PixelFormat *fmt_idx;
    VAProfile profiles[vaMaxNumProfiles(VaDisplay)];
    int profile_n;
    VAEntrypoint entrypoints[vaMaxNumEntrypoints(VaDisplay)];
    int entrypoint_n;
    int p;
    int e;
    VAConfigAttrib attrib;

    Debug(3, "video: new stream format %dms\n", GetMsTicks() - VideoSwitch);
    // create initial black surface and display
    VaapiBlackSurface(decoder);
    // cleanup last context
    VaapiCleanup(decoder);

    // VideoHardwareDecoder == 1 keeps the hardware decoder, but not for
    // mpeg2 video
    if (!VideoHardwareDecoder || (video_ctx->codec_id == CODEC_ID_MPEG2VIDEO
            && VideoHardwareDecoder == 1)
        ) {                             // hardware disabled by config
        Debug(3, "codec: hardware acceleration disabled\n");
        goto slow_path;
    }

    // -1: no profile / entry point found yet
    p = -1;
    e = -1;

    //  prepare va-api profiles
    if (vaQueryConfigProfiles(VaDisplay, profiles, &profile_n)) {
        Error(_("codec: vaQueryConfigProfiles failed"));
        goto slow_path;
    }
    Debug(3, "codec: %d profiles\n", profile_n);

    // check profile
    // (each case also sets the number of surfaces the codec needs, plus
    // the output ring buffer, plus 2 for software deinterlace)
    switch (video_ctx->codec_id) {
        case CODEC_ID_MPEG2VIDEO:
            decoder->SurfacesNeeded =
                CODEC_SURFACES_MPEG2 + VIDEO_SURFACES_MAX + 2;
            p = VaapiFindProfile(profiles, profile_n, VAProfileMPEG2Main);
            break;
        case CODEC_ID_MPEG4:
        case CODEC_ID_H263:
            decoder->SurfacesNeeded =
                CODEC_SURFACES_MPEG4 + VIDEO_SURFACES_MAX + 2;
            p = VaapiFindProfile(profiles, profile_n,
                VAProfileMPEG4AdvancedSimple);
            break;
        case CODEC_ID_H264:
            decoder->SurfacesNeeded =
                CODEC_SURFACES_H264 + VIDEO_SURFACES_MAX + 2;
            // try more simple formats, fallback to better
            if (video_ctx->profile == FF_PROFILE_H264_BASELINE) {
                p = VaapiFindProfile(profiles, profile_n,
                    VAProfileH264Baseline);
                if (p == -1) {
                    p = VaapiFindProfile(profiles, profile_n,
                        VAProfileH264Main);
                }
            } else if (video_ctx->profile == FF_PROFILE_H264_MAIN) {
                p = VaapiFindProfile(profiles, profile_n, VAProfileH264Main);
            }
            // high profile is the last resort for every h264 stream
            if (p == -1) {
                p = VaapiFindProfile(profiles, profile_n, VAProfileH264High);
            }
            break;
        case CODEC_ID_WMV3:
            decoder->SurfacesNeeded =
                CODEC_SURFACES_VC1 + VIDEO_SURFACES_MAX + 2;
            p = VaapiFindProfile(profiles, profile_n, VAProfileVC1Main);
            break;
        case CODEC_ID_VC1:
            decoder->SurfacesNeeded =
                CODEC_SURFACES_VC1 + VIDEO_SURFACES_MAX + 2;
            p = VaapiFindProfile(profiles, profile_n, VAProfileVC1Advanced);
            break;
        default:
            goto slow_path;
    }
    if (p == -1) {
        Debug(3, "\tno profile found\n");
        goto slow_path;
    }
    Debug(3, "\tprofile %d\n", p);

    // prepare va-api entry points
    if (vaQueryConfigEntrypoints(VaDisplay, p, entrypoints, &entrypoint_n)) {
        Error(_("codec: vaQueryConfigEntrypoints failed"));
        goto slow_path;
    }
    Debug(3, "codec: %d entrypoints\n", entrypoint_n);
    //  look through formats
    //  (the loop stops at the first format with a usable entry point,
    //  fmt_idx then points to this format)
    for (fmt_idx = fmt; *fmt_idx != PIX_FMT_NONE; fmt_idx++) {
        Debug(3, "\t%#010x %s\n", *fmt_idx, av_get_pix_fmt_name(*fmt_idx));
        // check supported pixel format with entry point
        switch (*fmt_idx) {
            case PIX_FMT_VAAPI_VLD:
                e = VaapiFindEntrypoint(entrypoints, entrypoint_n,
                    VAEntrypointVLD);
                break;
            case PIX_FMT_VAAPI_MOCO:
            case PIX_FMT_VAAPI_IDCT:
                Debug(3, "codec: this VA-API pixel format is not supported\n");
                // falls through to skip this format
            default:
                continue;
        }
        if (e != -1) {
            Debug(3, "\tentry point %d\n", e);
            break;
        }
    }
    if (e == -1) {
        Warning(_("codec: unsupported: slow path\n"));
        goto slow_path;
    }
    //
    //  prepare decoder
    //
    memset(&attrib, 0, sizeof(attrib));
    attrib.type = VAConfigAttribRTFormat;
    if (vaGetConfigAttributes(decoder->VaDisplay, p, e, &attrib, 1)) {
        Error(_("codec: can't get attributes"));
        goto slow_path;
    }
    if (attrib.value & VA_RT_FORMAT_YUV420) {
        Info(_("codec: YUV 420 supported\n"));
    }
    if (attrib.value & VA_RT_FORMAT_YUV422) {
        Info(_("codec: YUV 422 supported\n"));
    }
    if (attrib.value & VA_RT_FORMAT_YUV444) {
        Info(_("codec: YUV 444 supported\n"));
    }

    // the surfaces below are created as YUV 420
    if (!(attrib.value & VA_RT_FORMAT_YUV420)) {
        Warning(_("codec: YUV 420 not supported\n"));
        goto slow_path;
    }
    // create a configuration for the decode pipeline
    if (vaCreateConfig(decoder->VaDisplay, p, e, &attrib, 1,
            &decoder->VaapiContext->config_id)) {
        Error(_("codec: can't create config"));
        goto slow_path;
    }
    // FIXME: interlaced not valid here?
    decoder->Resolution =
        VideoResolutionGroup(video_ctx->width, video_ctx->height,
        decoder->Interlaced);

    // FIXME: need only to create and destroy surfaces for size changes
    //          or when number of needed surfaces changed!
    VaapiCreateSurfaces(decoder, video_ctx->width, video_ctx->height);

    // bind surfaces to context
    if (vaCreateContext(decoder->VaDisplay, decoder->VaapiContext->config_id,
            video_ctx->width, video_ctx->height, VA_PROGRESSIVE,
            decoder->SurfacesFree, decoder->SurfaceFreeN,
            &decoder->VaapiContext->context_id)) {
        Error(_("codec: can't create context"));
        goto slow_path;
    }

    decoder->PixFmt = *fmt_idx;
    decoder->InputWidth = video_ctx->width;
    decoder->InputHeight = video_ctx->height;
    decoder->InputAspect = video_ctx->sample_aspect_ratio;
    VaapiUpdateOutput(decoder);

    //
    //  update OSD associate
    //
    VaapiAssociate(decoder);

#ifdef USE_GLX
    if (GlxEnabled) {
        GlxSetupDecoder(decoder);
        // FIXME: try two textures, but vdpau-backend supports only 1 surface
        if (vaCreateSurfaceGLX(decoder->VaDisplay, GL_TEXTURE_2D,
                decoder->GlTexture[0], &decoder->GlxSurface[0])
            != VA_STATUS_SUCCESS) {
            Fatal(_("video/glx: can't create glx surfaces\n"));
        }
        // FIXME: this isn't usable with vdpau-backend
        /*
           if (vaCreateSurfaceGLX(decoder->VaDisplay, GL_TEXTURE_2D,
           decoder->GlTexture[1], &decoder->GlxSurface[1])
           != VA_STATUS_SUCCESS) {
           Fatal(_("video/glx: can't create glx surfaces\n"));
           }
         */
    }
#endif

    Debug(3, "\t%#010x %s\n", fmt_idx[0], av_get_pix_fmt_name(fmt_idx[0]));
    return *fmt_idx;

  slow_path:
    // no accelerated format found
    // (input size 0 and no hwaccel context, ffmpeg selects the format)
    decoder->SurfacesNeeded = VIDEO_SURFACES_MAX + 2;
    decoder->InputWidth = 0;
    decoder->InputHeight = 0;
    video_ctx->hwaccel_context = NULL;
    return avcodec_default_get_format(video_ctx, fmt);
}

///
///     Draw surface of the VA-API decoder with x11.
///
///     vaPutSurface with intel backend does sync on v-sync.
///
/// @param decoder          VA-API decoder
/// @param surface          VA-API surface id
/// @param interlaced       flag interlaced source
/// @param top_field_first  flag top_field_first for interlaced source
/// @param field            interlaced draw: 0 first field, 1 second field
///
/// @note a failing vaPutSurface is only logged, nothing is returned to
///     the caller.
/// @note the function always ends with a 1ms sleep.
///
static void VaapiPutSurfaceX11(VaapiDecoder * decoder, VASurfaceID surface,
    int interlaced, int top_field_first, int field)
{
    unsigned type;
    VAStatus status;
    uint32_t s;                         // tick before the put
    uint32_t e;                         // tick after the put

    // deinterlace
    // A single field is only selected for the deinterlace modes ordered
    // before VideoDeinterlaceSoftBob, except weave; in all other cases
    // the surface is shown as a full frame.
    if (interlaced
        && VideoDeinterlace[decoder->Resolution] < VideoDeinterlaceSoftBob
        && VideoDeinterlace[decoder->Resolution] != VideoDeinterlaceWeave) {
        if (top_field_first) {
            if (field) {
                type = VA_BOTTOM_FIELD;
            } else {
                type = VA_TOP_FIELD;
            }
        } else {
            if (field) {
                type = VA_TOP_FIELD;
            } else {
                type = VA_BOTTOM_FIELD;
            }
        }
    } else {
        type = VA_FRAME_PICTURE;
    }

    s = GetMsTicks();
    // flush pending xcb requests before the surface is put
    xcb_flush(Connection);
    if ((status = vaPutSurface(decoder->VaDisplay, surface, decoder->Window,
                // decoder src
                decoder->CropX, decoder->CropY, decoder->CropWidth,
                decoder->CropHeight,
                // video dst
                decoder->OutputX, decoder->OutputY, decoder->OutputWidth,
                decoder->OutputHeight, NULL, 0,
                type | decoder->SurfaceFlagsTable[decoder->Resolution]))
        != VA_STATUS_SUCCESS) {
        // switching video kills VdpPresentationQueueBlockUntilSurfaceIdle
        Error(_("video/vaapi: vaPutSurface failed %d\n"), status);
    }
    // disabled: explicit sync after the put
    if (0 && vaSyncSurface(decoder->VaDisplay, surface) != VA_STATUS_SUCCESS) {
        Error(_("video/vaapi: vaSyncSurface failed\n"));
    }
    e = GetMsTicks();
    // a put taking longer than 2000ms is reported to the log and stderr
    if (e - s > 2000) {
        Error(_("video/vaapi: gpu hung %dms %d\n"), e - s,
            decoder->FrameCounter);
        fprintf(stderr, _("video/vaapi: gpu hung %dms %d\n"), e - s,
            decoder->FrameCounter);
    }

    // disabled debug code; note the inner 'status' shadows the outer one
    if (0) {
        // check if surface is really ready
        // VDPAU backend, says always ready
        VASurfaceStatus status;

        if (vaQuerySurfaceStatus(decoder->VaDisplay, surface, &status)
            != VA_STATUS_SUCCESS) {
            Error(_("video/vaapi: vaQuerySurface failed\n"));
            status = VASurfaceReady;
        }
        if (status != VASurfaceReady) {
            Warning(_
                ("video/vaapi: surface %#010x not ready: still displayed %d\n"),
                surface, status);
            return;
        }
    }

    // disabled debug code
    if (0) {
        int i;

        // look how the status changes the next 40ms
        for (i = 0; i < 40; ++i) {
            VASurfaceStatus status;

            if (vaQuerySurfaceStatus(VaDisplay, surface, &status)
                != VA_STATUS_SUCCESS) {
                Error(_("video/vaapi: vaQuerySurface failed\n"));
            }
            Debug(3, "video/vaapi: %2d %d\n", i, status);
            usleep(1 * 1000);
        }
    }
    usleep(1 * 1000);
}

#ifdef USE_GLX

///
/// Render texture.
///
/// Draws the texture as one quad with the immediate mode GL calls;
/// texture coordinate (0,0) is mapped to (x,y) and (1,1) to
/// (x+width,y+height).
///
/// @param texture  2d texture
/// @param x        x coordinate of the quad
/// @param y        y coordinate of the quad
/// @param width    width of the quad
/// @param height   height of the quad
///
static inline void VideoRenderTexture(GLuint texture, int x, int y, int width,
    int height)
{
    glEnable(GL_TEXTURE_2D);
    glBindTexture(GL_TEXTURE_2D, texture);

    glColor4f(1.0f, 1.0f, 1.0f, 1.0f);  // no color
    glBegin(GL_QUADS); {
        glTexCoord2f(1.0f, 1.0f);
        glVertex2i(x + width, y + height);
        glTexCoord2f(0.0f, 1.0f);
        glVertex2i(x, y + height);
        glTexCoord2f(0.0f, 0.0f);
        glVertex2i(x, y);
        glTexCoord2f(1.0f, 0.0f);
        glVertex2i(x + width, y);
        // disabled: same quad with another vertex order
#if 0
        glTexCoord2f(0.0f, 0.0f);
        glVertex2i(x, y);
        glTexCoord2f(0.0f, 1.0f);
        glVertex2i(x, y + height);
        glTexCoord2f(1.0f, 1.0f);
        glVertex2i(x + width, y + height);
        glTexCoord2f(1.0f, 0.0f);
        glVertex2i(x + width, y);
#endif
    }
    glEnd();

    // leave no texture bound
    glBindTexture(GL_TEXTURE_2D, 0);
    glDisable(GL_TEXTURE_2D);
}

///
/// Draw surface of the VA-API decoder with glx.
/// /// @param decoder VA-API decoder /// @param surface VA-API surface id /// @param interlaced flag interlaced source /// @param top_field_first flag top_field_first for interlaced source /// @param field interlaced draw: 0 first field, 1 second field /// static void VaapiPutSurfaceGLX(VaapiDecoder * decoder, VASurfaceID surface, int interlaced, int top_field_first, int field) { unsigned type; uint32_t start; uint32_t copy; uint32_t end; // deinterlace if (interlaced && VideoDeinterlace[decoder->Resolution] != VideoDeinterlaceWeave) { if (top_field_first) { if (field) { type = VA_BOTTOM_FIELD; } else { type = VA_TOP_FIELD; } } else { if (field) { type = VA_TOP_FIELD; } else { type = VA_BOTTOM_FIELD; } } } else { type = VA_FRAME_PICTURE; } start = GetMsTicks(); if (vaCopySurfaceGLX(decoder->VaDisplay, decoder->GlxSurface[0], surface, type | decoder->SurfaceFlagsTable[decoder->Resolution]) != VA_STATUS_SUCCESS) { Error(_("video/glx: vaCopySurfaceGLX failed\n")); return; } copy = GetMsTicks(); // hardware surfaces are always busy VideoRenderTexture(decoder->GlTexture[0], decoder->OutputX, decoder->OutputY, decoder->OutputWidth, decoder->OutputHeight); end = GetMsTicks(); //Debug(3, "video/vaapi/glx: %d copy %d render\n", copy - start, end - copy); } #endif /// /// Find VA-API image format. 
/// /// @param decoder VA-API decoder /// @param pix_fmt ffmpeg pixel format /// @param[out] format image format /// /// FIXME: can fallback from I420 to YV12, if not supported /// FIXME: must check if put/get with this format is supported (see intel) /// static int VaapiFindImageFormat(VaapiDecoder * decoder, enum PixelFormat pix_fmt, VAImageFormat * format) { VAImageFormat *imgfrmts; int imgfrmt_n; int i; unsigned fourcc; switch (pix_fmt) { // convert ffmpeg to VA-API // NV12, YV12, I420, BGRA // intel: I420 is native format for MPEG-2 decoded surfaces // intel: NV12 is native format for H.264 decoded surfaces case PIX_FMT_YUV420P: // fourcc = VA_FOURCC_YV12; // YVU fourcc = VA_FOURCC('I', '4', '2', '0'); // YUV break; case PIX_FMT_NV12: fourcc = VA_FOURCC_NV12; break; default: Fatal(_("video/vaapi: unsupported pixel format %d\n"), pix_fmt); } imgfrmt_n = vaMaxNumImageFormats(decoder->VaDisplay); imgfrmts = alloca(imgfrmt_n * sizeof(*imgfrmts)); if (vaQueryImageFormats(decoder->VaDisplay, imgfrmts, &imgfrmt_n) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaQueryImageFormats failed\n")); return 0; } Debug(3, "video/vaapi: search format %c%c%c%c in %d image formats\n", fourcc, fourcc >> 8, fourcc >> 16, fourcc >> 24, imgfrmt_n); Debug(3, "video/vaapi: supported image formats:\n"); for (i = 0; i < imgfrmt_n; ++i) { Debug(3, "video/vaapi:\t%c%c%c%c\t%d\n", imgfrmts[i].fourcc, imgfrmts[i].fourcc >> 8, imgfrmts[i].fourcc >> 16, imgfrmts[i].fourcc >> 24, imgfrmts[i].depth); } // // search image format // for (i = 0; i < imgfrmt_n; ++i) { if (imgfrmts[i].fourcc == fourcc) { *format = imgfrmts[i]; Debug(3, "video/vaapi: use\t%c%c%c%c\t%d\n", imgfrmts[i].fourcc, imgfrmts[i].fourcc >> 8, imgfrmts[i].fourcc >> 16, imgfrmts[i].fourcc >> 24, imgfrmts[i].depth); return 1; } } Fatal("video/vaapi: pixel format %d unsupported by VA-API\n", pix_fmt); // FIXME: no fatal error! return 0; } /// /// Configure VA-API for new video format. 
/// /// @param decoder VA-API decoder /// /// @note called only for software decoder. /// @note FIXME: combine with hardware decoder setup. /// static void VaapiSetup(VaapiDecoder * decoder, const AVCodecContext * video_ctx) { int width; int height; VAImageFormat format[1]; // create initial black surface and display VaapiBlackSurface(decoder); // cleanup last context VaapiCleanup(decoder); width = video_ctx->width; height = video_ctx->height; // FIXME: remove this if if (decoder->Image->image_id != VA_INVALID_ID) { abort(); // should be done by VaapiCleanup() } VaapiFindImageFormat(decoder, video_ctx->pix_fmt, format); // FIXME: this image is only needed for software decoder and auto-crop if (decoder->GetPutImage && vaCreateImage(VaDisplay, format, width, height, decoder->Image) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't create image!\n")); } Debug(3, "video/vaapi: created image %dx%d with id 0x%08x and buffer id 0x%08x\n", width, height, decoder->Image->image_id, decoder->Image->buf); // FIXME: interlaced not valid here? decoder->Resolution = VideoResolutionGroup(width, height, decoder->Interlaced); VaapiCreateSurfaces(decoder, width, height); #ifdef USE_GLX if (GlxEnabled) { // FIXME: destroy old context GlxSetupDecoder(decoder); // FIXME: try two textures if (vaCreateSurfaceGLX(decoder->VaDisplay, GL_TEXTURE_2D, decoder->GlTexture[0], &decoder->GlxSurface[0]) != VA_STATUS_SUCCESS) { Fatal(_("video/glx: can't create glx surfaces\n")); } /* if (vaCreateSurfaceGLX(decoder->VaDisplay, GL_TEXTURE_2D, decoder->GlTexture[1], &decoder->GlxSurface[1]) != VA_STATUS_SUCCESS) { Fatal(_("video/glx: can't create glx surfaces\n")); } */ } #endif VaapiUpdateOutput(decoder); // // update OSD associate // VaapiAssociate(decoder); } #ifdef USE_AUTOCROP /// /// VA-API auto-crop support. 
/// /// @param decoder VA-API hw decoder /// static void VaapiAutoCrop(VaapiDecoder * decoder) { VASurfaceID surface; uint32_t width; uint32_t height; void *va_image_data; void *data[3]; uint32_t pitches[3]; int crop14; int crop16; int next_state; int i; width = decoder->InputWidth; height = decoder->InputHeight; again: if (decoder->GetPutImage && decoder->Image->image_id == VA_INVALID_ID) { VAImageFormat format[1]; Debug(3, "video/vaapi: download image not available\n"); // FIXME: PixFmt not set! //VaapiFindImageFormat(decoder, decoder->PixFmt, format); VaapiFindImageFormat(decoder, PIX_FMT_NV12, format); //VaapiFindImageFormat(decoder, PIX_FMT_YUV420P, format); if (vaCreateImage(VaDisplay, format, width, height, decoder->Image) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't create image!\n")); return; } } // no problem to go back, we just wrote it // FIXME: we can pass the surface through. surface = decoder->SurfacesRb[(decoder->SurfaceWrite + VIDEO_SURFACES_MAX - 1) % VIDEO_SURFACES_MAX]; // Copy data from frame to image if (!decoder->GetPutImage && vaDeriveImage(decoder->VaDisplay, surface, decoder->Image) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaDeriveImage failed\n")); decoder->GetPutImage = 1; goto again; } if (decoder->GetPutImage && (i = vaGetImage(decoder->VaDisplay, surface, 0, 0, decoder->InputWidth, decoder->InputHeight, decoder->Image->image_id)) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't get auto-crop image %d\n"), i); printf(_("video/vaapi: can't get auto-crop image %d\n"), i); return; } if (vaMapBuffer(VaDisplay, decoder->Image->buf, &va_image_data) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't map auto-crop image!\n")); return; } // convert vaapi to our frame format for (i = 0; (unsigned)i < decoder->Image->num_planes; ++i) { data[i] = va_image_data + decoder->Image->offsets[i]; pitches[i] = decoder->Image->pitches[i]; } AutoCropDetect(decoder->AutoCrop, width, height, data, pitches); if (vaUnmapBuffer(VaDisplay, 
decoder->Image->buf) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't unmap auto-crop image!\n")); } if (!decoder->GetPutImage) { if (vaDestroyImage(VaDisplay, decoder->Image->image_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy image!\n")); } decoder->Image->image_id = VA_INVALID_ID; } // FIXME: this a copy of vdpau, combine the two same things // ignore black frames if (decoder->AutoCrop->Y1 >= decoder->AutoCrop->Y2) { return; } crop14 = (decoder->InputWidth * decoder->InputAspect.num * 9) / (decoder->InputAspect.den * 14); crop14 = (decoder->InputHeight - crop14) / 2; crop16 = (decoder->InputWidth * decoder->InputAspect.num * 9) / (decoder->InputAspect.den * 16); crop16 = (decoder->InputHeight - crop16) / 2; if (decoder->AutoCrop->Y1 >= crop16 - AutoCropTolerance && decoder->InputHeight - decoder->AutoCrop->Y2 >= crop16 - AutoCropTolerance) { next_state = 16; } else if (decoder->AutoCrop->Y1 >= crop14 - AutoCropTolerance && decoder->InputHeight - decoder->AutoCrop->Y2 >= crop14 - AutoCropTolerance) { next_state = 14; } else { next_state = 0; } if (decoder->AutoCrop->State == next_state) { return; } Debug(3, "video: crop aspect %d:%d %d/%d %d+%d\n", decoder->InputAspect.num, decoder->InputAspect.den, crop14, crop16, decoder->AutoCrop->Y1, decoder->InputHeight - decoder->AutoCrop->Y2); Debug(3, "video: crop aspect %d -> %d\n", decoder->AutoCrop->State, next_state); switch (decoder->AutoCrop->State) { case 16: case 14: if (decoder->AutoCrop->Count++ < AutoCropDelay / 2) { return; } break; case 0: if (decoder->AutoCrop->Count++ < AutoCropDelay) { return; } break; } decoder->AutoCrop->State = next_state; if (next_state) { decoder->CropX = VideoCutLeftRight[decoder->Resolution]; decoder->CropY = (next_state == 16 ? 
crop16 : crop14) + VideoCutTopBottom[decoder->Resolution]; decoder->CropWidth = decoder->InputWidth - decoder->CropX * 2; decoder->CropHeight = decoder->InputHeight - decoder->CropY * 2; // FIXME: this overwrites user choosen output position // FIXME: resize kills the auto crop values // FIXME: support other 4:3 zoom modes decoder->OutputX = 0; decoder->OutputY = 0; decoder->OutputWidth = (VideoWindowHeight * next_state) / 9; decoder->OutputHeight = (VideoWindowWidth * 9) / next_state; if ((unsigned)decoder->OutputWidth > VideoWindowWidth) { decoder->OutputWidth = VideoWindowWidth; decoder->OutputY = (VideoWindowHeight - decoder->OutputHeight) / 2; } else if ((unsigned)decoder->OutputHeight > VideoWindowHeight) { decoder->OutputHeight = VideoWindowHeight; decoder->OutputX = (VideoWindowWidth - decoder->OutputWidth) / 2; } Debug(3, "video: aspect output %dx%d %dx%d+%d+%d\n", decoder->InputWidth, decoder->InputHeight, decoder->OutputWidth, decoder->OutputHeight, decoder->OutputX, decoder->OutputY); } else { // sets AutoCrop->Count VaapiUpdateOutput(decoder); } decoder->AutoCrop->Count = 0; // // update OSD associate // VaapiDeassociate(decoder); VaapiAssociate(decoder); } /// /// VA-API check if auto-crop todo. 
/// /// @param decoder VA-API hw decoder /// /// @note a copy of VdpauCheckAutoCrop /// @note auto-crop only supported with normal 4:3 display mode /// static void VaapiCheckAutoCrop(VaapiDecoder * decoder) { // reduce load, check only n frames if (Video4to3ZoomMode == VideoNormal && AutoCropInterval && !(decoder->FrameCounter % AutoCropInterval)) { AVRational display_aspect_ratio; av_reduce(&display_aspect_ratio.num, &display_aspect_ratio.den, decoder->InputWidth * decoder->InputAspect.num, decoder->InputHeight * decoder->InputAspect.den, 1024 * 1024); // only 4:3 with 16:9/14:9 inside supported if (display_aspect_ratio.num == 4 && display_aspect_ratio.den == 3) { VaapiAutoCrop(decoder); } else { decoder->AutoCrop->Count = 0; decoder->AutoCrop->State = 0; } } } /// /// VA-API reset auto-crop. /// static void VaapiResetAutoCrop(void) { int i; for (i = 0; i < VaapiDecoderN; ++i) { VaapiDecoders[i]->AutoCrop->State = 0; VaapiDecoders[i]->AutoCrop->Count = 0; } } #endif /// /// Queue output surface. 
///
/// Stores the surface at the write position of the output ring buffer.
/// If the ring buffer is already full, the frame is dropped instead.
///
/// @param decoder  VA-API decoder
/// @param surface  output surface
/// @param softdec  software decoder
///
/// @note we can't mix software and hardware decoder surfaces
/// @note only with @p softdec set, a dropped or replaced surface is
///     given back with VaapiReleaseSurface().
///
static void VaapiQueueSurface(VaapiDecoder * decoder, VASurfaceID surface,
    int softdec)
{
    VASurfaceID old;

    ++decoder->FrameCounter;

    if (1) {                            // can't wait for output queue empty
        if (atomic_read(&decoder->SurfacesFilled) >= VIDEO_SURFACES_MAX) {
            ++decoder->FramesDropped;
            Warning(_("video: output buffer full, dropping frame (%d/%d)\n"),
                decoder->FramesDropped, decoder->FrameCounter);
            if (!(decoder->FramesDisplayed % 300)) {
                VaapiPrintFrames(decoder);
            }
            if (softdec) {              // software surfaces only
                VaapiReleaseSurface(decoder, surface);
            }
            return;
        }
        // the disabled part closes the 'if (1)' and opens the waiting
        // alternative; the brace after #endif closes whichever is active
#if 0
    } else {                            // wait for output queue empty
        while (atomic_read(&decoder->SurfacesFilled) >= VIDEO_SURFACES_MAX) {
            VideoDisplayHandler();
        }
#endif
    }

    //
    //  Check and release, old surface
    //
    if ((old = decoder->SurfacesRb[decoder->SurfaceWrite])
        != VA_INVALID_ID) {

#if 0
        if (vaSyncSurface(decoder->VaDisplay, old) != VA_STATUS_SUCCESS) {
            Error(_("video/vaapi: vaSyncSurface failed\n"));
        }
        VASurfaceStatus status;

        if (vaQuerySurfaceStatus(decoder->VaDisplay, old, &status)
            != VA_STATUS_SUCCESS) {
            Error(_("video/vaapi: vaQuerySurface failed\n"));
            status = VASurfaceReady;
        }
        if (status != VASurfaceReady) {
            Warning(_
                ("video/vaapi: surface %#010x not ready: still displayed %d\n"),
                old, status);
            if (0
                && vaSyncSurface(decoder->VaDisplay,
                    old) != VA_STATUS_SUCCESS) {
                Error(_("video/vaapi: vaSyncSurface failed\n"));
            }
        }
#endif

        // now we can release the surface
        if (softdec) {                  // software surfaces only
            VaapiReleaseSurface(decoder, old);
        }
    }
#if 0
    // FIXME: intel seems to forget this, nvidia GT 210 has speed problems here
    if (VaapiBuggyIntel && VaOsdSubpicture != VA_INVALID_ID) {
        // FIXME: associate only if osd is displayed

        //
        //      associate the OSD with surface
        //
        if (VaapiUnscaledOsd) {
            if (vaAssociateSubpicture(VaDisplay, VaOsdSubpicture, &surface, 1,
                    0, 0, VaOsdImage.width, VaOsdImage.height, 0, 0,
                    VideoWindowWidth, VideoWindowHeight,
                    VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
                != VA_STATUS_SUCCESS) {
                Error(_("video/vaapi: can't associate subpicture\n"));
            }
        } else {
            // FIXME: auto-crop wrong position
            if (vaAssociateSubpicture(VaDisplay, VaOsdSubpicture, &surface, 1,
                    0, 0, VaOsdImage.width, VaOsdImage.height, 0, 0,
                    decoder->InputWidth, decoder->InputHeight, 0)
                != VA_STATUS_SUCCESS) {
                Error(_("video/vaapi: can't associate subpicture\n"));
            }
        }
    }
#endif

    // store the surface, advance the write position with wrap around and
    // count the new filled slot
    decoder->SurfacesRb[decoder->SurfaceWrite] = surface;
    decoder->SurfaceWrite = (decoder->SurfaceWrite + 1)
        % VIDEO_SURFACES_MAX;
    atomic_inc(&decoder->SurfacesFilled);

    Debug(4, "video/vaapi: yy video surface %#010x ready\n", surface);
}

///
/// Create and display a black empty surface.
///
/// The surface is created in window size on the first call and kept in
/// decoder->BlackSurface; each call puts it into the decoder window.
/// Nothing is done as long as no OSD subpicture exists.
///
/// @param decoder  VA-API decoder
///
static void VaapiBlackSurface(VaapiDecoder * decoder)
{
    VAStatus status;
    uint32_t start;                     // tick before sync
    uint32_t sync;                      // tick after sync, before put
    uint32_t put1;                      // tick after put

    // wait until we have osd subpicture
    if (VaOsdSubpicture == VA_INVALID_ID) {
        Warning(_("video/vaapi: no osd subpicture yet\n"));
        return;
    }

    if (decoder->BlackSurface == VA_INVALID_ID) {
        if (vaCreateSurfaces(decoder->VaDisplay, VA_RT_FORMAT_YUV420,
                VideoWindowWidth, VideoWindowHeight, &decoder->BlackSurface,
                1, NULL, 0) != VA_STATUS_SUCCESS) {
            Error(_("video/vaapi: can't create a surface\n"));
            return;
        }
        // full sized surface, no difference unscaled/scaled osd
        if (vaAssociateSubpicture(decoder->VaDisplay, VaOsdSubpicture,
                &decoder->BlackSurface, 1, 0, 0, VaOsdImage.width,
                VaOsdImage.height, 0, 0, VideoWindowWidth, VideoWindowHeight,
                0) != VA_STATUS_SUCCESS) {
            Error(_("video/vaapi: can't associate subpicture\n"));
        }
        Debug(3, "video/vaapi: associate %08x\n", decoder->BlackSurface);
        // FIXME: check if intel forgets this also

        // disabled: fill the surface through a derived or created image
        if (0 && decoder->Image->image_id == VA_INVALID_ID) {
            VAImageFormat format[1];
            void *va_image_data;
            int i;

            printf("No image\n");
            VaapiFindImageFormat(decoder, PIX_FMT_NV12, format);
            if ((status =
                    vaDeriveImage(decoder->VaDisplay, decoder->BlackSurface,
                        decoder->Image)) != VA_STATUS_SUCCESS) {
                Error(_("video/vaapi: vaDeriveImage failed %d\n"), status);
                if (vaCreateImage(VaDisplay, format, VideoWindowWidth,
                        VideoWindowHeight,
                        decoder->Image) != VA_STATUS_SUCCESS) {
                    Error(_("video/vaapi: can't create image!\n"));
                }
            }
            if (vaMapBuffer(VaDisplay, decoder->Image->buf, &va_image_data)
                != VA_STATUS_SUCCESS) {
                Error(_("video/vaapi: can't map the image!\n"));
            }

            for (i = 0; (unsigned)i < decoder->Image->data_size; i += 2) {
                ((uint8_t *) va_image_data)[i + 0] = 0xFF;
                ((uint8_t *) va_image_data)[i + 1] = 0xFF;
            }

            if (vaUnmapBuffer(VaDisplay,
                    decoder->Image->buf) != VA_STATUS_SUCCESS) {
                Error(_("video/vaapi: can't unmap the image!\n"));
            }
            if (vaDestroyImage(VaDisplay,
                    decoder->Image->image_id) != VA_STATUS_SUCCESS) {
                Error(_("video/vaapi: can't destroy image!\n"));
            }
        }
        // FIXME: intel didn't support put image.
        if (0
            && vaPutImage(VaDisplay, decoder->BlackSurface,
                decoder->Image->image_id, 0, 0, VideoWindowWidth,
                VideoWindowHeight, 0, 0, VideoWindowWidth, VideoWindowHeight)
            != VA_STATUS_SUCCESS) {
            Error(_("video/vaapi: can't put image!\n"));
        }

        start = GetMsTicks();
        if (vaSyncSurface(decoder->VaDisplay,
                decoder->BlackSurface) != VA_STATUS_SUCCESS) {
            Error(_("video/vaapi: vaSyncSurface failed\n"));
        }
    } else {
        start = GetMsTicks();
    }

    Debug(4, "video/vaapi: yy black video surface %#010x displayed\n",
        decoder->BlackSurface);
    sync = GetMsTicks();
    xcb_flush(Connection);
    // source and destination rectangle are both the output rectangle
    if ((status =
            vaPutSurface(decoder->VaDisplay, decoder->BlackSurface,
                decoder->Window,
                // decoder src
                decoder->OutputX, decoder->OutputY, decoder->OutputWidth,
                decoder->OutputHeight,
                // video dst
                decoder->OutputX, decoder->OutputY, decoder->OutputWidth,
                decoder->OutputHeight, NULL, 0,
                VA_FRAME_PICTURE)) != VA_STATUS_SUCCESS) {
        Error(_("video/vaapi: vaPutSurface failed %d\n"), status);
    }
    clock_gettime(CLOCK_REALTIME, &decoder->FrameTime);

    put1 = GetMsTicks();
    // a put taking longer than 2000ms is reported to the log and stderr
    if (put1 - sync > 2000) {
        Error(_("video/vaapi: gpu hung %dms %d\n"), put1 - sync,
            decoder->FrameCounter);
        fprintf(stderr, _("video/vaapi: gpu hung %dms %d\n"), put1 - sync,
            decoder->FrameCounter);
    }
    Debug(4, "video/vaapi: sync %2u put1 %2u\n", sync - start, put1 - sync);

    // disabled: explicit sync after the put
    if (0
        && vaSyncSurface(decoder->VaDisplay,
            decoder->BlackSurface) != VA_STATUS_SUCCESS) {
        Error(_("video/vaapi: vaSyncSurface failed\n"));
    }

    usleep(1 * 1000);
}

#define noUSE_VECTOR                    ///< use gcc vector extension
#ifdef USE_VECTOR

typedef char v16qi __attribute__ ((vector_size(16)));
typedef char v8qi __attribute__ ((vector_size(8)));
typedef int16_t v4hi __attribute__ ((vector_size(4)));
typedef int16_t v8hi __attribute__ ((vector_size(8)));

///
/// ELA Edge-based Line Averaging
/// Low-Complexity Interpolation Method
///
/// abcdefg abcdefg abcdefg abcdefg abcdefg
/// x x x x x
/// hijklmn hijklmn hijklmn hijklmn hijklmn
///
/// Vector variant, only compiled with USE_VECTOR.  As written it only
/// stores the average of the pixels above and below; the scores are
/// computed, but not used to choose another prediction.
///
/// @param dst      output line
/// @param cur      current position in the source
/// @param width    number of bytes to create
/// @param above    offset from @p cur to the line above
/// @param below    offset from @p cur to the line below
/// @param next     offset to the next sample in a line
///
static void FilterLineSpatial(uint8_t * dst, const uint8_t * cur, int width,
    int above, int below, int next)
{
    int x;

    // 8/16 128bit xmm register
    for (x = 0; x < width; x += 8) {
        v8qi c;
        v8qi d;
        v8qi e;
        v8qi j;
        v8qi k;
        v8qi l;
        v8qi t1;
        v8qi t2;
        v8qi pred;
        v8qi score_l;
        v8qi score_h;
        v8qi t_l;
        v8qi t_h;
        v8qi zero;

        // ignore bound violation
        d = *(v8qi *) & cur[above + x];
        k = *(v8qi *) & cur[below + x];
        pred = __builtin_ia32_pavgb(d, k);

        // score = ABS(c - j) + ABS(d - k) + ABS(e - l);
        c = *(v8qi *) & cur[above + x - 1 * next];
        e = *(v8qi *) & cur[above + x + 1 * next];
        j = *(v8qi *) & cur[below + x - 1 * next];
        l = *(v8qi *) & cur[below + x + 1 * next];

        // |c - j| as maximum of both saturated differences
        t1 = __builtin_ia32_psubusb(c, j);
        t2 = __builtin_ia32_psubusb(j, c);
        t1 = __builtin_ia32_pmaxub(t1, t2);
        // NOTE(review): 'zero' is read before it was ever written
        zero ^= zero;
        score_l = __builtin_ia32_punpcklbw(t1, zero);
        score_h = __builtin_ia32_punpckhbw(t1, zero);

        // + |d - k|
        t1 = __builtin_ia32_psubusb(d, k);
        t2 = __builtin_ia32_psubusb(k, d);
        t1 = __builtin_ia32_pmaxub(t1, t2);
        t_l = __builtin_ia32_punpcklbw(t1, zero);
        t_h = __builtin_ia32_punpckhbw(t1, zero);
        score_l = __builtin_ia32_paddw(score_l, t_l);
        score_h = __builtin_ia32_paddw(score_h, t_h);

        // + |e - l|
        t1 = __builtin_ia32_psubusb(e, l);
        t2 = __builtin_ia32_psubusb(l, e);
        t1 = __builtin_ia32_pmaxub(t1, t2);
        t_l = __builtin_ia32_punpcklbw(t1, zero);
        t_h = __builtin_ia32_punpckhbw(t1, zero);
        score_l = __builtin_ia32_paddw(score_l, t_l);
        score_h = __builtin_ia32_paddw(score_h, t_h);

        *(v8qi *) & dst[x] = pred;
    }
}

#else

    /// Return the absolute value of an integer.
#define ABS(i)  ((i) >= 0 ? (i) : (-(i)))

///
/// ELA Edge-based Line Averaging
/// Low-Complexity Interpolation Method
///
/// abcdefg abcdefg abcdefg abcdefg abcdefg
/// x x x x x
/// hijklmn hijklmn hijklmn hijklmn hijklmn
///
/// Creates each output pixel as average of one pixel of the line above
/// (a-g) and one of the line below (h-n).  Starting with the vertical
/// pair d/k, a pair shifted by one or two samples is taken instead, if
/// its score (sum of three absolute differences) is lower.
///
/// @param dst      output line
/// @param cur      current position in the source
/// @param width    number of bytes to create
/// @param above    offset from @p cur to the line above
/// @param below    offset from @p cur to the line below
/// @param next     offset to the next sample in a line
///
/// @note reads up to 3 * @p next bytes before and behind the lines.
///
static void FilterLineSpatial(uint8_t * dst, const uint8_t * cur, int width,
    int above, int below, int next)
{
    int a, b, c, d, e, f, g, h, i, j, k, l, m, n;
    int spatial_pred;
    int spatial_score;
    int score;
    int x;

    for (x = 0; x < width; ++x) {
        a = cur[above + x - 3 * next];  // ignore bound violation
        b = cur[above + x - 2 * next];
        c = cur[above + x - 1 * next];
        d = cur[above + x + 0 * next];
        e = cur[above + x + 1 * next];
        f = cur[above + x + 2 * next];
        g = cur[above + x + 3 * next];

        h = cur[below + x - 3 * next];
        i = cur[below + x - 2 * next];
        j = cur[below + x - 1 * next];
        k = cur[below + x + 0 * next];
        l = cur[below + x + 1 * next];
        m = cur[below + x + 2 * next];
        n = cur[below + x + 3 * next];

        spatial_pred = (d + k) / 2;     // 0 pixel
        spatial_score = ABS(c - j) + ABS(d - k) + ABS(e - l);

        // the two pixel shift is only tried, if one pixel was better
        score = ABS(b - k) + ABS(c - l) + ABS(d - m);
        if (score < spatial_score) {
            spatial_pred = (c + l) / 2; // 1 pixel
            spatial_score = score;
            score = ABS(a - l) + ABS(b - m) + ABS(c - n);
            if (score < spatial_score) {
                spatial_pred = (b + m) / 2;     // 2 pixel
                spatial_score = score;
            }
        }
        // same for the other direction
        score = ABS(d - i) + ABS(e - j) + ABS(f - k);
        if (score < spatial_score) {
            spatial_pred = (e + j) / 2; // -1 pixel
            spatial_score = score;
            score = ABS(e - h) + ABS(f - i) + ABS(g - j);
            if (score < spatial_score) {
                spatial_pred = (f + i) / 2;     // -2 pixel
                spatial_score = score;
            }
        }

        dst[x + 0] = spatial_pred;
    }
}

#endif

///
///
Vaapi spatial deinterlace. /// /// @note FIXME: use common software deinterlace functions. /// static void VaapiSpatial(VaapiDecoder * decoder, VAImage * src, VAImage * dst1, VAImage * dst2) { uint32_t tick1; uint32_t tick2; uint32_t tick3; uint32_t tick4; uint32_t tick5; uint32_t tick6; uint32_t tick7; uint32_t tick8; void *src_base; void *dst1_base; void *dst2_base; unsigned y; unsigned p; uint8_t *tmp; int pitch; int width; tick1 = GetMsTicks(); if (vaMapBuffer(decoder->VaDisplay, src->buf, &src_base) != VA_STATUS_SUCCESS) { Fatal("video/vaapi: can't map the image!\n"); } tick2 = GetMsTicks(); if (vaMapBuffer(decoder->VaDisplay, dst1->buf, &dst1_base) != VA_STATUS_SUCCESS) { Fatal("video/vaapi: can't map the image!\n"); } tick3 = GetMsTicks(); if (vaMapBuffer(decoder->VaDisplay, dst2->buf, &dst2_base) != VA_STATUS_SUCCESS) { Fatal("video/vaapi: can't map the image!\n"); } tick4 = GetMsTicks(); if (0) { // test all updated memset(dst1_base, 0x00, dst1->data_size); memset(dst2_base, 0xFF, dst2->data_size); } // use tmp copy FIXME: only for intel needed tmp = malloc(src->data_size); memcpy(tmp, src_base, src->data_size); if (src->num_planes == 2) { // NV12 pitch = src->pitches[0]; width = src->width; for (y = 0; y < (unsigned)src->height; y++) { // Y const uint8_t *cur; cur = tmp + src->offsets[0] + y * pitch; if (y & 1) { // copy to 2nd memcpy(dst2_base + src->offsets[0] + y * pitch, cur, width); // create 1st FilterLineSpatial(dst1_base + src->offsets[0] + y * pitch, cur, width, y ? -pitch : pitch, y + 1 < (unsigned)src->height ? pitch : -pitch, 1); } else { // copy to 1st memcpy(dst1_base + src->offsets[0] + y * pitch, cur, width); // create 2nd FilterLineSpatial(dst2_base + src->offsets[0] + y * pitch, cur, width, y ? -pitch : pitch, y + 1 < (unsigned)src->height ? 
pitch : -pitch, 1); } } if (VideoSkipChromaDeinterlace[decoder->Resolution]) { for (y = 0; y < (unsigned)src->height / 2; y++) { // UV const uint8_t *cur; cur = tmp + src->offsets[1] + y * pitch; // copy to 1st memcpy(dst1_base + src->offsets[1] + y * pitch, cur, width); // copy to 2nd memcpy(dst2_base + src->offsets[1] + y * pitch, cur, width); } } else { for (y = 0; y < (unsigned)src->height / 2; y++) { // UV const uint8_t *cur; cur = tmp + src->offsets[1] + y * pitch; if (y & 1) { // copy to 2nd memcpy(dst2_base + src->offsets[1] + y * pitch, cur, width); // create 1st FilterLineSpatial(dst1_base + src->offsets[1] + y * pitch, cur, width, y ? -pitch : pitch, y + 1 < (unsigned)src->height / 2 ? pitch : -pitch, 2); } else { // copy to 1st memcpy(dst1_base + src->offsets[1] + y * pitch, cur, width); // create 2nd FilterLineSpatial(dst2_base + src->offsets[1] + y * pitch, cur, width, y ? -pitch : pitch, y + 1 < (unsigned)src->height / 2 ? pitch : -pitch, 2); } } } } else { // YV12 or I420 for (p = 0; p < src->num_planes; ++p) { pitch = src->pitches[p]; width = src->width >> (p != 0); if (VideoSkipChromaDeinterlace[decoder->Resolution] && p) { for (y = 0; y < (unsigned)(src->height >> 1); y++) { const uint8_t *cur; cur = tmp + src->offsets[p] + y * pitch; // copy to 1st memcpy(dst1_base + src->offsets[p] + y * pitch, cur, width); // copy to 2nd memcpy(dst2_base + src->offsets[p] + y * pitch, cur, width); } } else { for (y = 0; y < (unsigned)(src->height >> (p != 0)); y++) { const uint8_t *cur; cur = tmp + src->offsets[p] + y * pitch; if (y & 1) { // copy to 2nd memcpy(dst2_base + src->offsets[p] + y * pitch, cur, width); // create 1st FilterLineSpatial(dst1_base + src->offsets[p] + y * pitch, cur, width, y ? -pitch : pitch, y + 1 < (unsigned)(src->height >> (p != 0)) ? pitch : -pitch, 1); } else { // copy to 1st memcpy(dst1_base + src->offsets[p] + y * pitch, cur, width); // create 2nd FilterLineSpatial(dst2_base + src->offsets[p] + y * pitch, cur, width, y ? 
-pitch : pitch, y + 1 < (unsigned)(src->height >> (p != 0)) ? pitch : -pitch, 1); } } } } } free(tmp); tick5 = GetMsTicks(); if (vaUnmapBuffer(decoder->VaDisplay, dst2->buf) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't unmap image buffer\n")); } tick6 = GetMsTicks(); if (vaUnmapBuffer(decoder->VaDisplay, dst1->buf) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't unmap image buffer\n")); } tick7 = GetMsTicks(); if (vaUnmapBuffer(decoder->VaDisplay, src->buf) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't unmap image buffer\n")); } tick8 = GetMsTicks(); Debug(3, "video/vaapi: map=%2d/%2d/%2d deint=%2d umap=%2d/%2d/%2d\n", tick2 - tick1, tick3 - tick2, tick4 - tick3, tick5 - tick4, tick6 - tick5, tick7 - tick6, tick8 - tick7); } /// /// Vaapi bob deinterlace. /// /// @note FIXME: use common software deinterlace functions. /// static void VaapiBob(VaapiDecoder * decoder, VAImage * src, VAImage * dst1, VAImage * dst2) { uint32_t tick1; uint32_t tick2; uint32_t tick3; uint32_t tick4; uint32_t tick5; uint32_t tick6; uint32_t tick7; uint32_t tick8; void *src_base; void *dst1_base; void *dst2_base; unsigned y; unsigned p; tick1 = GetMsTicks(); if (vaMapBuffer(decoder->VaDisplay, src->buf, &src_base) != VA_STATUS_SUCCESS) { Fatal("video/vaapi: can't map the image!\n"); } tick2 = GetMsTicks(); if (vaMapBuffer(decoder->VaDisplay, dst1->buf, &dst1_base) != VA_STATUS_SUCCESS) { Fatal("video/vaapi: can't map the image!\n"); } tick3 = GetMsTicks(); if (vaMapBuffer(decoder->VaDisplay, dst2->buf, &dst2_base) != VA_STATUS_SUCCESS) { Fatal("video/vaapi: can't map the image!\n"); } tick4 = GetMsTicks(); if (0) { // test all updated memset(dst1_base, 0x00, dst1->data_size); memset(dst2_base, 0xFF, dst2->data_size); return; } #if 0 // interleave for (p = 0; p < src->num_planes; ++p) { for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) { memcpy(dst1_base + src->offsets[p] + (y + 0) * src->pitches[p], src_base + src->offsets[p] + (y + 0) * src->pitches[p], 
src->pitches[p]); memcpy(dst1_base + src->offsets[p] + (y + 1) * src->pitches[p], src_base + src->offsets[p] + (y + 0) * src->pitches[p], src->pitches[p]); memcpy(dst2_base + src->offsets[p] + (y + 0) * src->pitches[p], src_base + src->offsets[p] + (y + 1) * src->pitches[p], src->pitches[p]); memcpy(dst2_base + src->offsets[p] + (y + 1) * src->pitches[p], src_base + src->offsets[p] + (y + 1) * src->pitches[p], src->pitches[p]); } } #endif #if 1 // use tmp copy if (1) { uint8_t *tmp; tmp = malloc(src->data_size); memcpy(tmp, src_base, src->data_size); for (p = 0; p < src->num_planes; ++p) { for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) { memcpy(dst1_base + src->offsets[p] + (y + 0) * src->pitches[p], tmp + src->offsets[p] + (y + 0) * src->pitches[p], src->pitches[p]); memcpy(dst1_base + src->offsets[p] + (y + 1) * src->pitches[p], tmp + src->offsets[p] + (y + 0) * src->pitches[p], src->pitches[p]); memcpy(dst2_base + src->offsets[p] + (y + 0) * src->pitches[p], tmp + src->offsets[p] + (y + 1) * src->pitches[p], src->pitches[p]); memcpy(dst2_base + src->offsets[p] + (y + 1) * src->pitches[p], tmp + src->offsets[p] + (y + 1) * src->pitches[p], src->pitches[p]); } } free(tmp); } #endif #if 0 // use multiple tmp copy if (1) { uint8_t *tmp_src; uint8_t *tmp_dst1; uint8_t *tmp_dst2; tmp_src = malloc(src->data_size); memcpy(tmp_src, src_base, src->data_size); tmp_dst1 = malloc(src->data_size); tmp_dst2 = malloc(src->data_size); for (p = 0; p < src->num_planes; ++p) { for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) { memcpy(tmp_dst1 + src->offsets[p] + (y + 0) * src->pitches[p], tmp_src + src->offsets[p] + (y + 0) * src->pitches[p], src->pitches[p]); memcpy(tmp_dst1 + src->offsets[p] + (y + 1) * src->pitches[p], tmp_src + src->offsets[p] + (y + 0) * src->pitches[p], src->pitches[p]); memcpy(tmp_dst2 + src->offsets[p] + (y + 0) * src->pitches[p], tmp_src + src->offsets[p] + (y + 1) * src->pitches[p], src->pitches[p]); memcpy(tmp_dst2 + 
src->offsets[p] + (y + 1) * src->pitches[p], tmp_src + src->offsets[p] + (y + 1) * src->pitches[p], src->pitches[p]); } } memcpy(dst1_base, tmp_dst1, src->data_size); memcpy(dst2_base, tmp_dst2, src->data_size); free(tmp_src); free(tmp_dst1); free(tmp_dst2); } #endif #if 0 // dst1 first for (p = 0; p < src->num_planes; ++p) { for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) { memcpy(dst1_base + src->offsets[p] + (y + 0) * src->pitches[p], src_base + src->offsets[p] + (y + 0) * src->pitches[p], src->pitches[p]); memcpy(dst1_base + src->offsets[p] + (y + 1) * src->pitches[p], src_base + src->offsets[p] + (y + 0) * src->pitches[p], src->pitches[p]); } } // dst2 next for (p = 0; p < src->num_planes; ++p) { for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) { memcpy(dst2_base + src->offsets[p] + (y + 0) * src->pitches[p], src_base + src->offsets[p] + (y + 1) * src->pitches[p], src->pitches[p]); memcpy(dst2_base + src->offsets[p] + (y + 1) * src->pitches[p], src_base + src->offsets[p] + (y + 1) * src->pitches[p], src->pitches[p]); } } #endif tick5 = GetMsTicks(); if (vaUnmapBuffer(decoder->VaDisplay, dst2->buf) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't unmap image buffer\n")); } tick6 = GetMsTicks(); if (vaUnmapBuffer(decoder->VaDisplay, dst1->buf) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't unmap image buffer\n")); } tick7 = GetMsTicks(); if (vaUnmapBuffer(decoder->VaDisplay, src->buf) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't unmap image buffer\n")); } tick8 = GetMsTicks(); Debug(4, "video/vaapi: map=%2d/%2d/%2d deint=%2d umap=%2d/%2d/%2d\n", tick2 - tick1, tick3 - tick2, tick4 - tick3, tick5 - tick4, tick6 - tick5, tick7 - tick6, tick8 - tick7); } /// /// Create software deinterlace images. 
/// /// @param decoder VA-API decoder /// static void VaapiCreateDeinterlaceImages(VaapiDecoder * decoder) { VAImageFormat format[1]; int i; // NV12, YV12, I420, BGRA // NV12 Y U/V 2x2 // YV12 Y V U 2x2 // I420 Y U V 2x2 // Intel needs NV12 VaapiFindImageFormat(decoder, PIX_FMT_NV12, format); //VaapiFindImageFormat(decoder, PIX_FMT_YUV420P, format); for (i = 0; i < 5; ++i) { if (vaCreateImage(decoder->VaDisplay, format, decoder->InputWidth, decoder->InputHeight, decoder->DeintImages + i) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't create image!\n")); } } #ifdef DEBUG if (1) { VAImage *img; img = decoder->DeintImages; Debug(3, "video/vaapi: %c%c%c%c %dx%d*%d\n", img->format.fourcc, img->format.fourcc >> 8, img->format.fourcc >> 16, img->format.fourcc >> 24, img->width, img->height, img->num_planes); } #endif } /// /// Destroy software deinterlace images. /// /// @param decoder VA-API decoder /// static void VaapiDestroyDeinterlaceImages(VaapiDecoder * decoder) { int i; for (i = 0; i < 5; ++i) { if (vaDestroyImage(decoder->VaDisplay, decoder->DeintImages[i].image_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy image!\n")); } decoder->DeintImages[i].image_id = VA_INVALID_ID; } } /// /// Vaapi software deinterlace. 
/// /// @param decoder VA-API decoder /// @param surface interlaced hardware surface /// static void VaapiCpuDerive(VaapiDecoder * decoder, VASurfaceID surface) { // // vaPutImage not working, vaDeriveImage // uint32_t tick1; uint32_t tick2; uint32_t tick3; uint32_t tick4; uint32_t tick5; VAImage image[1]; VAImage dest1[1]; VAImage dest2[1]; VAStatus status; VASurfaceID out1; VASurfaceID out2; tick1 = GetMsTicks(); #if 0 // get image test if (decoder->Image->image_id == VA_INVALID_ID) { VAImageFormat format[1]; VaapiFindImageFormat(decoder, PIX_FMT_NV12, format); if (vaCreateImage(VaDisplay, format, decoder->InputWidth, decoder->InputHeight, decoder->Image) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't create image!\n")); } } if (vaGetImage(decoder->VaDisplay, surface, 0, 0, decoder->InputWidth, decoder->InputHeight, decoder->Image->image_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't get source image\n")); VaapiQueueSurface(decoder, surface, 0); VaapiQueueSurface(decoder, surface, 0); return; } *image = *decoder->Image; #else if ((status = vaDeriveImage(decoder->VaDisplay, surface, image)) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaDeriveImage failed %d\n"), status); VaapiQueueSurface(decoder, surface, 0); VaapiQueueSurface(decoder, surface, 0); return; } #endif tick2 = GetMsTicks(); Debug(4, "video/vaapi: %c%c%c%c %dx%d*%d\n", image->format.fourcc, image->format.fourcc >> 8, image->format.fourcc >> 16, image->format.fourcc >> 24, image->width, image->height, image->num_planes); // get a free surfaces out1 = VaapiGetSurface(decoder); if (out1 == VA_INVALID_ID) { abort(); } if ((status = vaDeriveImage(decoder->VaDisplay, out1, dest1)) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaDeriveImage failed %d\n"), status); } tick3 = GetMsTicks(); out2 = VaapiGetSurface(decoder); if (out2 == VA_INVALID_ID) { abort(); } if ((status = vaDeriveImage(decoder->VaDisplay, out2, dest2)) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaDeriveImage failed 
%d\n"), status); } tick4 = GetMsTicks(); switch (VideoDeinterlace[decoder->Resolution]) { case VideoDeinterlaceSoftBob: default: VaapiBob(decoder, image, dest1, dest2); break; case VideoDeinterlaceSoftSpatial: VaapiSpatial(decoder, image, dest1, dest2); break; } tick5 = GetMsTicks(); #if 1 if (vaDestroyImage(VaDisplay, image->image_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy image!\n")); } #endif if (vaDestroyImage(VaDisplay, dest1->image_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy image!\n")); } if (vaDestroyImage(VaDisplay, dest2->image_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy image!\n")); } VaapiQueueSurface(decoder, out1, 1); VaapiQueueSurface(decoder, out2, 1); tick5 = GetMsTicks(); Debug(4, "video/vaapi: get=%2d get1=%2d get2=%d deint=%2d\n", tick2 - tick1, tick3 - tick2, tick4 - tick3, tick5 - tick4); } /// /// Vaapi software deinterlace. /// /// @param decoder VA-API decoder /// @param surface interlaced hardware surface /// static void VaapiCpuPut(VaapiDecoder * decoder, VASurfaceID surface) { // // vaPutImage working // uint32_t tick1; uint32_t tick2; uint32_t tick3; uint32_t tick4; uint32_t tick5; VAImage *img1; VAImage *img2; VAImage *img3; VASurfaceID out; VAStatus status; // // Create deinterlace images. 
// if (decoder->DeintImages[0].image_id == VA_INVALID_ID) { VaapiCreateDeinterlaceImages(decoder); } if (0 && vaSyncSurface(decoder->VaDisplay, surface) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); } img1 = decoder->DeintImages; img2 = decoder->DeintImages + 1; img3 = decoder->DeintImages + 2; tick1 = GetMsTicks(); if (vaGetImage(decoder->VaDisplay, surface, 0, 0, decoder->InputWidth, decoder->InputHeight, img1->image_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't get source image\n")); VaapiQueueSurface(decoder, surface, 0); VaapiQueueSurface(decoder, surface, 0); return; } tick2 = GetMsTicks(); // FIXME: handle top_field_first switch (VideoDeinterlace[decoder->Resolution]) { case VideoDeinterlaceSoftBob: default: VaapiBob(decoder, img1, img2, img3); break; case VideoDeinterlaceSoftSpatial: VaapiSpatial(decoder, img1, img2, img3); break; } tick3 = GetMsTicks(); // get a free surface and upload the image out = VaapiGetSurface(decoder); if (out == VA_INVALID_ID) { abort(); } if ((status = vaPutImage(VaDisplay, out, img2->image_id, 0, 0, img2->width, img2->height, 0, 0, img2->width, img2->height)) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't put image: %d!\n"), status); abort(); } VaapiQueueSurface(decoder, out, 1); if (0 && vaSyncSurface(decoder->VaDisplay, out) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); } tick4 = GetMsTicks(); Debug(4, "video/vaapi: deint %d %#010x -> %#010x\n", decoder->SurfaceField, surface, out); // get a free surface and upload the image out = VaapiGetSurface(decoder); if (out == VA_INVALID_ID) { abort(); } if (vaPutImage(VaDisplay, out, img3->image_id, 0, 0, img3->width, img3->height, 0, 0, img3->width, img3->height) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't put image!\n")); } VaapiQueueSurface(decoder, out, 1); if (0 && vaSyncSurface(decoder->VaDisplay, out) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); } tick5 = GetMsTicks(); 
Debug(4, "video/vaapi: get=%2d deint=%2d put1=%2d put2=%2d\n", tick2 - tick1, tick3 - tick2, tick4 - tick3, tick5 - tick4); } /// /// Vaapi software deinterlace. /// /// @param decoder VA-API decoder /// @param surface interlaced hardware surface /// static void VaapiCpuDeinterlace(VaapiDecoder * decoder, VASurfaceID surface) { if (decoder->GetPutImage) { VaapiCpuPut(decoder, surface); } else { VaapiCpuDerive(decoder, surface); } // FIXME: must release software input surface } /// /// Render a ffmpeg frame /// /// @param decoder VA-API decoder /// @param video_ctx ffmpeg video codec context /// @param frame frame to display /// static void VaapiRenderFrame(VaapiDecoder * decoder, const AVCodecContext * video_ctx, const AVFrame * frame) { VASurfaceID surface; int interlaced; // FIXME: some tv-stations toggle interlace on/off // frame->interlaced_frame isn't always correct set interlaced = frame->interlaced_frame; if (video_ctx->height == 720) { if (interlaced && !decoder->WrongInterlacedWarned) { Debug(3, "video/vaapi: wrong interlace flag fixed\n"); decoder->WrongInterlacedWarned = 1; } interlaced = 0; } else { if (!interlaced && !decoder->WrongInterlacedWarned) { Debug(3, "video/vaapi: wrong interlace flag fixed\n"); decoder->WrongInterlacedWarned = 1; } interlaced = 1; } // FIXME: should be done by init video_ctx->field_order if (decoder->Interlaced != interlaced || decoder->TopFieldFirst != frame->top_field_first) { #if 0 // field_order only in git Debug(3, "video/vaapi: interlaced %d top-field-first %d - %d\n", interlaced, frame->top_field_first, video_ctx->field_order); #else Debug(3, "video/vaapi: interlaced %d top-field-first %d\n", interlaced, frame->top_field_first); #endif decoder->Interlaced = interlaced; decoder->TopFieldFirst = frame->top_field_first; decoder->SurfaceField = 0; } // update aspect ratio changes #if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(53,60,100) if (decoder->InputWidth && decoder->InputHeight && av_cmp_q(decoder->InputAspect, 
frame->sample_aspect_ratio)) { Debug(3, "video/vaapi: aspect ratio changed\n"); decoder->InputAspect = frame->sample_aspect_ratio; VaapiUpdateOutput(decoder); } #else if (decoder->InputWidth && decoder->InputHeight && av_cmp_q(decoder->InputAspect, video_ctx->sample_aspect_ratio)) { Debug(3, "video/vaapi: aspect ratio changed\n"); decoder->InputAspect = video_ctx->sample_aspect_ratio; VaapiUpdateOutput(decoder); } #endif // // Hardware render // if (video_ctx->hwaccel_context) { if (video_ctx->height != decoder->InputHeight || video_ctx->width != decoder->InputWidth) { Error(_("video/vaapi: stream <-> surface size mismatch\n")); return; } surface = (unsigned)(size_t) frame->data[3]; Debug(4, "video/vaapi: hw render hw surface %#010x\n", surface); if (interlaced && VideoDeinterlace[decoder->Resolution] >= VideoDeinterlaceSoftBob) { VaapiCpuDeinterlace(decoder, surface); } else { VaapiQueueSurface(decoder, surface, 0); } // // VAImage render // } else { void *va_image_data; int i; AVPicture picture[1]; int width; int height; Debug(4, "video/vaapi: hw render sw surface\n"); width = video_ctx->width; height = video_ctx->height; // // Check image, format, size // if ((decoder->GetPutImage && decoder->Image->image_id == VA_INVALID_ID) || decoder->PixFmt != video_ctx->pix_fmt || width != decoder->InputWidth || height != decoder->InputHeight) { Debug(3, "video/vaapi: stream <-> surface size/interlace mismatch\n"); decoder->PixFmt = video_ctx->pix_fmt; // FIXME: aspect done above! 
decoder->InputWidth = width; decoder->InputHeight = height; VaapiSetup(decoder, video_ctx); } // FIXME: Need to insert software deinterlace here // FIXME: can/must insert auto-crop here (is done after upload) // get a free surface and upload the image surface = VaapiGetSurface(decoder); Debug(4, "video/vaapi: video surface %#010x displayed\n", surface); if (!decoder->GetPutImage && vaDeriveImage(decoder->VaDisplay, surface, decoder->Image) != VA_STATUS_SUCCESS) { VAImageFormat format[1]; Error(_("video/vaapi: vaDeriveImage failed\n")); decoder->GetPutImage = 1; VaapiFindImageFormat(decoder, video_ctx->pix_fmt, format); if (vaCreateImage(VaDisplay, format, width, height, decoder->Image) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't create image!\n")); } } // // Copy data from frame to image // if (vaMapBuffer(VaDisplay, decoder->Image->buf, &va_image_data) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't map the image!\n")); } // crazy: intel mixes YV12 and NV12 with mpeg if (decoder->Image->format.fourcc == VA_FOURCC_NV12) { int x; // intel NV12 convert YV12 to NV12 // copy Y for (i = 0; i < height; ++i) { memcpy(va_image_data + decoder->Image->offsets[0] + decoder->Image->pitches[0] * i, frame->data[0] + frame->linesize[0] * i, frame->linesize[0]); } // copy UV for (i = 0; i < height / 2; ++i) { for (x = 0; x < frame->linesize[1]; ++x) { ((uint8_t *) va_image_data)[decoder->Image->offsets[1] + decoder->Image->pitches[1] * i + x * 2 + 0] = frame->data[1][i * frame->linesize[1] + x]; ((uint8_t *) va_image_data)[decoder->Image->offsets[1] + decoder->Image->pitches[1] * i + x * 2 + 1] = frame->data[2][i * frame->linesize[2] + x]; } } // vdpau uses this } else if (decoder->Image->format.fourcc == VA_FOURCC('I', '4', '2', '0')) { picture->data[0] = va_image_data + decoder->Image->offsets[0]; picture->linesize[0] = decoder->Image->pitches[0]; picture->data[1] = va_image_data + decoder->Image->offsets[1]; picture->linesize[1] = decoder->Image->pitches[2]; 
picture->data[2] = va_image_data + decoder->Image->offsets[2]; picture->linesize[2] = decoder->Image->pitches[1]; av_picture_copy(picture, (AVPicture *) frame, video_ctx->pix_fmt, width, height); } else if (decoder->Image->num_planes == 3) { picture->data[0] = va_image_data + decoder->Image->offsets[0]; picture->linesize[0] = decoder->Image->pitches[0]; picture->data[1] = va_image_data + decoder->Image->offsets[2]; picture->linesize[1] = decoder->Image->pitches[2]; picture->data[2] = va_image_data + decoder->Image->offsets[1]; picture->linesize[2] = decoder->Image->pitches[1]; av_picture_copy(picture, (AVPicture *) frame, video_ctx->pix_fmt, width, height); } if (vaUnmapBuffer(VaDisplay, decoder->Image->buf) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't unmap the image!\n")); } Debug(4, "video/vaapi: buffer %dx%d <- %dx%d\n", decoder->Image->width, decoder->Image->height, width, height); if (decoder->GetPutImage && (i = vaPutImage(VaDisplay, surface, decoder->Image->image_id, 0, 0, width, height, 0, 0, width, height)) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't put image err:%d!\n"), i); } if (!decoder->GetPutImage) { if (vaDestroyImage(VaDisplay, decoder->Image->image_id) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: can't destroy image!\n")); } decoder->Image->image_id = VA_INVALID_ID; } VaapiQueueSurface(decoder, surface, 1); } if (decoder->Interlaced) { ++decoder->FrameCounter; } } /// /// Advance displayed frame of decoder. 
/// /// @param decoder VA-API hw decoder /// static void VaapiAdvanceDecoderFrame(VaapiDecoder * decoder) { // next surface, if complete frame is displayed (1 -> 0) if (decoder->SurfaceField) { VASurfaceID surface; int filled; filled = atomic_read(&decoder->SurfacesFilled); // FIXME: this should check the caller // check decoder, if new surface is available if (filled <= 1) { // keep use of last surface ++decoder->FramesDuped; // FIXME: don't warn after stream start, don't warn during pause Error(_("video: display buffer empty, duping frame (%d/%d) %d\n"), decoder->FramesDuped, decoder->FrameCounter, VideoGetBuffers()); return; } // wait for rendering finished surface = decoder->SurfacesRb[decoder->SurfaceRead]; if (vaSyncSurface(decoder->VaDisplay, surface) != VA_STATUS_SUCCESS) { Error(_("video/vaapi: vaSyncSurface failed\n")); } decoder->SurfaceRead = (decoder->SurfaceRead + 1) % VIDEO_SURFACES_MAX; atomic_dec(&decoder->SurfacesFilled); // progressiv oder software deinterlacer decoder->SurfaceField = !decoder->Interlaced || VideoDeinterlace[decoder->Resolution] >= VideoDeinterlaceSoftBob; return; } decoder->SurfaceField = 1; } /// /// Display a video frame. 
/// /// @todo FIXME: add detection of missed frames /// static void VaapiDisplayFrame(void) { struct timespec nowtime; uint32_t start; uint32_t put1; uint32_t put2; int i; VaapiDecoder *decoder; if (VideoSurfaceModesChanged) { // handle changed modes for (i = 0; i < VaapiDecoderN; ++i) { VaapiInitSurfaceFlags(VaapiDecoders[i]); } VideoSurfaceModesChanged = 0; } // look if any stream have a new surface available for (i = 0; i < VaapiDecoderN; ++i) { VASurfaceID surface; int filled; decoder = VaapiDecoders[i]; decoder->FramesDisplayed++; filled = atomic_read(&decoder->SurfacesFilled); // no surface availble show black with possible osd if (!filled) { VaapiBlackSurface(decoder); VaapiMessage(3, "video/vaapi: black surface displayed\n"); continue; } surface = decoder->SurfacesRb[decoder->SurfaceRead]; #ifdef DEBUG if (surface == VA_INVALID_ID) { printf(_("video/vaapi: invalid surface in ringbuffer\n")); } Debug(4, "video/vaapi: yy video surface %#010x displayed\n", surface); #endif start = GetMsTicks(); // VDPAU driver + INTEL driver does no v-sync with 1080 if (0 && decoder->Interlaced // FIXME: buggy libva-driver-vdpau, buggy libva-driver-intel && (VaapiBuggyVdpau || (0 && VaapiBuggyIntel && decoder->InputHeight == 1080)) && VideoDeinterlace[decoder->Resolution] != VideoDeinterlaceWeave) { VaapiPutSurfaceX11(decoder, surface, decoder->Interlaced, decoder->TopFieldFirst, 0); put1 = GetMsTicks(); VaapiPutSurfaceX11(decoder, surface, decoder->Interlaced, decoder->TopFieldFirst, 1); put2 = GetMsTicks(); } else { VaapiPutSurfaceX11(decoder, surface, decoder->Interlaced, decoder->TopFieldFirst, decoder->SurfaceField); put1 = GetMsTicks(); put2 = put1; } clock_gettime(CLOCK_REALTIME, &nowtime); // FIXME: 31 only correct for 50Hz if ((nowtime.tv_sec - decoder->FrameTime.tv_sec) * 1000 * 1000 * 1000 + (nowtime.tv_nsec - decoder->FrameTime.tv_nsec) > 31 * 1000 * 1000) { // FIXME: ignore still-frame, trick-speed Debug(3, "video/vaapi: time/frame too long %ldms\n", 
((nowtime.tv_sec - decoder->FrameTime.tv_sec) * 1000 * 1000 * 1000 + (nowtime.tv_nsec - decoder->FrameTime.tv_nsec)) / (1000 * 1000)); Debug(4, "video/vaapi: put1 %2u put2 %2u\n", put1 - start, put2 - put1); } #ifdef noDEBUG Debug(3, "video/vaapi: time/frame %ldms\n", ((nowtime.tv_sec - decoder->FrameTime.tv_sec) * 1000 * 1000 * 1000 + (nowtime.tv_nsec - decoder->FrameTime.tv_nsec)) / (1000 * 1000)); if (put2 > start + 20) { Debug(3, "video/vaapi: putsurface too long %ums\n", put2 - start); } Debug(4, "video/vaapi: put1 %2u put2 %2u\n", put1 - start, put2 - put1); #endif decoder->FrameTime = nowtime; } } /// /// Set VA-API decoder video clock. /// /// @param decoder VA-API hardware decoder /// @param pts audio presentation timestamp /// void VaapiSetClock(VaapiDecoder * decoder, int64_t pts) { decoder->PTS = pts; } /// /// Get VA-API decoder video clock. /// /// @param decoder VA-API decoder /// static int64_t VaapiGetClock(const VaapiDecoder * decoder) { // pts is the timestamp of the latest decoded frame if (decoder->PTS == (int64_t) AV_NOPTS_VALUE) { return AV_NOPTS_VALUE; } // subtract buffered decoded frames if (decoder->Interlaced) { return decoder->PTS - 20 * 90 * (2 * atomic_read(&decoder->SurfacesFilled) - decoder->SurfaceField); } return decoder->PTS - 20 * 90 * (atomic_read(&decoder->SurfacesFilled) + 2); } /// /// Set trick play speed. /// /// @param decoder VA-API decoder /// @param speed trick speed (0 = normal) /// static void VaapiSetTrickSpeed(VaapiDecoder * decoder, int speed) { decoder->TrickSpeed = speed; decoder->TrickCounter = 0; } /// /// Sync decoder output to audio. /// /// trick-speed show frame times /// still-picture show frame until new frame arrives /// 60hz-mode repeat every 5th picture /// video>audio slow down video by duplicating frames /// video