From b0fa6cab5e64886d3f47156a049c5d1b85dcabd9 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Tue, 17 Sep 2013 23:58:37 +0100 Subject: Emit vzeroupper after avx memcpy Emitting vzeroupper is necessary to avoid avx<->sse transition penalties (when using avx-256 instructions). This didn't really matter much in the past, since other code wasn't using avx, hence there was just a penalty once afterwards when sse code was executed. However, there's code in ffmpeg which mixes avx-128 and sse a lot, and each time this happens there's a huge penalty. This causes in particular ff_deblock_v_luma_8_avx to slow down by a factor of 50 or so which makes the whole decoding about twice as slow (might be dependent on the h264 stream or maybe ffmpeg version too, since ffmpeg will also emit vzeroupper when using avx-256 hence not doing it here might not always be an issue, but in the case I was seeing nothing else used avx-256). --- src/xine-utils/memcpy.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src') diff --git a/src/xine-utils/memcpy.c b/src/xine-utils/memcpy.c index 6eda220ed..a59f4e67a 100644 --- a/src/xine-utils/memcpy.c +++ b/src/xine-utils/memcpy.c @@ -251,6 +251,7 @@ static void * sse_memcpy(void * to, const void * from, size_t len) /* since movntq is weakly-ordered, a "sfence" * is needed to become ordered again. */ __asm__ __volatile__ ("sfence":::"memory"); + __asm__ __volatile__ ("vzeroupper"); } /* * Now do the tail of the block -- cgit v1.2.3 From 4c6876c9ebbaa7e793219d20a8528d939149563d Mon Sep 17 00:00:00 2001 From: Darren Salt Date: Wed, 18 Sep 2013 13:16:40 +0100 Subject: Avoid using deprecated autoconf & automake features. asfheader.c is used in two places: same compiler flags, so is safe. --- src/demuxers/Makefile.am | 2 ++ src/input/Makefile.am | 1 + 2 files changed, 3 insertions(+) (limited to 'src') diff --git a/src/demuxers/Makefile.am b/src/demuxers/Makefile.am index 32b5616f7..1f7948fdb 100644 --- a/src/demuxers/Makefile.am +++ b/src/demuxers/Makefile.am @@ -84,6 +84,8 @@ xineplug_dmx_qt_la_SOURCES = demux_qt.c xineplug_dmx_qt_la_LIBADD = $(XINE_LIB) $(LTLIBINTL) $(ZLIB_LIBS) xineplug_dmx_qt_la_CPPFLAGS = $(AM_CPPFLAGS) $(ZLIB_CPPFLAGS) +# note: asfheader.c also used from ../input +# is safe given same compiler options xineplug_dmx_asf_la_SOURCES = demux_asf.c asfheader.c xineplug_dmx_asf_la_LIBADD = $(XINE_LIB) $(LTLIBINTL) $(LTLIBICONV) xineplug_dmx_asf_la_CFLAGS = $(AM_CFLAGS) -fno-strict-aliasing diff --git a/src/input/Makefile.am b/src/input/Makefile.am index 804b36352..3752d4c9b 100644 --- a/src/input/Makefile.am +++ b/src/input/Makefile.am @@ -103,6 +103,7 @@ xineplug_inp_dvd_la_CFLAGS = $(AM_CFLAGS) $(DVD_CFLAGS) xineplug_inp_net_la_SOURCES = input_net.c net_buf_ctrl.c xineplug_inp_net_la_LIBADD = $(XINE_LIB) $(NET_LIBS) $(PTHREAD_LIBS) $(LTLIBINTL) +# note: compiling ../demuxers/asfheader.c here is safe given same compiler options xineplug_inp_mms_la_SOURCES = input_mms.c net_buf_ctrl.c mms.c mmsh.c http_helper.c ../demuxers/asfheader.c xineplug_inp_mms_la_LIBADD = $(XINE_LIB) $(LTLIBICONV) $(PTHREAD_LIBS) $(LTLIBINTL) -- cgit v1.2.3 From bf8bf1f5f2b64797d1f85f3007711109aede95d1 Mon Sep 17 00:00:00 2001 From: Darren Salt Date: Wed, 18 Sep 2013 13:49:34 +0100 Subject: More vaapi build fixes. --- src/video_out/video_out_vaapi.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/video_out/video_out_vaapi.c b/src/video_out/video_out_vaapi.c index ade498d3a..2c1b4e16a 100644 --- a/src/video_out/video_out_vaapi.c +++ b/src/video_out/video_out_vaapi.c @@ -72,6 +72,12 @@ #include "accel_vaapi.h" +#ifdef HAVE_FFMPEG_AVUTIL_H +# include +#else +# include +#endif + #ifndef VA_SURFACE_ATTRIB_SETTABLE #define vaCreateSurfaces(d, f, w, h, s, ns, a, na) \ vaCreateSurfaces(d, w, h, f, ns, s) @@ -1637,7 +1643,7 @@ static void vaapi_property_callback (void *property_gen, xine_cfg_entry_t *entry lprintf("vaapi_property_callback property=%d, value=%d\n", property->type, entry->num_value ); - VAStatus vaStatus = vaSetDisplayAttributes(va_context->va_display, &attr, 1); + /*VAStatus vaStatus = */ vaSetDisplayAttributes(va_context->va_display, &attr, 1); //vaapi_check_status((vo_driver_t *)this, vaStatus, "vaSetDisplayAttributes()"); vaapi_show_display_props((vo_driver_t*)this); @@ -1769,7 +1775,7 @@ static void vaapi_display_attribs(vo_driver_t *this_gen) { static void vaapi_set_background_color(vo_driver_t *this_gen) { vaapi_driver_t *this = (vaapi_driver_t *)this_gen; ff_vaapi_context_t *va_context = this->va_context; - VAStatus vaStatus; + //VAStatus vaStatus; if(!va_context->valid_context) return; @@ -1780,7 +1786,7 @@ static void vaapi_set_background_color(vo_driver_t *this_gen) { attr.type = VADisplayAttribBackgroundColor; attr.value = 0x000000; - vaStatus = vaSetDisplayAttributes(va_context->va_display, &attr, 1); + /*vaStatus =*/ vaSetDisplayAttributes(va_context->va_display, &attr, 1); //vaapi_check_status(this_gen, vaStatus, "vaSetDisplayAttributes()"); } -- cgit v1.2.3 From e38ff4e25ee6f6e72fcc57f432ed6835fc856cfa Mon Sep 17 00:00:00 2001 From: Darren Salt Date: Wed, 18 Sep 2013 13:58:26 +0100 Subject: Fix inclusion of modplug.h. --- src/demuxers/demux_mod.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/demuxers/demux_mod.c b/src/demuxers/demux_mod.c index 39becbac3..08ca877d0 100644 --- a/src/demuxers/demux_mod.c +++ b/src/demuxers/demux_mod.c @@ -46,7 +46,7 @@ #include #include #include -#include "modplug.h" +#include #include "bswap.h" #define MOD_SAMPLERATE 44100 -- cgit v1.2.3 From 14d684005aa248bb0fca6c383cb1122158ada3a4 Mon Sep 17 00:00:00 2001 From: Darren Salt Date: Wed, 18 Sep 2013 18:01:02 +0100 Subject: Put vzeroupper in the right place, ref. patch as sent to xine-devel. --- src/xine-utils/memcpy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/xine-utils/memcpy.c b/src/xine-utils/memcpy.c index a59f4e67a..e00171e33 100644 --- a/src/xine-utils/memcpy.c +++ b/src/xine-utils/memcpy.c @@ -251,7 +251,6 @@ static void * sse_memcpy(void * to, const void * from, size_t len) /* since movntq is weakly-ordered, a "sfence" * is needed to become ordered again. */ __asm__ __volatile__ ("sfence":::"memory"); - __asm__ __volatile__ ("vzeroupper"); } /* * Now do the tail of the block @@ -343,6 +342,7 @@ static void * avx_memcpy(void * to, const void * from, size_t len) /* since movntq is weakly-ordered, a "sfence" * is needed to become ordered again. */ __asm__ __volatile__ ("sfence":::"memory"); + __asm__ __volatile__ ("vzeroupper"); } /* * Now do the tail of the block -- cgit v1.2.3 From 2353ea6726aaad2364d884adaf44c335183dd8e5 Mon Sep 17 00:00:00 2001 From: Darren Salt Date: Thu, 19 Sep 2013 00:00:37 +0100 Subject: Fix a hang (lock bug) which happens if VAAPI plugin init fails. --- src/video_out/video_out_vaapi.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'src') diff --git a/src/video_out/video_out_vaapi.c b/src/video_out/video_out_vaapi.c index 2c1b4e16a..6f0b5c1b8 100644 --- a/src/video_out/video_out_vaapi.c +++ b/src/video_out/video_out_vaapi.c @@ -3683,13 +3683,12 @@ static int vaapi_gui_data_exchange (vo_driver_t *this_gen, return 0; } -static void vaapi_dispose (vo_driver_t *this_gen) { +static void vaapi_dispose_locked (vo_driver_t *this_gen) { vaapi_driver_t *this = (vaapi_driver_t *) this_gen; ff_vaapi_context_t *va_context = this->va_context; - lprintf("vaapi_dispose\n"); + // vaapi_lock is locked at this point, either from vaapi_dispose or vaapi_open_plugin - pthread_mutex_lock(&this->vaapi_lock); DO_LOCKDISPLAY; this->ovl_yuv2rgb->dispose(this->ovl_yuv2rgb); @@ -3719,6 +3718,12 @@ static void vaapi_dispose (vo_driver_t *this_gen) { free (this); } +static void vaapi_dispose (vo_driver_t *this_gen) { + lprintf("vaapi_dispose\n"); + pthread_mutex_lock(&((vaapi_driver_t *)this_gen)->vaapi_lock); + vaapi_dispose_locked(this_gen); +} + static void vaapi_vdr_osd_width_flag( void *this_gen, xine_cfg_entry_t *entry ) { vaapi_driver_t *this = (vaapi_driver_t *) this_gen; @@ -3977,7 +3982,7 @@ static vo_driver_t *vaapi_open_plugin (video_driver_class_t *class_gen, const vo this->va_context->last_sub_image_fmt = 0; if(vaapi_init_internal((vo_driver_t *)this, SW_CONTEXT_INIT_FORMAT, SW_WIDTH, SW_HEIGHT, 0) != VA_STATUS_SUCCESS) { - vaapi_dispose((vo_driver_t *)this); + vaapi_dispose_locked((vo_driver_t *)this); return NULL; } vaapi_close((vo_driver_t *)this); -- cgit v1.2.3