summary refs log tree commit diff
path: root/video.c
diff options
context:
space:
mode:
Diffstat (limited to 'video.c')
-rw-r--r-- video.c 83
1 file changed, 58 insertions, 25 deletions
diff --git a/video.c b/video.c
index 53a7dd9..169a50e 100644
--- a/video.c
+++ b/video.c
@@ -3137,8 +3137,10 @@ static void VaapiBlackSurface(VaapiDecoder * decoder)
#define noUSE_VECTOR ///< use gcc vector extension
#ifdef USE_VECTOR
-typedef int8_t v16qi __attribute__ ((vector_size(16)));
-typedef int16_t v4hi __attribute__ ((vector_size(8)));
+typedef char v16qi __attribute__ ((vector_size(16)));
+typedef char v8qi __attribute__ ((vector_size(8)));
+typedef int16_t v4hi __attribute__ ((vector_size(4)));
+typedef int16_t v8hi __attribute__ ((vector_size(8)));
///
/// ELA Edge-based Line Averaging
@@ -3155,12 +3157,57 @@ static void FilterLineSpatial(uint8_t * dst, const uint8_t * cur, int width,
// 8/16 128bit xmm register
- for (x = 0; x < width; x += 16) {
- v16qi a;
+ for (x = 0; x < width; x += 8) {
+ v8qi c;
+ v8qi d;
+ v8qi e;
+ v8qi j;
+ v8qi k;
+ v8qi l;
+ v8qi t1;
+ v8qi t2;
+ v8qi pred;
+ v8qi score_l;
+ v8qi score_h;
+ v8qi t_l;
+ v8qi t_h;
+ v8qi zero;
// ignore bound violation
- a = *(v16qi *) & cur[above + x];
- *(v16qi *) & dst[x] = a;
+ d = *(v8qi *) & cur[above + x];
+ k = *(v8qi *) & cur[below + x];
+ pred = __builtin_ia32_pavgb(d, k);
+
+ // score = ABS(c - j) + ABS(d - k) + ABS(e - l);
+ c = *(v8qi *) & cur[above + x - 1 * next];
+ e = *(v8qi *) & cur[above + x + 1 * next];
+ j = *(v8qi *) & cur[below + x - 1 * next];
+ l = *(v8qi *) & cur[below + x + 1 * next];
+
+ t1 = __builtin_ia32_psubusb(c, j);
+ t2 = __builtin_ia32_psubusb(j, c);
+ t1 = __builtin_ia32_pmaxub(t1, t2);
+ zero ^= zero;
+ score_l = __builtin_ia32_punpcklbw(t1, zero);
+ score_h = __builtin_ia32_punpckhbw(t1, zero);
+
+ t1 = __builtin_ia32_psubusb(d, k);
+ t2 = __builtin_ia32_psubusb(k, d);
+ t1 = __builtin_ia32_pmaxub(t1, t2);
+ t_l = __builtin_ia32_punpcklbw(t1, zero);
+ t_h = __builtin_ia32_punpckhbw(t1, zero);
+ score_l = __builtin_ia32_paddw(score_l, t_l);
+ score_h = __builtin_ia32_paddw(score_h, t_h);
+
+ t1 = __builtin_ia32_psubusb(e, l);
+ t2 = __builtin_ia32_psubusb(l, e);
+ t1 = __builtin_ia32_pmaxub(t1, t2);
+ t_l = __builtin_ia32_punpcklbw(t1, zero);
+ t_h = __builtin_ia32_punpckhbw(t1, zero);
+ score_l = __builtin_ia32_paddw(score_l, t_l);
+ score_h = __builtin_ia32_paddw(score_h, t_h);
+
+ *(v8qi *) & dst[x] = pred;
}
}
@@ -3401,7 +3448,7 @@ static void VaapiSpatial(VaapiDecoder * decoder, VAImage * src, VAImage * dst1,
}
tick8 = GetMsTicks();
- Debug(4, "video/vaapi: map=%2d/%2d/%2d deint=%2d umap=%2d/%2d/%2d\n",
+ Debug(3, "video/vaapi: map=%2d/%2d/%2d deint=%2d umap=%2d/%2d/%2d\n",
tick2 - tick1, tick3 - tick2, tick4 - tick3, tick5 - tick4,
tick6 - tick5, tick7 - tick6, tick8 - tick7);
}
@@ -8075,8 +8122,7 @@ static void VideoEvent(void)
{
XEvent event;
KeySym keysym;
-
- //char buf[32];
+ char buf[32];
XNextEvent(XlibDisplay, &event);
switch (event.type) {
@@ -8109,27 +8155,14 @@ static void VideoEvent(void)
VideoSetFullscreen(-1);
break;
case KeyPress:
- keysym = XLookupKeysym(&event.xkey, 0);
-#if 0
- switch (keysym) {
- case XK_d:
- break;
- case XK_S:
- break;
- }
-#endif
+ XLookupString(&event.xkey, buf, sizeof(buf), &keysym, NULL);
if (keysym == NoSymbol) {
Warning(_("video/event: No symbol for %d\n"),
event.xkey.keycode);
+ break;
}
FeedKeyPress("XKeySym", XKeysymToString(keysym), 0, 0);
- /*
- if (XLookupString(&event.xkey, buf, sizeof(buf), &keysym, NULL)) {
- FeedKeyPress("XKeySym", buf, 0, 0);
- } else {
- FeedKeyPress("XKeySym", XKeysymToString(keysym), 0, 0);
- }
- */
+ break;
case KeyRelease:
break;
default: