diff options
| author | Johns <johns98@gmx.net> | 2012-01-17 18:53:53 +0100 | 
|---|---|---|
| committer | Johns <johns98@gmx.net> | 2012-01-17 18:53:53 +0100 | 
| commit | 19d4eeed8268c88feff69f06218a11e68e548246 (patch) | |
| tree | ffdc5ac1abeeb8dc3fb510919a41d800799552db | |
| parent | 9f668c47508443a6a49b7776a837870e3c92434b (diff) | |
| download | vdr-plugin-softhddevice-19d4eeed8268c88feff69f06218a11e68e548246.tar.gz vdr-plugin-softhddevice-19d4eeed8268c88feff69f06218a11e68e548246.tar.bz2 | |
Little speed improved Intel VA-API deinterlace.
| -rw-r--r-- | video.c | 130 | 
1 files changed, 122 insertions, 8 deletions
| @@ -2388,40 +2388,144 @@ static void VaapiBlackSurface(VaapiDecoder * decoder)  static void VaapiBob(VaapiDecoder * decoder, VAImage * src, VAImage * dst1,      VAImage * dst2)  { +    uint32_t tick1; +    uint32_t tick2; +    uint32_t tick3; +    uint32_t tick4; +    uint32_t tick5; +    uint32_t tick6; +    uint32_t tick7; +    uint32_t tick8;      void *src_base;      void *dst1_base;      void *dst2_base;      unsigned y;      unsigned p; +    tick1 = GetMsTicks();      if (vaMapBuffer(decoder->VaDisplay, src->buf,  	    &src_base) != VA_STATUS_SUCCESS) {  	Fatal("video/vaapi: can't map the image!\n");      } +    tick2 = GetMsTicks();      if (vaMapBuffer(decoder->VaDisplay, dst1->buf,  	    &dst1_base) != VA_STATUS_SUCCESS) {  	Fatal("video/vaapi: can't map the image!\n");      } +    tick3 = GetMsTicks();      if (vaMapBuffer(decoder->VaDisplay, dst2->buf,  	    &dst2_base) != VA_STATUS_SUCCESS) {  	Fatal("video/vaapi: can't map the image!\n");      } +    tick4 = GetMsTicks();      if (0) {				// test all updated  	memset(dst1_base, 0x00, dst1->data_size);  	memset(dst2_base, 0xFF, dst2->data_size);  	return;      } +#if 0 +    // interleave      for (p = 0; p < src->num_planes; ++p) {  	for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) { -	    memcpy(dst1_base + src->offsets[p] + y * src->pitches[p], -		src_base + src->offsets[p] + y * src->pitches[p], +	    memcpy(dst1_base + src->offsets[p] + (y + 0) * src->pitches[p], +		src_base + src->offsets[p] + (y + 0) * src->pitches[p],  		src->pitches[p]);  	    memcpy(dst1_base + src->offsets[p] + (y + 1) * src->pitches[p], -		src_base + src->offsets[p] + y * src->pitches[p], +		src_base + src->offsets[p] + (y + 0) * src->pitches[p],  		src->pitches[p]); -	    memcpy(dst2_base + src->offsets[p] + y * src->pitches[p], +	    memcpy(dst2_base + src->offsets[p] + (y + 0) * src->pitches[p], +		src_base + src->offsets[p] + (y + 1) * src->pitches[p], +		src->pitches[p]); +	    memcpy(dst2_base + src->offsets[p] + (y + 1) * src->pitches[p], +		src_base + src->offsets[p] + (y + 1) * src->pitches[p], +		src->pitches[p]); +	} +    } +#endif +#if 1 +    // use tmp copy +    if (1) { +	uint8_t *tmp; + +	tmp = malloc(src->data_size); +	memcpy(tmp, src_base, src->data_size); + +	for (p = 0; p < src->num_planes; ++p) { +	    for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) { +		memcpy(dst1_base + src->offsets[p] + (y + 0) * src->pitches[p], +		    tmp + src->offsets[p] + (y + 0) * src->pitches[p], +		    src->pitches[p]); +		memcpy(dst1_base + src->offsets[p] + (y + 1) * src->pitches[p], +		    tmp + src->offsets[p] + (y + 0) * src->pitches[p], +		    src->pitches[p]); + +		memcpy(dst2_base + src->offsets[p] + (y + 0) * src->pitches[p], +		    tmp + src->offsets[p] + (y + 1) * src->pitches[p], +		    src->pitches[p]); +		memcpy(dst2_base + src->offsets[p] + (y + 1) * src->pitches[p], +		    tmp + src->offsets[p] + (y + 1) * src->pitches[p], +		    src->pitches[p]); +	    } + +	} +	free(tmp); +    } +#endif +#if 0 +    // use multiple tmp copy +    if (1) { +	uint8_t *tmp_src; +	uint8_t *tmp_dst1; +	uint8_t *tmp_dst2; + +	tmp_src = malloc(src->data_size); +	memcpy(tmp_src, src_base, src->data_size); +	tmp_dst1 = malloc(src->data_size); +	tmp_dst2 = malloc(src->data_size); + +	for (p = 0; p < src->num_planes; ++p) { +	    for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) { +		memcpy(tmp_dst1 + src->offsets[p] + (y + 0) * src->pitches[p], +		    tmp_src + src->offsets[p] + (y + 0) * src->pitches[p], +		    src->pitches[p]); +		memcpy(tmp_dst1 + src->offsets[p] + (y + 1) * src->pitches[p], +		    tmp_src + src->offsets[p] + (y + 0) * src->pitches[p], +		    src->pitches[p]); + +		memcpy(tmp_dst2 + src->offsets[p] + (y + 0) * src->pitches[p], +		    tmp_src + src->offsets[p] + (y + 1) * src->pitches[p], +		    src->pitches[p]); +		memcpy(tmp_dst2 + src->offsets[p] + (y + 1) * src->pitches[p], +		    tmp_src + src->offsets[p] + (y + 1) * src->pitches[p], +		    src->pitches[p]); +	    } +	} +	memcpy(dst1_base, tmp_dst1, src->data_size); +	memcpy(dst2_base, tmp_dst2, src->data_size); + +	free(tmp_src); +	free(tmp_dst1); +	free(tmp_dst2); +    } +#endif +#if 0 +    // dst1 first +    for (p = 0; p < src->num_planes; ++p) { +	for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) { +	    memcpy(dst1_base + src->offsets[p] + (y + 0) * src->pitches[p], +		src_base + src->offsets[p] + (y + 0) * src->pitches[p], +		src->pitches[p]); +	    memcpy(dst1_base + src->offsets[p] + (y + 1) * src->pitches[p], +		src_base + src->offsets[p] + (y + 0) * src->pitches[p], +		src->pitches[p]); +	} +    } +    // dst2 next +    for (p = 0; p < src->num_planes; ++p) { +	for (y = 0; y < (unsigned)(src->height >> (p != 0)); y += 2) { +	    memcpy(dst2_base + src->offsets[p] + (y + 0) * src->pitches[p],  		src_base + src->offsets[p] + (y + 1) * src->pitches[p],  		src->pitches[p]);  	    memcpy(dst2_base + src->offsets[p] + (y + 1) * src->pitches[p], @@ -2429,16 +2533,26 @@ static void VaapiBob(VaapiDecoder * decoder, VAImage * src, VAImage * dst1,  		src->pitches[p]);  	}      } +#endif + +    tick5 = GetMsTicks();      if (vaUnmapBuffer(decoder->VaDisplay, dst2->buf) != VA_STATUS_SUCCESS) {  	Error(_("video/vaapi: can't unmap image buffer\n"));      } +    tick6 = GetMsTicks();      if (vaUnmapBuffer(decoder->VaDisplay, dst1->buf) != VA_STATUS_SUCCESS) {  	Error(_("video/vaapi: can't unmap image buffer\n"));      } +    tick7 = GetMsTicks();      if (vaUnmapBuffer(decoder->VaDisplay, src->buf) != VA_STATUS_SUCCESS) {  	Error(_("video/vaapi: can't unmap image buffer\n"));      } +    tick8 = GetMsTicks(); + +    Debug(3, "video/vaapi: map=%2d/%2d/%2d deint=%2d umap=%2d/%2d/%2d\n", +	tick2 - tick1, tick3 - tick2, tick4 - tick3, tick5 - tick4, +	tick6 - tick5, tick7 - tick6, tick8 - tick7);  }  /// @@ -2576,7 +2690,7 @@ static void VaapiCpuDerive(VaapiDecoder * decoder, VASurfaceID surface)      tick5 = GetMsTicks(); -    Debug(3, "video/vaapi: get=%2d get1=%2d get2=%d deint=%2d\n", +    Debug(4, "video/vaapi: get=%2d get1=%2d get2=%d deint=%2d\n",  	tick2 - tick1, tick3 - tick2, tick4 - tick3, tick5 - tick4);  } @@ -2645,12 +2759,12 @@ static void VaapiCpuPut(VaapiDecoder * decoder, VASurfaceID surface)  	abort();      }      VaapiQueueSurface(decoder, out, 1); -    if (1 && vaSyncSurface(decoder->VaDisplay, out) != VA_STATUS_SUCCESS) { +    if (0 && vaSyncSurface(decoder->VaDisplay, out) != VA_STATUS_SUCCESS) {  	Error(_("video/vaapi: vaSyncSurface failed\n"));      }      tick4 = GetMsTicks(); -    Debug(3, "video/vaapi: deint %d %#010x -> %#010x\n", decoder->SurfaceField, +    Debug(4, "video/vaapi: deint %d %#010x -> %#010x\n", decoder->SurfaceField,  	surface, out);      // get a free surface and upload the image @@ -2664,7 +2778,7 @@ static void VaapiCpuPut(VaapiDecoder * decoder, VASurfaceID surface)  	Error("video/vaapi: can't put image!\n");      }      VaapiQueueSurface(decoder, out, 1); -    if (1 && vaSyncSurface(decoder->VaDisplay, out) != VA_STATUS_SUCCESS) { +    if (0 && vaSyncSurface(decoder->VaDisplay, out) != VA_STATUS_SUCCESS) {  	Error(_("video/vaapi: vaSyncSurface failed\n"));      }      tick5 = GetMsTicks(); | 
