summaryrefslogtreecommitdiff
path: root/contrib/ffmpeg/libswscale/swscale_template.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/ffmpeg/libswscale/swscale_template.c')
-rw-r--r--contrib/ffmpeg/libswscale/swscale_template.c77
1 files changed, 62 insertions, 15 deletions
diff --git a/contrib/ffmpeg/libswscale/swscale_template.c b/contrib/ffmpeg/libswscale/swscale_template.c
index e725a3bb0..ad46be127 100644
--- a/contrib/ffmpeg/libswscale/swscale_template.c
+++ b/contrib/ffmpeg/libswscale/swscale_template.c
@@ -1730,7 +1730,6 @@ static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, long width)
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
{
- assert(src1 == src2);
#ifdef HAVE_MMX
asm volatile(
"movq "MANGLE(bm01010101)", %%mm4\n\t"
@@ -1761,6 +1760,7 @@ static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1,
dstV[i]= src1[4*i + 3];
}
#endif
+ assert(src1 == src2);
}
//this is almost identical to the previous, and exists only because yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses
@@ -1790,7 +1790,6 @@ static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, long width)
static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
{
- assert(src1 == src2);
#ifdef HAVE_MMX
asm volatile(
"movq "MANGLE(bm01010101)", %%mm4\n\t"
@@ -1821,6 +1820,7 @@ static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1,
dstV[i]= src1[4*i + 2];
}
#endif
+ assert(src1 == src2);
}
static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width)
@@ -1942,7 +1942,6 @@ static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, long width)
static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, long width)
{
- assert(src1 == src2);
#ifdef HAVE_MMX
asm volatile(
"mov %3, %%"REG_a" \n\t"
@@ -2072,6 +2071,7 @@ static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
}
#endif
+ assert(src1 == src2);
}
static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width)
@@ -2279,6 +2279,38 @@ static inline void RENAME(rgb15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1
}
}
+/**
+ * Converts one line of palettized pixels to 8-bit luma.
+ *
+ * Every src byte is used as an index into pal; the selected entry is
+ * unpacked as blue in bits 0-7, green in bits 8-15 and red in bits 16-23.
+ * Bits 24-31 of an entry are ignored.
+ *
+ * @param dst   output line, width bytes are written
+ * @param src   input line of palette indices, width bytes are read
+ * @param width number of pixels to convert
+ * @param pal   palette, indexed by the bytes of src
+ */
+static inline void RENAME(palToY)(uint8_t *dst, uint8_t *src, int width, uint32_t *pal)
+{
+    int i;
+    for(i=0; i<width; i++)
+    {
+        uint32_t p= pal[src[i]];
+        int b=  p     &0xFF;
+        int g= (p>> 8)&0xFF;
+        // red is masked like the other channels so that whatever the caller
+        // keeps in bits 24-31 (e.g. alpha) cannot leak into the luma value
+        int r= (p>>16)&0xFF;
+
+        dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
+    }
+}
+
+/**
+ * Converts one line of palettized pixels to 8-bit chroma, producing one
+ * U/V sample per pair of horizontally adjacent input pixels.
+ *
+ * Every src1 byte is used as an index into pal; the selected entry is
+ * unpacked as blue in bits 0-7, green in bits 8-15 and red in bits 16-23.
+ * Bits 24-31 of an entry are ignored.
+ *
+ * @param dstU  output U line, width bytes are written
+ * @param dstV  output V line, width bytes are written
+ * @param src1  input line of palette indices, 2*width bytes are read
+ * @param src2  must be the same pointer as src1 (asserted); not read
+ * @param width number of chroma samples to produce
+ * @param pal   palette, indexed by the bytes of src1
+ */
+static inline void RENAME(palToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width, uint32_t *pal)
+{
+    int i;
+    assert(src1 == src2);
+    for(i=0; i<width; i++)
+    {
+        uint32_t p0= pal[src1[2*i  ]];
+        uint32_t p1= pal[src1[2*i+1]];
+        // red and blue of both pixels are summed with a single addition:
+        // the fields are 16 bits apart, so the 9-bit sums cannot collide
+        int rb= (p0&0xFF00FF) + (p1&0xFF00FF);
+        // green is summed from the masked fields so that bits 24-31 of the
+        // palette entries (e.g. alpha) cannot leak into the result
+        int g = ((p0&0x00FF00) + (p1&0x00FF00))>>8;
+        int b = rb&0x1FF;
+        int r = rb>>16;
+
+        // r, g and b are two-pixel sums; the extra +1 in the shift halves them
+        dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+1)) + 128;
+        dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+1)) + 128;
+    }
+}
+
// Bilinear / Bicubic scaling
static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
int16_t *filter, int16_t *filterPos, long filterSize)
@@ -2456,7 +2488,7 @@ static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW
val += ((int)src[srcPos + j])*filter[filterSize*i + j];
}
// filter += hFilterSize;
- dst[i] = FFMIN(FFMAX(0, val>>7), (1<<15)-1); // the cubic equation does overflow ...
+ dst[i] = av_clip(val>>7, 0, (1<<15)-1); // the cubic equation does overflow ...
// dst[i] = val>>7;
}
#endif
@@ -2467,7 +2499,7 @@ static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, i
int flags, int canMMX2BeUsed, int16_t *hLumFilter,
int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
- int32_t *mmx2FilterPos)
+ int32_t *mmx2FilterPos, uint8_t *pal)
{
if(srcFormat==PIX_FMT_YUYV422 || srcFormat==PIX_FMT_GRAY16BE)
{
@@ -2519,6 +2551,11 @@ static inline void RENAME(hyscale)(uint16_t *dst, long dstWidth, uint8_t *src, i
RENAME(rgb15ToY)(formatConvBuffer, src, srcW);
src= formatConvBuffer;
}
+ else if(srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE)
+ {
+ RENAME(palToY)(formatConvBuffer, src, srcW, pal);
+ src= formatConvBuffer;
+ }
#ifdef HAVE_MMX
// use the new MMX scaler if the mmx2 can't be used (its faster than the x86asm one)
@@ -2664,7 +2701,7 @@ inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1,
int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
- int32_t *mmx2FilterPos)
+ int32_t *mmx2FilterPos, uint8_t *pal)
{
if(srcFormat==PIX_FMT_YUYV422)
{
@@ -2730,6 +2767,12 @@ inline static void RENAME(hcscale)(uint16_t *dst, long dstWidth, uint8_t *src1,
{
return;
}
+ else if(srcFormat==PIX_FMT_RGB8 || srcFormat==PIX_FMT_BGR8 || srcFormat==PIX_FMT_PAL8 || srcFormat==PIX_FMT_BGR4_BYTE || srcFormat==PIX_FMT_RGB4_BYTE)
+ {
+ RENAME(palToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW, pal);
+ src1= formatConvBuffer;
+ src2= formatConvBuffer+2048;
+ }
#ifdef HAVE_MMX
// use the new MMX scaler if the mmx2 can't be used (its faster than the x86asm one)
@@ -2932,6 +2975,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample;
const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample);
int lastDstY;
+ uint8_t *pal=NULL;
/* vars which will change and which we need to store back in the context */
int dstY= c->dstY;
@@ -2941,6 +2985,7 @@ static int RENAME(swScale)(SwsContext *c, uint8_t* src[], int srcStride[], int s
int lastInChrBuf= c->lastInChrBuf;
if(isPacked(c->srcFormat)){
+ pal= src[1];
src[0]=
src[1]=
src[2]= src[0];
@@ -2972,7 +3017,7 @@ i--;
static int firstTime=1; //FIXME move this into the context perhaps
if(flags & SWS_PRINT_INFO && firstTime)
{
- MSG_WARN("SwScaler: Warning: dstStride is not aligned!\n"
+ av_log(c, AV_LOG_WARNING, "SwScaler: Warning: dstStride is not aligned!\n"
"SwScaler: ->cannot do aligned memory acesses anymore\n");
firstTime=0;
}
@@ -3026,7 +3071,7 @@ i--;
RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
funnyYCode, c->srcFormat, formatConvBuffer,
- c->lumMmx2Filter, c->lumMmx2FilterPos);
+ c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
lastInLumBuf++;
}
while(lastInChrBuf < lastChrSrcY)
@@ -3043,7 +3088,7 @@ i--;
RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
funnyUVCode, c->srcFormat, formatConvBuffer,
- c->chrMmx2Filter, c->chrMmx2FilterPos);
+ c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
lastInChrBuf++;
}
//wrap buf index around to stay inside the ring buffer
@@ -3068,7 +3113,7 @@ i--;
RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
funnyYCode, c->srcFormat, formatConvBuffer,
- c->lumMmx2Filter, c->lumMmx2FilterPos);
+ c->lumMmx2Filter, c->lumMmx2FilterPos, pal);
lastInLumBuf++;
}
while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH))
@@ -3084,7 +3129,7 @@ i--;
RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc,
flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
funnyUVCode, c->srcFormat, formatConvBuffer,
- c->chrMmx2Filter, c->chrMmx2FilterPos);
+ c->chrMmx2Filter, c->chrMmx2FilterPos, pal);
lastInChrBuf++;
}
//wrap buf index around to stay inside the ring buffer
@@ -3107,15 +3152,15 @@ i--;
int i;
if(flags & SWS_ACCURATE_RND){
for(i=0; i<vLumFilterSize; i+=2){
- lumMmxFilter[2*i+0]= lumSrcPtr[i ];
- lumMmxFilter[2*i+1]= lumSrcPtr[i+(vLumFilterSize>1)];
+ lumMmxFilter[2*i+0]= (int32_t)lumSrcPtr[i ];
+ lumMmxFilter[2*i+1]= (int32_t)lumSrcPtr[i+(vLumFilterSize>1)];
lumMmxFilter[2*i+2]=
lumMmxFilter[2*i+3]= vLumFilter[dstY*vLumFilterSize + i ]
+ (vLumFilterSize>1 ? vLumFilter[dstY*vLumFilterSize + i + 1]<<16 : 0);
}
for(i=0; i<vChrFilterSize; i+=2){
- chrMmxFilter[2*i+0]= chrSrcPtr[i ];
- chrMmxFilter[2*i+1]= chrSrcPtr[i+(vChrFilterSize>1)];
+ chrMmxFilter[2*i+0]= (int32_t)chrSrcPtr[i ];
+ chrMmxFilter[2*i+1]= (int32_t)chrSrcPtr[i+(vChrFilterSize>1)];
chrMmxFilter[2*i+2]=
chrMmxFilter[2*i+3]= vChrFilter[chrDstY*vChrFilterSize + i ]
+ (vChrFilterSize>1 ? vChrFilter[chrDstY*vChrFilterSize + i + 1]<<16 : 0);
@@ -3124,6 +3169,7 @@ i--;
for(i=0; i<vLumFilterSize; i++)
{
lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
+ lumMmxFilter[4*i+1]= (uint64_t)lumSrcPtr[i] >> 32;
lumMmxFilter[4*i+2]=
lumMmxFilter[4*i+3]=
((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
@@ -3131,6 +3177,7 @@ i--;
for(i=0; i<vChrFilterSize; i++)
{
chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
+ chrMmxFilter[4*i+1]= (uint64_t)chrSrcPtr[i] >> 32;
chrMmxFilter[4*i+2]=
chrMmxFilter[4*i+3]=
((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;