diff options
Diffstat (limited to 'contrib/ffmpeg/libavcodec/flacenc.c')
-rw-r--r-- | contrib/ffmpeg/libavcodec/flacenc.c | 242 |
1 files changed, 189 insertions, 53 deletions
diff --git a/contrib/ffmpeg/libavcodec/flacenc.c b/contrib/ffmpeg/libavcodec/flacenc.c index 9dd6c7eb8..469b46115 100644 --- a/contrib/ffmpeg/libavcodec/flacenc.c +++ b/contrib/ffmpeg/libavcodec/flacenc.c @@ -22,6 +22,7 @@ #include "avcodec.h" #include "bitstream.h" #include "crc.h" +#include "dsputil.h" #include "golomb.h" #include "lls.h" @@ -84,7 +85,7 @@ typedef struct FlacSubframe { int shift; RiceContext rc; int32_t samples[FLAC_MAX_BLOCKSIZE]; - int32_t residual[FLAC_MAX_BLOCKSIZE]; + int32_t residual[FLAC_MAX_BLOCKSIZE+1]; } FlacSubframe; typedef struct FlacFrame { @@ -107,6 +108,7 @@ typedef struct FlacEncodeContext { FlacFrame frame; CompressionOptions options; AVCodecContext *avctx; + DSPContext dsp; } FlacEncodeContext; static const int flac_samplerates[16] = { @@ -177,6 +179,8 @@ static int flac_encode_init(AVCodecContext *avctx) s->avctx = avctx; + dsputil_init(&s->dsp, avctx); + if(avctx->sample_fmt != SAMPLE_FMT_S16) { return -1; } @@ -447,20 +451,19 @@ static void copy_samples(FlacEncodeContext *s, int16_t *samples) #define rice_encode_count(sum, n, k) (((n)*((k)+1))+((sum-(n>>1))>>(k))) +/** + * Solve for d/dk(rice_encode_count) = n-((sum-(n>>1))>>(k+1)) = 0 + */ static int find_optimal_param(uint32_t sum, int n) { - int k, k_opt; - uint32_t nbits[MAX_RICE_PARAM+1]; - - k_opt = 0; - nbits[0] = UINT32_MAX; - for(k=0; k<=MAX_RICE_PARAM; k++) { - nbits[k] = rice_encode_count(sum, n, k); - if(nbits[k] < nbits[k_opt]) { - k_opt = k; - } - } - return k_opt; + int k; + uint32_t sum2; + + if(sum <= n>>1) + return 0; + sum2 = sum-(n>>1); + k = av_log2(n<256 ? FASTDIV(sum2,n) : sum2/n); + return FFMIN(k, MAX_RICE_PARAM); } static uint32_t calc_optimal_rice_params(RiceContext *rc, int porder, @@ -471,16 +474,15 @@ static uint32_t calc_optimal_rice_params(RiceContext *rc, int porder, uint32_t all_bits; part = (1 << porder); - all_bits = 0; + all_bits = 4 * part; cnt = (n >> porder) - pred_order; for(i=0; i<part; i++) { - if(i == 1) cnt = (n >> porder); k = find_optimal_param(sums[i], cnt); rc->params[i] = k; all_bits += rice_encode_count(sums[i], cnt, k); + cnt = n >> porder; } - all_bits += (4 * part); rc->porder = porder; @@ -499,10 +501,11 @@ static void calc_sums(int pmin, int pmax, uint32_t *data, int n, int pred_order, res = &data[pred_order]; res_end = &data[n >> pmax]; for(i=0; i<parts; i++) { - sums[pmax][i] = 0; + uint32_t sum = 0; while(res < res_end){ - sums[pmax][i] += *(res++); + sum += *(res++); } + sums[pmax][i] = sum; res_end+= n >> pmax; } /* sums for lower levels */ @@ -590,13 +593,19 @@ static void apply_welch_window(const int32_t *data, int len, double *w_data) double w; double c; + assert(!(len&1)); //the optimization in r11881 does not support odd len + //if someone wants odd len extend the change in r11881 + n2 = (len >> 1); c = 2.0 / (len - 1.0); + + w_data+=n2; + data+=n2; for(i=0; i<n2; i++) { - w = c - i - 1.0; + w = c - n2 + i; w = 1.0 - (w * w); - w_data[i] = data[i] * w; - w_data[len-1-i] = data[len-1-i] * w; + w_data[-i-1] = data[-i-1] * w; + w_data[+i ] = data[+i ] * w; } } @@ -604,24 +613,36 @@ static void apply_welch_window(const int32_t *data, int len, double *w_data) * Calculates autocorrelation data from audio samples * A Welch window function is applied before calculation. */ -static void compute_autocorr(const int32_t *data, int len, int lag, - double *autoc) +void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, + double *autoc) { - int i, lag_ptr; - double tmp[len + lag]; + int i, j; + double tmp[len + lag + 1]; double *data1= tmp + lag; apply_welch_window(data, len, data1); - for(i=0; i<lag; i++){ - autoc[i] = 1.0; - data1[i-lag]= 0.0; + for(j=0; j<lag; j++) + data1[j-lag]= 0.0; + data1[len] = 0.0; + + for(j=0; j<lag; j+=2){ + double sum0 = 1.0, sum1 = 1.0; + for(i=0; i<len; i++){ + sum0 += data1[i] * data1[i-j]; + sum1 += data1[i] * data1[i-j-1]; + } + autoc[j ] = sum0; + autoc[j+1] = sum1; } - for(i=0; i<len; i++){ - for(lag_ptr= i-lag; lag_ptr<=i; lag_ptr++){ - autoc[i-lag_ptr] += data1[i] * data1[lag_ptr]; + if(j==lag){ + double sum = 1.0; + for(i=0; i<len; i+=2){ + sum += data1[i ] * data1[i-j ] + + data1[i+1] * data1[i-j+1]; } + autoc[j] = sum; } } @@ -735,7 +756,8 @@ static int estimate_best_order(double *ref, int max_order) /** * Calculate LPC coefficients for multiple orders */ -static int lpc_calc_coefs(const int32_t *samples, int blocksize, int max_order, +static int lpc_calc_coefs(FlacEncodeContext *s, + const int32_t *samples, int blocksize, int max_order, int precision, int32_t coefs[][MAX_LPC_ORDER], int *shift, int use_lpc, int omethod) { @@ -748,12 +770,12 @@ static int lpc_calc_coefs(const int32_t *samples, int blocksize, int max_order, assert(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER); if(use_lpc == 1){ - compute_autocorr(samples, blocksize, max_order+1, autoc); + s->dsp.flac_compute_autocorr(samples, blocksize, max_order, autoc); compute_lpc_coefs(autoc, max_order, lpc, ref); }else{ LLSModel m[2]; - double var[MAX_LPC_ORDER+1], eval, weight; + double var[MAX_LPC_ORDER+1], weight; for(pass=0; pass<use_lpc-1; pass++){ av_init_lls(&m[pass&1], max_order); @@ -764,11 +786,14 @@ static int lpc_calc_coefs(const int32_t *samples, int blocksize, int max_order, var[j]= samples[i-j]; if(pass){ + double eval, inv, rinv; eval= av_evaluate_lls(&m[(pass-1)&1], var+1, max_order-1); eval= (512>>pass) + fabs(eval - var[0]); + inv = 1/eval; + rinv = sqrt(inv); for(j=0; j<=max_order; j++) - var[j]/= sqrt(eval); - weight += 1/eval; + var[j] *= rinv; + weight += inv; }else weight++; @@ -823,33 +848,142 @@ static void encode_residual_fixed(int32_t *res, const int32_t *smp, int n, for(i=order; i<n; i++) res[i]= smp[i] - smp[i-1]; }else if(order==2){ - for(i=order; i<n; i++) - res[i]= smp[i] - 2*smp[i-1] + smp[i-2]; + int a = smp[order-1] - smp[order-2]; + for(i=order; i<n; i+=2) { + int b = smp[i] - smp[i-1]; + res[i]= b - a; + a = smp[i+1] - smp[i]; + res[i+1]= a - b; + } }else if(order==3){ - for(i=order; i<n; i++) - res[i]= smp[i] - 3*smp[i-1] + 3*smp[i-2] - smp[i-3]; + int a = smp[order-1] - smp[order-2]; + int c = smp[order-1] - 2*smp[order-2] + smp[order-3]; + for(i=order; i<n; i+=2) { + int b = smp[i] - smp[i-1]; + int d = b - a; + res[i]= d - c; + a = smp[i+1] - smp[i]; + c = a - b; + res[i+1]= c - d; + } }else{ - for(i=order; i<n; i++) - res[i]= smp[i] - 4*smp[i-1] + 6*smp[i-2] - 4*smp[i-3] + smp[i-4]; + int a = smp[order-1] - smp[order-2]; + int c = smp[order-1] - 2*smp[order-2] + smp[order-3]; + int e = smp[order-1] - 3*smp[order-2] + 3*smp[order-3] - smp[order-4]; + for(i=order; i<n; i+=2) { + int b = smp[i] - smp[i-1]; + int d = b - a; + int f = d - c; + res[i]= f - e; + a = smp[i+1] - smp[i]; + c = a - b; + e = c - d; + res[i+1]= e - f; + } + } +} + +#define LPC1(x) {\ + int c = coefs[(x)-1];\ + p0 += c*s;\ + s = smp[i-(x)+1];\ + p1 += c*s;\ +} + +static av_always_inline void encode_residual_lpc_unrolled( + int32_t *res, const int32_t *smp, int n, + int order, const int32_t *coefs, int shift, int big) +{ + int i; + for(i=order; i<n; i+=2) { + int s = smp[i-order]; + int p0 = 0, p1 = 0; + if(big) { + switch(order) { + case 32: LPC1(32) + case 31: LPC1(31) + case 30: LPC1(30) + case 29: LPC1(29) + case 28: LPC1(28) + case 27: LPC1(27) + case 26: LPC1(26) + case 25: LPC1(25) + case 24: LPC1(24) + case 23: LPC1(23) + case 22: LPC1(22) + case 21: LPC1(21) + case 20: LPC1(20) + case 19: LPC1(19) + case 18: LPC1(18) + case 17: LPC1(17) + case 16: LPC1(16) + case 15: LPC1(15) + case 14: LPC1(14) + case 13: LPC1(13) + case 12: LPC1(12) + case 11: LPC1(11) + case 10: LPC1(10) + case 9: LPC1( 9) + LPC1( 8) + LPC1( 7) + LPC1( 6) + LPC1( 5) + LPC1( 4) + LPC1( 3) + LPC1( 2) + LPC1( 1) + } + } else { + switch(order) { + case 8: LPC1( 8) + case 7: LPC1( 7) + case 6: LPC1( 6) + case 5: LPC1( 5) + case 4: LPC1( 4) + case 3: LPC1( 3) + case 2: LPC1( 2) + case 1: LPC1( 1) + } + } + res[i ] = smp[i ] - (p0 >> shift); + res[i+1] = smp[i+1] - (p1 >> shift); } } static void encode_residual_lpc(int32_t *res, const int32_t *smp, int n, int order, const int32_t *coefs, int shift) { - int i, j; - int32_t pred; - + int i; for(i=0; i<order; i++) { res[i] = smp[i]; } - for(i=order; i<n; i++) { - pred = 0; +#ifdef CONFIG_SMALL + for(i=order; i<n; i+=2) { + int j; + int s = smp[i]; + int p0 = 0, p1 = 0; for(j=0; j<order; j++) { - pred += coefs[j] * smp[i-j-1]; + int c = coefs[j]; + p1 += c*s; + s = smp[i-j-1]; + p0 += c*s; } - res[i] = smp[i] - (pred >> shift); - } + res[i ] = smp[i ] - (p0 >> shift); + res[i+1] = smp[i+1] - (p1 >> shift); + } +#else + switch(order) { + case 1: encode_residual_lpc_unrolled(res, smp, n, 1, coefs, shift, 0); break; + case 2: encode_residual_lpc_unrolled(res, smp, n, 2, coefs, shift, 0); break; + case 3: encode_residual_lpc_unrolled(res, smp, n, 3, coefs, shift, 0); break; + case 4: encode_residual_lpc_unrolled(res, smp, n, 4, coefs, shift, 0); break; + case 5: encode_residual_lpc_unrolled(res, smp, n, 5, coefs, shift, 0); break; + case 6: encode_residual_lpc_unrolled(res, smp, n, 6, coefs, shift, 0); break; + case 7: encode_residual_lpc_unrolled(res, smp, n, 7, coefs, shift, 0); break; + case 8: encode_residual_lpc_unrolled(res, smp, n, 8, coefs, shift, 0); break; + default: encode_residual_lpc_unrolled(res, smp, n, order, coefs, shift, 1); break; + } +#endif } static int encode_residual(FlacEncodeContext *ctx, int ch) @@ -919,7 +1053,7 @@ static int encode_residual(FlacEncodeContext *ctx, int ch) } /* LPC */ - opt_order = lpc_calc_coefs(smp, n, max_order, precision, coefs, shift, ctx->options.use_lpc, omethod); + opt_order = lpc_calc_coefs(ctx, smp, n, max_order, precision, coefs, shift, ctx->options.use_lpc, omethod); if(omethod == ORDER_METHOD_2LEVEL || omethod == ORDER_METHOD_4LEVEL || @@ -1155,7 +1289,8 @@ static void output_frame_header(FlacEncodeContext *s) put_bits(&s->pb, 16, s->sr_code[1]); } flush_put_bits(&s->pb); - crc = av_crc(av_crc07, 0, s->pb.buf, put_bits_count(&s->pb)>>3); + crc = av_crc(av_crc_get_table(AV_CRC_8_ATM), 0, + s->pb.buf, put_bits_count(&s->pb)>>3); put_bits(&s->pb, 8, crc); } @@ -1297,7 +1432,8 @@ static void output_frame_footer(FlacEncodeContext *s) { int crc; flush_put_bits(&s->pb); - crc = bswap_16(av_crc(av_crc8005, 0, s->pb.buf, put_bits_count(&s->pb)>>3)); + crc = bswap_16(av_crc(av_crc_get_table(AV_CRC_16_ANSI), 0, + s->pb.buf, put_bits_count(&s->pb)>>3)); put_bits(&s->pb, 16, crc); flush_put_bits(&s->pb); } |