From c156a68d06ace37b87d91c97774e06c05bee1f08 Mon Sep 17 00:00:00 2001 From: Yunqing Wang Date: Tue, 12 Jul 2011 14:34:51 -0400 Subject: [PATCH 1/2] Fix vpxenc encoding incorrect webm file header on big endian machines (Issue 331) As reported in issue 331, vpxenc encoded an incorrect webm file header on big endian machines. This change fixes that. Change-Id: I31924ebd476a87f3e88b9b5424540bf781d2b86f --- libmkv/EbmlWriter.c | 30 +++++++++++++++---------- libmkv/EbmlWriter.h | 2 +- libmkv/WebMElement.c | 4 ++-- vpxenc.c | 53 +++++++++++++++++++++++++++++++++----------- 4 files changed, 61 insertions(+), 28 deletions(-) diff --git a/libmkv/EbmlWriter.c b/libmkv/EbmlWriter.c index ac70d097d..fbf2c66e9 100644 --- a/libmkv/EbmlWriter.c +++ b/libmkv/EbmlWriter.c @@ -11,6 +11,7 @@ #include #include #include +#include #if defined(_MSC_VER) #define LITERALU64(n) n #else @@ -33,7 +34,7 @@ void Ebml_WriteLen(EbmlGlobal *glob, long long val) val |= (LITERALU64(0x000000000000080) << ((size - 1) * 7)); - Ebml_Serialize(glob, (void *) &val, size); + Ebml_Serialize(glob, (void *) &val, sizeof(val), size); } void Ebml_WriteString(EbmlGlobal *glob, const char *str) @@ -60,21 +61,26 @@ void Ebml_WriteUTF8(EbmlGlobal *glob, const wchar_t *wstr) void Ebml_WriteID(EbmlGlobal *glob, unsigned long class_id) { + int len; + if (class_id >= 0x01000000) - Ebml_Serialize(glob, (void *)&class_id, 4); + len = 4; else if (class_id >= 0x00010000) - Ebml_Serialize(glob, (void *)&class_id, 3); + len = 3; else if (class_id >= 0x00000100) - Ebml_Serialize(glob, (void *)&class_id, 2); + len = 2; else - Ebml_Serialize(glob, (void *)&class_id, 1); + len = 1; + + Ebml_Serialize(glob, (void *)&class_id, sizeof(class_id), len); } + void Ebml_SerializeUnsigned64(EbmlGlobal *glob, unsigned long class_id, uint64_t ui) { unsigned char sizeSerialized = 8 | 0x80; Ebml_WriteID(glob, class_id); - Ebml_Serialize(glob, &sizeSerialized, 1); - Ebml_Serialize(glob, &ui, 8); + Ebml_Serialize(glob, &sizeSerialized, 
sizeof(sizeSerialized), 1); + Ebml_Serialize(glob, &ui, sizeof(ui), 8); } void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned long ui) @@ -97,8 +103,8 @@ void Ebml_SerializeUnsigned(EbmlGlobal *glob, unsigned long class_id, unsigned l } sizeSerialized = 0x80 | size; - Ebml_Serialize(glob, &sizeSerialized, 1); - Ebml_Serialize(glob, &ui, size); + Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); + Ebml_Serialize(glob, &ui, sizeof(ui), size); } //TODO: perhaps this is a poor name for this id serializer helper function void Ebml_SerializeBinary(EbmlGlobal *glob, unsigned long class_id, unsigned long bin) @@ -119,14 +125,14 @@ void Ebml_SerializeFloat(EbmlGlobal *glob, unsigned long class_id, double d) unsigned char len = 0x88; Ebml_WriteID(glob, class_id); - Ebml_Serialize(glob, &len, 1); - Ebml_Serialize(glob, &d, 8); + Ebml_Serialize(glob, &len, sizeof(len), 1); + Ebml_Serialize(glob, &d, sizeof(d), 8); } void Ebml_WriteSigned16(EbmlGlobal *glob, short val) { signed long out = ((val & 0x003FFFFF) | 0x00200000) << 8; - Ebml_Serialize(glob, &out, 3); + Ebml_Serialize(glob, &out, sizeof(out), 3); } void Ebml_SerializeString(EbmlGlobal *glob, unsigned long class_id, const char *s) diff --git a/libmkv/EbmlWriter.h b/libmkv/EbmlWriter.h index 8c7fe7c66..324c9bca0 100644 --- a/libmkv/EbmlWriter.h +++ b/libmkv/EbmlWriter.h @@ -15,7 +15,7 @@ #include "vpx/vpx_integer.h" typedef struct EbmlGlobal EbmlGlobal; -void Ebml_Serialize(EbmlGlobal *glob, const void *, unsigned long); +void Ebml_Serialize(EbmlGlobal *glob, const void *, int, unsigned long); void Ebml_Write(EbmlGlobal *glob, const void *, unsigned long); ///// diff --git a/libmkv/WebMElement.c b/libmkv/WebMElement.c index 25a90249a..0ef5100bb 100644 --- a/libmkv/WebMElement.c +++ b/libmkv/WebMElement.c @@ -35,11 +35,11 @@ void writeSimpleBlock(EbmlGlobal *glob, unsigned char trackNumber, short timeCod Ebml_WriteID(glob, SimpleBlock); unsigned long blockLength = 4 + dataLength; 
blockLength |= 0x10000000; //TODO check length < 0x0FFFFFFFF - Ebml_Serialize(glob, &blockLength, 4); + Ebml_Serialize(glob, &blockLength, sizeof(blockLength), 4); trackNumber |= 0x80; //TODO check track nubmer < 128 Ebml_Write(glob, &trackNumber, 1); //Ebml_WriteSigned16(glob, timeCode,2); //this is 3 bytes - Ebml_Serialize(glob, &timeCode, 2); + Ebml_Serialize(glob, &timeCode, sizeof(timeCode), 2); unsigned char flags = 0x00 | (isKeyframe ? 0x80 : 0x00) | (lacingFlag << 1) | discardable; Ebml_Write(glob, &flags, 1); Ebml_Write(glob, data, dataLength); diff --git a/vpxenc.c b/vpxenc.c index 042f07b81..a3c8eadc0 100644 --- a/vpxenc.c +++ b/vpxenc.c @@ -501,15 +501,42 @@ void Ebml_Write(EbmlGlobal *glob, const void *buffer_in, unsigned long len) if(fwrite(buffer_in, 1, len, glob->stream)); } - -void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, unsigned long len) -{ - const unsigned char *q = (const unsigned char *)buffer_in + len - 1; - - for(; len; len--) - Ebml_Write(glob, q--, 1); +#define WRITE_BUFFER(s) \ +for(i = len-1; i>=0; i--)\ +{ \ + x = *(const s *)buffer_in >> (i * CHAR_BIT); \ + Ebml_Write(glob, &x, 1); \ } +void Ebml_Serialize(EbmlGlobal *glob, const void *buffer_in, int buffer_size, unsigned long len) +{ + char x; + int i; + /* buffer_size: + * 1 - int8_t; + * 2 - int16_t; + * 4 - int32_t; + * 8 - int64_t; + */ + switch (buffer_size) + { + case 1: + WRITE_BUFFER(int8_t) + break; + case 2: + WRITE_BUFFER(int16_t) + break; + case 4: + WRITE_BUFFER(int32_t) + break; + case 8: + WRITE_BUFFER(int64_t) + break; + default: + break; + } +} +#undef WRITE_BUFFER /* Need a fixed size serializer for the track ID. libmkv provdes a 64 bit * one, but not a 32 bit one. 
@@ -518,8 +545,8 @@ static void Ebml_SerializeUnsigned32(EbmlGlobal *glob, unsigned long class_id, u { unsigned char sizeSerialized = 4 | 0x80; Ebml_WriteID(glob, class_id); - Ebml_Serialize(glob, &sizeSerialized, 1); - Ebml_Serialize(glob, &ui, 4); + Ebml_Serialize(glob, &sizeSerialized, sizeof(sizeSerialized), 1); + Ebml_Serialize(glob, &ui, sizeof(ui), 4); } @@ -533,7 +560,7 @@ Ebml_StartSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc, Ebml_WriteID(glob, class_id); *ebmlLoc = ftello(glob->stream); - Ebml_Serialize(glob, &unknownLen, 8); + Ebml_Serialize(glob, &unknownLen, sizeof(unknownLen), 8); } static void @@ -551,7 +578,7 @@ Ebml_EndSubElement(EbmlGlobal *glob, EbmlLoc *ebmlLoc) /* Seek back to the beginning of the element and write the new size */ fseeko(glob->stream, *ebmlLoc, SEEK_SET); - Ebml_Serialize(glob, &size, 8); + Ebml_Serialize(glob, &size, sizeof(size), 8); /* Reset the stream pointer */ fseeko(glob->stream, pos, SEEK_SET); @@ -741,13 +768,13 @@ write_webm_block(EbmlGlobal *glob, block_length = pkt->data.frame.sz + 4; block_length |= 0x10000000; - Ebml_Serialize(glob, &block_length, 4); + Ebml_Serialize(glob, &block_length, sizeof(block_length), 4); track_number = 1; track_number |= 0x80; Ebml_Write(glob, &track_number, 1); - Ebml_Serialize(glob, &block_timecode, 2); + Ebml_Serialize(glob, &block_timecode, sizeof(block_timecode), 2); flags = 0; if(is_keyframe) From 4e82f01547fd1610723c566005f1c6bb91ae3e1d Mon Sep 17 00:00:00 2001 From: Tero Rintaluoma Date: Mon, 11 Jul 2011 15:55:45 +0300 Subject: [PATCH 2/2] Tokenize MB optimized Optimized C-code of the following functions: - vp8_tokenize_mb - tokenize1st_order_b - tokenize2nd_order_b Gives ~1-5% speed-up for RT encoding on Cortex-A8/A9 depending on encoding parameters. 
Change-Id: I6be86104a589a06dcbc9ed3318e8bf264ef4176c --- vp8/encoder/tokenize.c | 218 +++++++++++++++++++++++++++-------------- 1 file changed, 143 insertions(+), 75 deletions(-) diff --git a/vp8/encoder/tokenize.c b/vp8/encoder/tokenize.c index e14e6fc92..15e7336b1 100644 --- a/vp8/encoder/tokenize.c +++ b/vp8/encoder/tokenize.c @@ -95,101 +95,183 @@ static void fill_value_tokens() static void tokenize2nd_order_b ( - const BLOCKD *const b, + MACROBLOCKD *x, TOKENEXTRA **tp, - const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, VP8_COMP *cpi ) { - int pt; /* near block/prev token context index */ - int c = 0; /* start at DC */ - const int eob = b->eob; /* one beyond last nonzero coeff */ - TOKENEXTRA *t = *tp; /* store tokens starting here */ - int x; - const short *qcoeff_ptr = b->qcoeff; + int pt; /* near block/prev token context index */ + int c; /* start at DC */ + TOKENEXTRA *t = *tp;/* store tokens starting here */ + const BLOCKD *b; + const short *qcoeff_ptr; + ENTROPY_CONTEXT * a; + ENTROPY_CONTEXT * l; + int band, rc, v, token; + + b = x->block + 24; + qcoeff_ptr = b->qcoeff; + a = (ENTROPY_CONTEXT *)x->above_context + 8; + l = (ENTROPY_CONTEXT *)x->left_context + 8; + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - do + for (c = 0; c < b->eob; c++) { - const int band = vp8_coef_bands[c]; + rc = vp8_default_zig_zag1d[c]; + band = vp8_coef_bands[c]; + v = qcoeff_ptr[rc]; - if (c < eob) - { - int rc = vp8_default_zig_zag1d[c]; - const int v = qcoeff_ptr[rc]; -#if CONFIG_DEBUG - assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE)); -#endif - t->Extra = vp8_dct_value_tokens_ptr[v].Extra; - x = vp8_dct_value_tokens_ptr[v].Token; - } - else - x = DCT_EOB_TOKEN; + t->Extra = vp8_dct_value_tokens_ptr[v].Extra; + token = vp8_dct_value_tokens_ptr[v].Token; - t->Token = x; - t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; + t->Token = token; + t->context_tree = cpi->common.fc.coef_probs [1] [band] 
[pt]; - t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0)); + t->skip_eob_node = ((pt == 0) && (band > 0)); - ++cpi->coef_counts [type] [band] [pt] [x]; + ++cpi->coef_counts [1] [band] [pt] [token]; + + pt = vp8_prev_token_class[token]; + t++; + } + if (c < 16) + { + band = vp8_coef_bands[c]; + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs [1] [band] [pt]; + + t->skip_eob_node = ((pt == 0) && (band > 0)); + + ++cpi->coef_counts [1] [band] [pt] [DCT_EOB_TOKEN]; + + t++; } - while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < 16); *tp = t; - pt = (c != !type); /* 0 <-> all coeff data is zero */ + pt = (c != 0); /* 0 <-> all coeff data is zero */ *a = *l = pt; } static void tokenize1st_order_b ( - const BLOCKD *const b, + MACROBLOCKD *x, TOKENEXTRA **tp, - const int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ - ENTROPY_CONTEXT *a, - ENTROPY_CONTEXT *l, + int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */ VP8_COMP *cpi ) { - int pt; /* near block/prev token context index */ - int c = type ? 
0 : 1; /* start at DC unless type 0 */ - const int eob = b->eob; /* one beyond last nonzero coeff */ - TOKENEXTRA *t = *tp; /* store tokens starting here */ - int x; - const short *qcoeff_ptr = b->qcoeff; - VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + unsigned int block; + const BLOCKD *b; + int pt; /* near block/prev token context index */ + int c; + int token; + TOKENEXTRA *t = *tp;/* store tokens starting here */ + const short *qcoeff_ptr; + ENTROPY_CONTEXT * a; + ENTROPY_CONTEXT * l; + int band, rc, v; + int tmp1, tmp2; - do + b = x->block; + /* Luma */ + for (block = 0; block < 16; block++, b++) { - const int band = vp8_coef_bands[c]; + tmp1 = vp8_block2above[block]; + tmp2 = vp8_block2left[block]; + qcoeff_ptr = b->qcoeff; + a = (ENTROPY_CONTEXT *)x->above_context + tmp1; + l = (ENTROPY_CONTEXT *)x->left_context + tmp2; - x = DCT_EOB_TOKEN; + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); - if (c < eob) + c = type ? 0 : 1; + + for (; c < b->eob; c++) { - int rc = vp8_default_zig_zag1d[c]; - const int v = qcoeff_ptr[rc]; -#if CONFIG_DEBUG - assert(-DCT_MAX_VALUE <= v && v < (DCT_MAX_VALUE)); -#endif + rc = vp8_default_zig_zag1d[c]; + band = vp8_coef_bands[c]; + v = qcoeff_ptr[rc]; + t->Extra = vp8_dct_value_tokens_ptr[v].Extra; - x = vp8_dct_value_tokens_ptr[v].Token; + token = vp8_dct_value_tokens_ptr[v].Token; + + t->Token = token; + t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; + + t->skip_eob_node = pt == 0 && + ((band > 0 && type > 0) || (band > 1 && type == 0)); + + ++cpi->coef_counts [type] [band] [pt] [token]; + + pt = vp8_prev_token_class[token]; + t++; } + if (c < 16) + { + band = vp8_coef_bands[c]; + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; - t->Token = x; - t->context_tree = cpi->common.fc.coef_probs [type] [band] [pt]; + t->skip_eob_node = pt == 0 && + ((band > 0 && type > 0) || (band > 1 && type == 0)); - t->skip_eob_node = pt == 0 && ((band > 0 && type > 0) || (band > 1 && type == 0)); + 
++cpi->coef_counts [type] [band] [pt] [DCT_EOB_TOKEN]; + + t++; + } + *tp = t; + pt = (c != !type); /* 0 <-> all coeff data is zero */ + *a = *l = pt; - ++cpi->coef_counts [type] [band] [pt] [x]; } - while (pt = vp8_prev_token_class[x], ++t, c < eob && ++c < 16); + /* Chroma */ + for (block = 16; block < 24; block++, b++) + { + tmp1 = vp8_block2above[block]; + tmp2 = vp8_block2left[block]; + qcoeff_ptr = b->qcoeff; + a = (ENTROPY_CONTEXT *)x->above_context + tmp1; + l = (ENTROPY_CONTEXT *)x->left_context + tmp2; - *tp = t; - pt = (c != !type); /* 0 <-> all coeff data is zero */ - *a = *l = pt; + VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); + + for (c = 0; c < b->eob; c++) + { + rc = vp8_default_zig_zag1d[c]; + band = vp8_coef_bands[c]; + v = qcoeff_ptr[rc]; + + t->Extra = vp8_dct_value_tokens_ptr[v].Extra; + token = vp8_dct_value_tokens_ptr[v].Token; + + t->Token = token; + t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt]; + + t->skip_eob_node = ((pt == 0) && (band > 0)); + + ++cpi->coef_counts [2] [band] [pt] [token]; + + pt = vp8_prev_token_class[token]; + t++; + } + if (c < 16) + { + band = vp8_coef_bands[c]; + t->Token = DCT_EOB_TOKEN; + t->context_tree = cpi->common.fc.coef_probs [2] [band] [pt]; + + t->skip_eob_node = ((pt == 0) && (band > 0)); + + ++cpi->coef_counts [2] [band] [pt] [DCT_EOB_TOKEN]; + + t++; + } + *tp = t; + pt = (c != 0); /* 0 <-> all coeff data is zero */ + *a = *l = pt; + } } @@ -214,10 +296,7 @@ static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) { - ENTROPY_CONTEXT * A = (ENTROPY_CONTEXT *)x->above_context; - ENTROPY_CONTEXT * L = (ENTROPY_CONTEXT *)x->left_context; int plane_type; - int b; int has_y2_block; has_y2_block = (x->mode_info_context->mbmi.mode != B_PRED @@ -240,26 +319,15 @@ void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCKD *x, TOKENEXTRA **t) cpi->skip_false_count++; - - plane_type = 3; if(has_y2_block) { - tokenize2nd_order_b(x->block + 24, t, 
1, - A + vp8_block2above[24], L + vp8_block2left[24], cpi); + tokenize2nd_order_b(x, t, cpi); plane_type = 0; } - for (b = 0; b < 16; b++) - tokenize1st_order_b(x->block + b, t, plane_type, - A + vp8_block2above[b], - L + vp8_block2left[b], cpi); - - for (b = 16; b < 24; b++) - tokenize1st_order_b(x->block + b, t, 2, - A + vp8_block2above[b], - L + vp8_block2left[b], cpi); + tokenize1st_order_b(x, t, plane_type, cpi); }