From c139b81a13f680340dd874c205cba40a7233d388 Mon Sep 17 00:00:00 2001 From: Marco Date: Thu, 21 May 2015 16:15:37 -0700 Subject: [PATCH] Vidyo patch: Rate control for SVC, 1 pass CBR mode. -Make Rate control work for SVC 1 pass CBR mode. -Added temporal layering mode. -Fixed bug in non-rd variance partition. -Modified/updated the sample encoders (vp9_spatial_svc_encoder, vpx_temporal_svc_encoder). -Added datarate unittest(s) for 1 pass CBR SVC. Change-Id: Ie94b1b68a56ea1267b5087c625e5df04def2ee48 --- examples/vp9_spatial_svc_encoder.c | 367 +++++++++++++++++++++- examples/vpx_temporal_svc_encoder.c | 26 +- test/datarate_test.cc | 206 ++++++++++++- test/encode_test_driver.h | 4 + test/svc_test.cc | 1 + vp9/encoder/vp9_encodeframe.c | 15 +- vp9/encoder/vp9_encoder.c | 48 +-- vp9/encoder/vp9_encoder.h | 13 +- vp9/encoder/vp9_ratectrl.c | 74 +++-- vp9/encoder/vp9_svc_layercontext.c | 455 +++++++++++++++++++++------- vp9/encoder/vp9_svc_layercontext.h | 11 +- vp9/encoder/vp9_temporal_filter.c | 2 +- vp9/vp9_cx_iface.c | 132 ++++---- vpx/src/svc_encodeframe.c | 120 ++++++-- vpx/svc_context.h | 5 +- vpx/vp8cx.h | 37 +++ vpx/vpx_encoder.h | 32 +- 17 files changed, 1279 insertions(+), 269 deletions(-) diff --git a/examples/vp9_spatial_svc_encoder.c b/examples/vp9_spatial_svc_encoder.c index a3fd65f2a..c22a04a99 100644 --- a/examples/vp9_spatial_svc_encoder.c +++ b/examples/vp9_spatial_svc_encoder.c @@ -14,11 +14,13 @@ * that benefit from a scalable bitstream. */ +#include #include #include #include #include + #include "../args.h" #include "../tools_common.h" #include "../video_writer.h" @@ -27,11 +29,18 @@ #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" #include "../vpxstats.h" +#define OUTPUT_RC_STATS 1 static const arg_def_t skip_frames_arg = ARG_DEF("s", "skip-frames", 1, "input frames to skip"); static const arg_def_t frames_arg = ARG_DEF("f", "frames", 1, "number of frames to encode"); +static const arg_def_t threads_arg = + ARG_DEF("th", "threads", 1, "number of threads to use"); +#if OUTPUT_RC_STATS +static const arg_def_t output_rc_stats_arg = + ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats"); +#endif static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width"); static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height"); static const arg_def_t timebase_arg = @@ -42,6 +51,9 @@ static const arg_def_t spatial_layers_arg = ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers"); static const arg_def_t temporal_layers_arg = ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers"); +static const arg_def_t temporal_layering_mode_arg = + ARG_DEF("tlm", "temporal-layering-mode", 1, "temporal layering scheme." + "VP9E_TEMPORAL_LAYERING_MODE"); static const arg_def_t kf_dist_arg = ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes"); static const arg_def_t scale_factors_arg = @@ -65,6 +77,8 @@ static const arg_def_t lag_in_frame_arg = "generating any outputs"); static const arg_def_t rc_end_usage_arg = ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q"); +static const arg_def_t speed_arg = + ARG_DEF("sp", "speed", 1, "speed configuration"); #if CONFIG_VP9_HIGHBITDEPTH static const struct arg_enum_list bitdepth_enum[] = { @@ -85,10 +99,16 @@ static const arg_def_t *svc_args[] = { &timebase_arg, &bitrate_arg, &skip_frames_arg, &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg, &passes_arg, &pass_arg, &fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg, - &max_bitrate_arg, &temporal_layers_arg, &lag_in_frame_arg, + &max_bitrate_arg, &temporal_layers_arg, &temporal_layering_mode_arg, + &lag_in_frame_arg, &threads_arg, +#if OUTPUT_RC_STATS + &output_rc_stats_arg, +#endif + #if CONFIG_VP9_HIGHBITDEPTH &bitdepth_arg, #endif + &speed_arg, &rc_end_usage_arg, NULL }; @@ -102,6 +122,10 @@ static const uint32_t default_bitrate = 1000; static const uint32_t default_spatial_layers = 5; static const uint32_t default_temporal_layers = 1; static const uint32_t default_kf_dist = 100; +static const uint32_t default_temporal_layering_mode = 0; +static const uint32_t default_output_rc_stats = 0; +static const int32_t default_speed = -1; // -1 means use library default. +static const uint32_t default_threads = 0; // zero means use library default. typedef struct { const char *input_filename; @@ -143,6 +167,12 @@ static void parse_command_line(int argc, const char **argv_, svc_ctx->log_level = SVC_LOG_DEBUG; svc_ctx->spatial_layers = default_spatial_layers; svc_ctx->temporal_layers = default_temporal_layers; + svc_ctx->temporal_layering_mode = default_temporal_layering_mode; +#if OUTPUT_RC_STATS + svc_ctx->output_rc_stat = default_output_rc_stats; +#endif + svc_ctx->speed = default_speed; + svc_ctx->threads = default_threads; // start with default encoder configuration res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0); @@ -184,6 +214,20 @@ static void parse_command_line(int argc, const char **argv_, svc_ctx->spatial_layers = arg_parse_uint(&arg); } else if (arg_match(&arg, &temporal_layers_arg, argi)) { svc_ctx->temporal_layers = arg_parse_uint(&arg); +#if OUTPUT_RC_STATS + } else if (arg_match(&arg, &output_rc_stats_arg, argi)) { + svc_ctx->output_rc_stat = arg_parse_uint(&arg); +#endif + } else if (arg_match(&arg, &speed_arg, argi)) { + svc_ctx->speed = arg_parse_uint(&arg); + } else if (arg_match(&arg, &threads_arg, argi)) { + svc_ctx->threads = arg_parse_uint(&arg); + } else if (arg_match(&arg, &temporal_layering_mode_arg, argi)) { + svc_ctx->temporal_layering_mode = + enc_cfg->temporal_layering_mode = arg_parse_int(&arg); + if (svc_ctx->temporal_layering_mode) { + enc_cfg->g_error_resilient = 1; + } } else if (arg_match(&arg, &kf_dist_arg, argi)) { enc_cfg->kf_min_dist = arg_parse_uint(&arg); enc_cfg->kf_max_dist = enc_cfg->kf_min_dist; @@ -316,6 +360,185 @@ static void parse_command_line(int argc, const char **argv_, enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist); } +#if OUTPUT_RC_STATS +// For rate control encoding stats. +struct RateControlStats { + // Number of input frames per layer. + int layer_input_frames[VPX_MAX_LAYERS]; + // Total (cumulative) number of encoded frames per layer. + int layer_tot_enc_frames[VPX_MAX_LAYERS]; + // Number of encoded non-key frames per layer. + int layer_enc_frames[VPX_MAX_LAYERS]; + // Framerate per layer (cumulative). + double layer_framerate[VPX_MAX_LAYERS]; + // Target average frame size per layer (per-frame-bandwidth per layer). + double layer_pfb[VPX_MAX_LAYERS]; + // Actual average frame size per layer. + double layer_avg_frame_size[VPX_MAX_LAYERS]; + // Average rate mismatch per layer (|target - actual| / target). + double layer_avg_rate_mismatch[VPX_MAX_LAYERS]; + // Actual encoding bitrate per layer (cumulative). + double layer_encoding_bitrate[VPX_MAX_LAYERS]; + // Average of the short-time encoder actual bitrate. + // TODO(marpan): Should we add these short-time stats for each layer? + double avg_st_encoding_bitrate; + // Variance of the short-time encoder actual bitrate. + double variance_st_encoding_bitrate; + // Window (number of frames) for computing short-time encoding bitrate. + int window_size; + // Number of window measurements. + int window_count; +}; + +// Note: these rate control stats assume only 1 key frame in the +// sequence (i.e., first frame only). +static void set_rate_control_stats(struct RateControlStats *rc, + vpx_codec_enc_cfg_t *cfg) { + unsigned int sl, tl; + // Set the layer (cumulative) framerate and the target layer (non-cumulative) + // per-frame-bandwidth, for the rate control encoding stats below. + const double framerate = cfg->g_timebase.den / cfg->g_timebase.num; + + for (sl = 0; sl < cfg->ss_number_layers; ++sl) { + for (tl = 0; tl < cfg->ts_number_layers; ++tl) { + const int layer = sl * cfg->ts_number_layers + tl; + const int tlayer0 = sl * cfg->ts_number_layers; + rc->layer_framerate[layer] = + framerate / cfg->ts_rate_decimator[tl]; + if (tl > 0) { + rc->layer_pfb[layer] = 1000.0 * + (cfg->layer_target_bitrate[layer] - + cfg->layer_target_bitrate[layer - 1]) / + (rc->layer_framerate[layer] - + rc->layer_framerate[layer - 1]); + } else { + rc->layer_pfb[tlayer0] = 1000.0 * + cfg->layer_target_bitrate[tlayer0] / + rc->layer_framerate[tlayer0]; + } + rc->layer_input_frames[layer] = 0; + rc->layer_enc_frames[layer] = 0; + rc->layer_tot_enc_frames[layer] = 0; + rc->layer_encoding_bitrate[layer] = 0.0; + rc->layer_avg_frame_size[layer] = 0.0; + rc->layer_avg_rate_mismatch[layer] = 0.0; + } + } + rc->window_count = 0; + rc->window_size = 15; + rc->avg_st_encoding_bitrate = 0.0; + rc->variance_st_encoding_bitrate = 0.0; +} + +static void printout_rate_control_summary(struct RateControlStats *rc, + vpx_codec_enc_cfg_t *cfg, + int frame_cnt) { + unsigned int sl, tl; + int tot_num_frames = 0; + double perc_fluctuation = 0.0; + printf("Total number of processed frames: %d\n\n", frame_cnt - 1); + printf("Rate control layer stats for sl%d tl%d layer(s):\n\n", + cfg->ss_number_layers, cfg->ts_number_layers); + for (sl = 0; sl < cfg->ss_number_layers; ++sl) { + for (tl = 0; tl < cfg->ts_number_layers; ++tl) { + const int layer = sl * cfg->ts_number_layers + tl; + const int num_dropped = (tl > 0) ? + (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) : + (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1); + if (!sl) + tot_num_frames += rc->layer_input_frames[layer]; + rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] * + rc->layer_encoding_bitrate[layer] / tot_num_frames; + rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] / + rc->layer_enc_frames[layer]; + rc->layer_avg_rate_mismatch[layer] = + 100.0 * rc->layer_avg_rate_mismatch[layer] / + rc->layer_enc_frames[layer]; + printf("For layer#: sl%d tl%d \n", sl, tl); + printf("Bitrate (target vs actual): %d %f.0 kbps\n", + cfg->layer_target_bitrate[layer], + rc->layer_encoding_bitrate[layer]); + printf("Average frame size (target vs actual): %f %f bits\n", + rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]); + printf("Average rate_mismatch: %f\n", + rc->layer_avg_rate_mismatch[layer]); + printf("Number of input frames, encoded (non-key) frames, " + "and percent dropped frames: %d %d %f.0 \n", + rc->layer_input_frames[layer], rc->layer_enc_frames[layer], + 100.0 * num_dropped / rc->layer_input_frames[layer]); + printf("\n"); + } + } + rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count; + rc->variance_st_encoding_bitrate = + rc->variance_st_encoding_bitrate / rc->window_count - + (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate); + perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) / + rc->avg_st_encoding_bitrate; + printf("Short-time stats, for window of %d frames: \n", rc->window_size); + printf("Average, rms-variance, and percent-fluct: %f %f %f \n", + rc->avg_st_encoding_bitrate, + sqrt(rc->variance_st_encoding_bitrate), + perc_fluctuation); + if (frame_cnt != tot_num_frames) + die("Error: Number of input frames not equal to output encoded frames != " + "%d tot_num_frames = %d\n", frame_cnt, tot_num_frames); +} + +vpx_codec_err_t parse_superframe_index(const uint8_t *data, + size_t data_sz, + uint32_t sizes[8], int *count) { + // A chunk ending with a byte matching 0xc0 is an invalid chunk unless + // it is a super frame index. If the last byte of real video compression + // data is 0xc0 the encoder must add a 0 byte. If we have the marker but + // not the associated matching marker byte at the front of the index we have + // an invalid bitstream and need to return an error. + + uint8_t marker; + + marker = *(data + data_sz - 1); + *count = 0; + + + if ((marker & 0xe0) == 0xc0) { + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const size_t index_sz = 2 + mag * frames; + + // This chunk is marked as having a superframe index but doesn't have + // enough data for it, thus it's an invalid superframe index. + if (data_sz < index_sz) + return VPX_CODEC_CORRUPT_FRAME; + + { + const uint8_t marker2 = *(data + data_sz - index_sz); + + // This chunk is marked as having a superframe index but doesn't have + // the matching marker byte at the front of the index therefore it's an + // invalid chunk. + if (marker != marker2) + return VPX_CODEC_CORRUPT_FRAME; + } + + { + // Found a valid superframe index. + uint32_t i, j; + const uint8_t *x = &data[data_sz - index_sz + 1]; + + for (i = 0; i < frames; ++i) { + uint32_t this_sz = 0; + + for (j = 0; j < mag; ++j) + this_sz |= (*x++) << (j * 8); + sizes[i] = this_sz; + } + *count = frames; + } + } + return VPX_CODEC_OK; +} +#endif + int main(int argc, const char **argv) { AppInput app_input = {0}; VpxVideoWriter *writer = NULL; @@ -332,7 +555,15 @@ int main(int argc, const char **argv) { FILE *infile = NULL; int end_of_stream = 0; int frames_received = 0; - +#if OUTPUT_RC_STATS + VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = {NULL}; + struct RateControlStats rc; + vpx_svc_layer_id_t layer_id; + int sl, tl; + double sum_bitrate = 0.0; + double sum_bitrate2 = 0.0; + double framerate = 30.0; +#endif memset(&svc_ctx, 0, sizeof(svc_ctx)); svc_ctx.log_print = 1; exec_name = argv[0]; @@ -359,6 +590,13 @@ int main(int argc, const char **argv) { VPX_CODEC_OK) die("Failed to initialize encoder\n"); +#if OUTPUT_RC_STATS + if (svc_ctx.output_rc_stat) { + set_rate_control_stats(&rc, &enc_cfg); + framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num; + } +#endif + info.codec_fourcc = VP9_FOURCC; info.time_base.numerator = enc_cfg.g_timebase.num; info.time_base.denominator = enc_cfg.g_timebase.den; @@ -370,11 +608,30 @@ int main(int argc, const char **argv) { if (!writer) die("Failed to open %s for writing\n", app_input.output_filename); } +#if OUTPUT_RC_STATS + // For now, just write temporal layer streams. + // TODO(wonkap): do spatial by re-writing superframe. + if (svc_ctx.output_rc_stat) { + for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) { + char file_name[PATH_MAX]; + + snprintf(file_name, sizeof(file_name), "%s_t%d.ivf", + app_input.output_filename, tl); + outfile[tl] = vpx_video_writer_open(file_name, kContainerIVF, &info); + if (!outfile[tl]) + die("Failed to open %s for writing", file_name); + } + } +#endif // skip initial frames for (i = 0; i < app_input.frames_to_skip; ++i) vpx_img_read(&raw, infile); + if (svc_ctx.speed != -1) + vpx_codec_control(&codec, VP8E_SET_CPUUSED, svc_ctx.speed); + vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, 0); + // Encode frames while (!end_of_stream) { vpx_codec_iter_t iter = NULL; @@ -386,7 +643,9 @@ int main(int argc, const char **argv) { } res = vpx_svc_encode(&svc_ctx, &codec, (end_of_stream ? NULL : &raw), - pts, frame_duration, VPX_DL_GOOD_QUALITY); + pts, frame_duration, svc_ctx.speed >= 5 ? + VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY); + printf("%s", vpx_svc_get_message(&svc_ctx)); if (res != VPX_CODEC_OK) { die_codec(&codec, "Failed to encode frame"); @@ -395,11 +654,90 @@ int main(int argc, const char **argv) { while ((cx_pkt = vpx_codec_get_cx_data(&codec, &iter)) != NULL) { switch (cx_pkt->kind) { case VPX_CODEC_CX_FRAME_PKT: { - if (cx_pkt->data.frame.sz > 0) + if (cx_pkt->data.frame.sz > 0) { +#if OUTPUT_RC_STATS + uint32_t sizes[8]; + int count = 0; +#endif vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, cx_pkt->data.frame.pts); +#if OUTPUT_RC_STATS + // TODO(marpan/wonkap): Put this (to line728) in separate function. + if (svc_ctx.output_rc_stat) { + vpx_codec_control(&codec, VP9E_GET_SVC_LAYER_ID, &layer_id); + parse_superframe_index(cx_pkt->data.frame.buf, + cx_pkt->data.frame.sz, sizes, &count); + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { + ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + + layer_id.temporal_layer_id]; + } + for (tl = layer_id.temporal_layer_id; + tl < enc_cfg.ts_number_layers; ++tl) { + vpx_video_writer_write_frame(outfile[tl], + cx_pkt->data.frame.buf, + cx_pkt->data.frame.sz, + cx_pkt->data.frame.pts); + } + + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { + for (tl = layer_id.temporal_layer_id; + tl < enc_cfg.ts_number_layers; ++tl) { + const int layer = sl * enc_cfg.ts_number_layers + tl; + ++rc.layer_tot_enc_frames[layer]; + rc.layer_encoding_bitrate[layer] += 8.0 * sizes[sl]; + // Keep count of rate control stats per layer, for non-key + // frames. + if (tl == layer_id.temporal_layer_id && + !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { + rc.layer_avg_frame_size[layer] += 8.0 * sizes[sl]; + rc.layer_avg_rate_mismatch[layer] += + fabs(8.0 * sizes[sl] - rc.layer_pfb[layer]) / + rc.layer_pfb[layer]; + ++rc.layer_enc_frames[layer]; + } + } + } + + // Update for short-time encoding bitrate states, for moving + // window of size rc->window, shifted by rc->window / 2. + // Ignore first window segment, due to key frame. + if (frame_cnt > rc.window_size) { + tl = layer_id.temporal_layer_id; + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { + sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate; + } + if (frame_cnt % rc.window_size == 0) { + rc.window_count += 1; + rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size; + rc.variance_st_encoding_bitrate += + (sum_bitrate / rc.window_size) * + (sum_bitrate / rc.window_size); + sum_bitrate = 0.0; + } + } + + // Second shifted window. + if (frame_cnt > rc.window_size + rc.window_size / 2) { + tl = layer_id.temporal_layer_id; + for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { + sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate; + } + + if (frame_cnt > 2 * rc.window_size && + frame_cnt % rc.window_size == 0) { + rc.window_count += 1; + rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size; + rc.variance_st_encoding_bitrate += + (sum_bitrate2 / rc.window_size) * + (sum_bitrate2 / rc.window_size); + sum_bitrate2 = 0.0; + } + } + } +#endif + } printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received, !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY), @@ -424,25 +762,30 @@ int main(int argc, const char **argv) { pts += frame_duration; } } - printf("Processed %d frames\n", frame_cnt); - fclose(infile); +#if OUTPUT_RC_STATS + if (svc_ctx.output_rc_stat) { + printout_rate_control_summary(&rc, &enc_cfg, frame_cnt); + printf("\n"); + } +#endif if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); - if (app_input.passes == 2) stats_close(&app_input.rc_stats, 1); - if (writer) { vpx_video_writer_close(writer); } - +#if OUTPUT_RC_STATS + if (svc_ctx.output_rc_stat) { + for (tl = 0; tl < enc_cfg.ts_number_layers; ++tl) { + vpx_video_writer_close(outfile[tl]); + } + } +#endif vpx_img_free(&raw); - // display average size, psnr printf("%s", vpx_svc_dump_statistics(&svc_ctx)); - vpx_svc_release(&svc_ctx); - return EXIT_SUCCESS; } diff --git a/examples/vpx_temporal_svc_encoder.c b/examples/vpx_temporal_svc_encoder.c index 964954e91..f39fd1e3f 100644 --- a/examples/vpx_temporal_svc_encoder.c +++ b/examples/vpx_temporal_svc_encoder.c @@ -85,13 +85,13 @@ static void set_rate_control_metrics(struct RateControlMetrics *rc, // per-frame-bandwidth, for the rate control encoding stats below. const double framerate = cfg->g_timebase.den / cfg->g_timebase.num; rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0]; - rc->layer_pfb[0] = 1000.0 * cfg->ts_target_bitrate[0] / + rc->layer_pfb[0] = 1000.0 * cfg->layer_target_bitrate[0] / rc->layer_framerate[0]; for (i = 0; i < cfg->ts_number_layers; ++i) { if (i > 0) { rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i]; rc->layer_pfb[i] = 1000.0 * - (cfg->ts_target_bitrate[i] - cfg->ts_target_bitrate[i - 1]) / + (cfg->layer_target_bitrate[i] - cfg->layer_target_bitrate[i - 1]) / (rc->layer_framerate[i] - rc->layer_framerate[i - 1]); } rc->layer_input_frames[i] = 0; @@ -128,7 +128,7 @@ static void printout_rate_control_summary(struct RateControlMetrics *rc, rc->layer_avg_rate_mismatch[i] = 100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[i]; printf("For layer#: %d \n", i); - printf("Bitrate (target vs actual): %d %f \n", cfg->ts_target_bitrate[i], + printf("Bitrate (target vs actual): %d %f \n", cfg->layer_target_bitrate[i], rc->layer_encoding_bitrate[i]); printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i], rc->layer_avg_frame_size[i]); @@ -597,7 +597,7 @@ int main(int argc, char **argv) { for (i = min_args_base; (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) { - cfg.ts_target_bitrate[i - 11] = strtol(argv[i], NULL, 0); + cfg.layer_target_bitrate[i - 11] = strtol(argv[i], NULL, 0); } // Real time parameters. @@ -625,6 +625,8 @@ int main(int argc, char **argv) { // Disable automatic keyframe placement. cfg.kf_min_dist = cfg.kf_max_dist = 3000; + cfg.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + set_temporal_layer_pattern(layering_mode, &cfg, layer_flags, @@ -633,8 +635,8 @@ int main(int argc, char **argv) { set_rate_control_metrics(&rc, &cfg); // Target bandwidth for the whole stream. - // Set to ts_target_bitrate for highest layer (total bitrate). - cfg.rc_target_bitrate = cfg.ts_target_bitrate[cfg.ts_number_layers - 1]; + // Set to layer_target_bitrate for highest layer (total bitrate). + cfg.rc_target_bitrate = cfg.layer_target_bitrate[cfg.ts_number_layers - 1]; // Open input file. if (!(infile = fopen(argv[1], "rb"))) { @@ -677,6 +679,9 @@ int main(int argc, char **argv) { vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kDenoiserOff); vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 0); } else if (strncmp(encoder->name, "vp9", 3) == 0) { +#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) + vpx_svc_extra_cfg_t svc_params; +#endif vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed); vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); @@ -685,6 +690,15 @@ int main(int argc, char **argv) { vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, (cfg.g_threads >> 1)); if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 1: 0)) { die_codec(&codec, "Failed to set SVC"); +#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) + for (i = 0; i < cfg.ts_number_layers; ++i) { + svc_params.max_quantizers[i] = cfg.rc_max_quantizer; + svc_params.min_quantizers[i] = cfg.rc_min_quantizer; + } + svc_params.scaling_factor_num[0] = cfg.g_h; + svc_params.scaling_factor_den[0] = cfg.g_h; + vpx_codec_control(&codec, VP9E_SET_SVC_PARAMETERS, &svc_params); +#endif } } if (strncmp(encoder->name, "vp8", 3) == 0) { diff --git a/test/datarate_test.cc b/test/datarate_test.cc index 78b5fa406..c182a2427 100644 --- a/test/datarate_test.cc +++ b/test/datarate_test.cc @@ -14,6 +14,7 @@ #include "test/i420_video_source.h" #include "test/util.h" #include "test/y4m_video_source.h" +#include "vpx/vpx_codec.h" namespace { @@ -565,6 +566,8 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) { cfg_.ts_rate_decimator[0] = 2; cfg_.ts_rate_decimator[1] = 1; + cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + if (deadline_ == VPX_DL_REALTIME) cfg_.g_error_resilient = 1; @@ -574,14 +577,14 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting2TemporalLayers) { cfg_.rc_target_bitrate = i; ResetModel(); // 60-40 bitrate allocation for 2 temporal layers. - cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100; - cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate; + cfg_.layer_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100; + cfg_.layer_target_bitrate[1] = cfg_.rc_target_bitrate; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { - ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85) + ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85) << " The datarate for the file is lower than target by too much, " "for layer: " << j; - ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15) + ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15) << " The datarate for the file is greater than target by too much, " "for layer: " << j; } @@ -606,25 +609,27 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayers) { cfg_.ts_rate_decimator[1] = 2; cfg_.ts_rate_decimator[2] = 1; + cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 200); for (int i = 200; i <= 800; i += 200) { cfg_.rc_target_bitrate = i; ResetModel(); // 40-20-40 bitrate allocation for 3 temporal layers. - cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100; - cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100; - cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate; + cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100; + cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100; + cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { // TODO(yaowu): Work out more stable rc control strategy and // Adjust the thresholds to be tighter than .75. - ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75) + ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.75) << " The datarate for the file is lower than target by too much, " "for layer: " << j; // TODO(yaowu): Work out more stable rc control strategy and // Adjust the thresholds to be tighter than 1.25. - ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25) + ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.25) << " The datarate for the file is greater than target by too much, " "for layer: " << j; } @@ -652,20 +657,22 @@ TEST_P(DatarateTestVP9Large, BasicRateTargeting3TemporalLayersFrameDropping) { cfg_.ts_rate_decimator[1] = 2; cfg_.ts_rate_decimator[2] = 1; + cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 200); cfg_.rc_target_bitrate = 200; ResetModel(); // 40-20-40 bitrate allocation for 3 temporal layers. - cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100; - cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100; - cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate; + cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100; + cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100; + cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { - ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85) + ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85) << " The datarate for the file is lower than target by too much, " "for layer: " << j; - ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15) + ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15) << " The datarate for the file is greater than target by too much, " "for layer: " << j; // Expect some frame drops in this test: for this 200 frames test, @@ -737,9 +744,180 @@ TEST_P(DatarateTestVP9Large, DenoiserOffOn) { } #endif // CONFIG_VP9_TEMPORAL_DENOISING +class DatarateOnePassCbrSvc : public ::libvpx_test::EncoderTest, + public ::libvpx_test::CodecTestWith2Params { + public: + DatarateOnePassCbrSvc() : EncoderTest(GET_PARAM(0)) {} + virtual ~DatarateOnePassCbrSvc() {} + protected: + virtual void SetUp() { + InitializeConfig(); + SetMode(GET_PARAM(1)); + speed_setting_ = GET_PARAM(2); + ResetModel(); + } + virtual void ResetModel() { + last_pts_ = 0; + bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; + frame_number_ = 0; + first_drop_ = 0; + bits_total_ = 0; + duration_ = 0.0; + } + virtual void BeginPassHook(unsigned int /*pass*/) { + } + virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, + ::libvpx_test::Encoder *encoder) { + if (video->frame() == 0) { + int i; + for (i = 0; i < 2; ++i) { + svc_params_.max_quantizers[i] = 63; + svc_params_.min_quantizers[i] = 0; + } + svc_params_.scaling_factor_num[0] = 144; + svc_params_.scaling_factor_den[0] = 288; + svc_params_.scaling_factor_num[1] = 288; + svc_params_.scaling_factor_den[1] = 288; + encoder->Control(VP9E_SET_SVC, 1); +#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) + encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_); +#endif + encoder->Control(VP8E_SET_CPUUSED, speed_setting_); + encoder->Control(VP9E_SET_TILE_COLUMNS, 0); + encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 300); + } + const vpx_rational_t tb = video->timebase(); + timebase_ = static_cast(tb.num) / tb.den; + duration_ = 0; + } + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; + if (last_pts_ == 0) + duration = 1; + bits_in_buffer_model_ += static_cast( + duration * timebase_ * cfg_.rc_target_bitrate * 1000); + const bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) + ? true: false; + if (!key_frame) { + ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame " + << pkt->data.frame.pts; + } + const size_t frame_size_in_bits = pkt->data.frame.sz * 8; + bits_in_buffer_model_ -= frame_size_in_bits; + bits_total_ += frame_size_in_bits; + if (!first_drop_ && duration > 1) + first_drop_ = last_pts_ + 1; + last_pts_ = pkt->data.frame.pts; + bits_in_last_frame_ = frame_size_in_bits; + ++frame_number_; + } + virtual void EndPassHook(void) { + if (bits_total_) { + const double file_size_in_kb = bits_total_ / 1000.; // bits per kilobit + duration_ = (last_pts_ + 1) * timebase_; + effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0 + / (cfg_.rc_buf_initial_sz / 1000.0 + duration_); + file_datarate_ = file_size_in_kb / duration_; + } + } + vpx_codec_pts_t last_pts_; + int64_t bits_in_buffer_model_; + double timebase_; + int frame_number_; + vpx_codec_pts_t first_drop_; + int64_t bits_total_; + double duration_; + double file_datarate_; + double effective_datarate_; + size_t bits_in_last_frame_; + vpx_svc_extra_cfg_t svc_params_; + int speed_setting_; +}; +static void assign_layer_bitrates(vpx_codec_enc_cfg_t *const enc_cfg, + const vpx_svc_extra_cfg_t *svc_params, + int spatial_layers, + int temporal_layers, + int temporal_layering_mode, + unsigned int total_rate) { + int sl, spatial_layer_target; + float total = 0; + float alloc_ratio[VPX_MAX_LAYERS] = {0}; + for (sl = 0; sl < spatial_layers; ++sl) { + if (svc_params->scaling_factor_den[sl] > 0) { + alloc_ratio[sl] = (float)(svc_params->scaling_factor_num[sl] * + 1.0 / svc_params->scaling_factor_den[sl]); + total += alloc_ratio[sl]; + } + } + for (sl = 0; sl < spatial_layers; ++sl) { + enc_cfg->ss_target_bitrate[sl] = spatial_layer_target = + (unsigned int)(enc_cfg->rc_target_bitrate * + alloc_ratio[sl] / total); + const int index = sl * temporal_layers; + if (temporal_layering_mode == 3) { + enc_cfg->layer_target_bitrate[index] = + spatial_layer_target >> 1; + enc_cfg->layer_target_bitrate[index + 1] = + (spatial_layer_target >> 1) + (spatial_layer_target >> 2); + enc_cfg->layer_target_bitrate[index + 2] = + spatial_layer_target; + } else if (temporal_layering_mode == 2) { + enc_cfg->layer_target_bitrate[index] = + spatial_layer_target * 2 / 3; + enc_cfg->layer_target_bitrate[index + 1] = + spatial_layer_target; + } + } +} +#if VPX_ENCODER_ABI_VERSION > (4 + VPX_CODEC_ABI_VERSION) +// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and +// 3 temporal layers. +TEST_P(DatarateOnePassCbrSvc, OnePassCbrSvc) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + cfg_.ss_number_layers = 2; + cfg_.ts_number_layers = 3; + cfg_.ts_rate_decimator[0] = 4; + cfg_.ts_rate_decimator[1] = 2; + cfg_.ts_rate_decimator[2] = 1; + cfg_.g_error_resilient = 1; + cfg_.temporal_layering_mode = 3; + svc_params_.scaling_factor_num[0] = 144; + svc_params_.scaling_factor_den[0] = 288; + svc_params_.scaling_factor_num[1] = 288; + svc_params_.scaling_factor_den[1] = 288; + // TODO(wonkap/marpan): No frame drop for now, we need to implement correct + // frame dropping for SVC. + cfg_.rc_dropframe_thresh = 0; + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 200); + // TODO(wonkap/marpan): Check that effective_datarate for each layer hits the + // layer target_bitrate. Also check if test can pass at lower bitrate (~200k). + for (int i = 400; i <= 800; i += 200) { + cfg_.rc_target_bitrate = i; + ResetModel(); + assign_layer_bitrates(&cfg_, &svc_params_, cfg_.ss_number_layers, + cfg_.ts_number_layers, cfg_.temporal_layering_mode, + cfg_.rc_target_bitrate); + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.85) + << " The datarate for the file exceeds the target by too much!"; + ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.15) + << " The datarate for the file is lower than the target by too much!"; + } +} +#endif VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES); VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9Large, ::testing::Values(::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime), ::testing::Range(2, 7)); +VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvc, + ::testing::Values(::libvpx_test::kRealTime), + ::testing::Range(5, 8)); } // namespace diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h index e16cf9cdd..7a068bf0b 100644 --- a/test/encode_test_driver.h +++ b/test/encode_test_driver.h @@ -133,6 +133,10 @@ class Encoder { ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } + void Control(int ctrl_id, struct vpx_svc_parameters *arg) { + const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); + } #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER void Control(int ctrl_id, vpx_active_map_t *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); diff --git a/test/svc_test.cc b/test/svc_test.cc index 0af3005e5..b955cee65 100644 --- a/test/svc_test.cc +++ b/test/svc_test.cc @@ -453,6 +453,7 @@ TEST_F(SvcTest, OnePassEncodeOneFrame) { TEST_F(SvcTest, OnePassEncodeThreeFrames) { codec_enc_.g_pass = VPX_RC_ONE_PASS; + codec_enc_.g_lag_in_frames = 0; vpx_fixed_buf outputs[3]; memset(&outputs[0], 0, sizeof(outputs)); Pass2EncodeNFrames(NULL, 3, 2, &outputs[0]); diff --git a/vp9/encoder/vp9_encodeframe.c b/vp9/encoder/vp9_encodeframe.c index a8adca9ec..4355ec60e 100644 --- a/vp9/encoder/vp9_encodeframe.c +++ b/vp9/encoder/vp9_encodeframe.c @@ -687,17 +687,28 @@ static int choose_partitioning(VP9_COMP *cpi, s = x->plane[0].src.buf; sp = x->plane[0].src.stride; - if (!is_key_frame) { + if (!is_key_frame && !(is_one_pass_cbr_svc(cpi) && + cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)) { + // In the case of spatial/temporal scalable coding, the assumption here is + // that the temporal reference frame will always be of type LAST_FRAME. + // TODO(marpan): If that assumption is broken, we need to revisit this code. MB_MODE_INFO *mbmi = &xd->mi[0]->mbmi; unsigned int uv_sad; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); - const YV12_BUFFER_CONFIG *yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + const YV12_BUFFER_CONFIG *yv12_g = NULL; unsigned int y_sad, y_sad_g; const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows); assert(yv12 != NULL); + + if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id)) { + // For now, GOLDEN will not be used for non-zero spatial layers, since + // it may not be a temporal reference. + yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); + } + if (yv12_g && yv12_g != yv12) { vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, &cm->frame_refs[GOLDEN_FRAME - 1].sf); diff --git a/vp9/encoder/vp9_encoder.c b/vp9/encoder/vp9_encoder.c index 3c0f52e14..95f9bc488 100644 --- a/vp9/encoder/vp9_encoder.c +++ b/vp9/encoder/vp9_encoder.c @@ -756,6 +756,8 @@ static void init_config(struct VP9_COMP *cpi, VP9EncoderConfig *oxcf) { cm->height = oxcf->height; vp9_alloc_compressor_data(cpi); + cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode; + // Single thread case: use counts in common. cpi->td.counts = &cm->counts; @@ -2265,8 +2267,9 @@ static void generate_psnr_packet(VP9_COMP *cpi) { pkt.data.psnr.psnr[i] = psnr.psnr[i]; } pkt.kind = VPX_CODEC_PSNR_PKT; - if (is_two_pass_svc(cpi)) - cpi->svc.layer_context[cpi->svc.spatial_layer_id].psnr_pkt = pkt.data.psnr; + if (cpi->use_svc) + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers].psnr_pkt = pkt.data.psnr; else vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); } @@ -3667,9 +3670,11 @@ static void encode_frame_to_data_rate(VP9_COMP *cpi, } cm->prev_frame = cm->cur_frame; - if (is_two_pass_svc(cpi)) - cpi->svc.layer_context[cpi->svc.spatial_layer_id].last_frame_type = - cm->frame_type; + if (cpi->use_svc) + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id].last_frame_type = + cm->frame_type; } static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, @@ -3930,6 +3935,8 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, #endif if (oxcf->pass == 2) vp9_restore_layer_context(cpi); + } else if (is_one_pass_cbr_svc(cpi)) { + vp9_one_pass_cbr_svc_start_layer(cpi); } vpx_usec_timer_start(&cmptimer); @@ -3948,9 +3955,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // Normal defaults cm->reset_frame_context = 0; cm->refresh_frame_context = 1; - cpi->refresh_last_frame = 1; - cpi->refresh_golden_frame = 0; - cpi->refresh_alt_ref_frame = 0; + if (!is_one_pass_cbr_svc(cpi)) { + cpi->refresh_last_frame = 1; + cpi->refresh_golden_frame = 0; + cpi->refresh_alt_ref_frame = 0; + } // Should we encode an arf frame. arf_src_index = get_arf_src_index(cpi); @@ -4006,12 +4015,11 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Read in the source frame. -#if CONFIG_SPATIAL_SVC - if (is_two_pass_svc(cpi)) + if (cpi->use_svc) source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush); else -#endif source = vp9_lookahead_pop(cpi->lookahead, flush); + if (source != NULL) { cm->show_frame = 1; cm->intra_only = 0; @@ -4060,8 +4068,7 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, adjust_frame_rate(cpi, source); } - if (cpi->svc.number_temporal_layers > 1 && - oxcf->rc_mode == VPX_CBR) { + if (is_one_pass_cbr_svc(cpi)) { vp9_update_temporal_layer_framerate(cpi); vp9_restore_layer_context(cpi); } @@ -4143,11 +4150,10 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, } // Save layer specific state. - if ((cpi->svc.number_temporal_layers > 1 && - oxcf->rc_mode == VPX_CBR) || - ((cpi->svc.number_temporal_layers > 1 || - cpi->svc.number_spatial_layers > 1) && - oxcf->pass == 2)) { + if (is_one_pass_cbr_svc(cpi) || + ((cpi->svc.number_temporal_layers > 1 || + cpi->svc.number_spatial_layers > 1) && + oxcf->pass == 2)) { vp9_save_layer_context(cpi); } @@ -4343,6 +4349,12 @@ int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, // May need the empty frame after an visible frame. cpi->svc.encode_empty_frame_state = NEED_TO_ENCODE; } + } else if (is_one_pass_cbr_svc(cpi)) { + if (cm->show_frame) { + ++cpi->svc.spatial_layer_to_encode; + if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers) + cpi->svc.spatial_layer_to_encode = 0; + } } return 0; } diff --git a/vp9/encoder/vp9_encoder.h b/vp9/encoder/vp9_encoder.h index 41f1c13d4..6ce4a67cd 100644 --- a/vp9/encoder/vp9_encoder.h +++ b/vp9/encoder/vp9_encoder.h @@ -194,10 +194,10 @@ typedef struct VP9EncoderConfig { int ss_number_layers; // Number of spatial layers. int ts_number_layers; // Number of temporal layers. // Bitrate allocation for spatial layers. + int layer_target_bitrate[VPX_MAX_LAYERS]; int ss_target_bitrate[VPX_SS_MAX_LAYERS]; int ss_enable_auto_arf[VPX_SS_MAX_LAYERS]; // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. - int ts_target_bitrate[VPX_TS_MAX_LAYERS]; int ts_rate_decimator[VPX_TS_MAX_LAYERS]; int enable_auto_arf; @@ -237,6 +237,7 @@ typedef struct VP9EncoderConfig { int use_highbitdepth; #endif vpx_color_space_t color_space; + VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; } VP9EncoderConfig; static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { @@ -611,9 +612,11 @@ YV12_BUFFER_CONFIG *vp9_scale_if_required(VP9_COMMON *cm, void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); static INLINE int is_two_pass_svc(const struct VP9_COMP *const cpi) { - return cpi->use_svc && - ((cpi->svc.number_spatial_layers > 1) || - (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.pass != 0)); + return cpi->use_svc && cpi->oxcf.pass != 0; +} + +static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) { + return (cpi->use_svc && cpi->oxcf.pass == 0); } static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { @@ -642,6 +645,8 @@ static INLINE int *cond_cost_list(const struct VP9_COMP *cpi, int *cost_list) { void vp9_new_framerate(VP9_COMP *cpi, double framerate); +#define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl)) + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index 98095ac32..32682fe74 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -234,13 +234,16 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { return target; } -// Update the buffer level for higher layers, given the encoded current layer. +// Update the buffer level for higher temporal layers, given the encoded current +// temporal layer. static void update_layer_buffer_level(SVC *svc, int encoded_frame_size) { - int temporal_layer = 0; + int i = 0; int current_temporal_layer = svc->temporal_layer_id; - for (temporal_layer = current_temporal_layer + 1; - temporal_layer < svc->number_temporal_layers; ++temporal_layer) { - LAYER_CONTEXT *lc = &svc->layer_context[temporal_layer]; + for (i = current_temporal_layer + 1; + i < svc->number_temporal_layers; ++i) { + const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, + svc->number_temporal_layers); + LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate - encoded_frame_size); @@ -268,7 +271,7 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { rc->bits_off_target = MIN(rc->bits_off_target, rc->maximum_buffer_size); rc->buffer_level = rc->bits_off_target; - if (cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR) { + if (is_one_pass_cbr_svc(cpi)) { update_layer_buffer_level(&cpi->svc, encoded_frame_size); } } @@ -1418,13 +1421,14 @@ static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { } else { target = rc->avg_frame_bandwidth; } - if (svc->number_temporal_layers > 1 && - oxcf->rc_mode == VPX_CBR) { + if (is_one_pass_cbr_svc(cpi)) { // Note that for layers, avg_frame_bandwidth is the cumulative // per-frame-bandwidth. For the target size of this frame, use the // layer average frame size (i.e., non-cumulative per-frame-bw). - int current_temporal_layer = svc->temporal_layer_id; - const LAYER_CONTEXT *lc = &svc->layer_context[current_temporal_layer]; + int layer = + LAYER_IDS_TO_IDX(svc->spatial_layer_id, + svc->temporal_layer_id, svc->number_temporal_layers); + const LAYER_CONTEXT *lc = &svc->layer_context[layer]; target = lc->avg_frame_size; min_frame_target = MAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS); } @@ -1459,7 +1463,9 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { if (svc->number_temporal_layers > 1 && oxcf->rc_mode == VPX_CBR) { // Use the layer framerate for temporal layers CBR mode. - const LAYER_CONTEXT *lc = &svc->layer_context[svc->temporal_layer_id]; + const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, + svc->temporal_layer_id, svc->number_temporal_layers); + const LAYER_CONTEXT *lc = &svc->layer_context[layer]; framerate = lc->framerate; } kf_boost = MAX(kf_boost, (int)(2 * framerate - 16)); @@ -1472,10 +1478,27 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { return vp9_rc_clamp_iframe_target_size(cpi, target); } +// Reset information needed to set proper reference frames and buffer updates +// for temporal layering. This is called when a key frame is encoded. +static void reset_temporal_layer_to_zero(VP9_COMP *cpi) { + int sl; + LAYER_CONTEXT *lc = NULL; + cpi->svc.temporal_layer_id = 0; + + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + lc = &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers]; + lc->current_video_frame_in_layer = 0; + lc->frames_from_key_frame = 0; + } +} + void vp9_rc_get_svc_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int target = rc->avg_frame_bandwidth; + const int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, + cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers); + if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) || (cpi->oxcf.auto_key && (rc->frames_since_key % @@ -1484,30 +1507,39 @@ void vp9_rc_get_svc_params(VP9_COMP *cpi) { rc->source_alt_ref_active = 0; if (is_two_pass_svc(cpi)) { - cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame = 1; + cpi->svc.layer_context[layer].is_key_frame = 1; cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); - } - - if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) { + } else if (is_one_pass_cbr_svc(cpi)) { + cpi->svc.layer_context[layer].is_key_frame = 1; + reset_temporal_layer_to_zero(cpi); + cpi->ref_frame_flags &= + (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG); + // Assumption here is that LAST_FRAME is being updated for a keyframe. + // Thus no change in update flags. target = calc_iframe_target_size_one_pass_cbr(cpi); } } else { cm->frame_type = INTER_FRAME; - if (is_two_pass_svc(cpi)) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; if (cpi->svc.spatial_layer_id == 0) { lc->is_key_frame = 0; } else { - lc->is_key_frame = cpi->svc.layer_context[0].is_key_frame; + lc->is_key_frame = + cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame; if (lc->is_key_frame) cpi->ref_frame_flags &= (~VP9_LAST_FLAG); } cpi->ref_frame_flags &= (~VP9_ALT_FLAG); - } - - if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR) { + } else if (is_one_pass_cbr_svc(cpi)) { + LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; + if (cpi->svc.spatial_layer_id == 0) { + lc->is_key_frame = 0; + } else { + lc->is_key_frame = + cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame; + } target = calc_pframe_target_size_one_pass_cbr(cpi); } } diff --git a/vp9/encoder/vp9_svc_layercontext.c b/vp9/encoder/vp9_svc_layercontext.c index 2d2e95d9c..919704819 100644 --- a/vp9/encoder/vp9_svc_layercontext.c +++ b/vp9/encoder/vp9_svc_layercontext.c @@ -21,83 +21,79 @@ void vp9_init_layer_context(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; - int layer; - int layer_end; + int sl, tl; int alt_ref_idx = svc->number_spatial_layers; svc->spatial_layer_id = 0; svc->temporal_layer_id = 0; - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - layer_end = svc->number_temporal_layers; - } else { - layer_end = svc->number_spatial_layers; - - if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { - if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img, - SMALL_FRAME_WIDTH, SMALL_FRAME_HEIGHT, - cpi->common.subsampling_x, - cpi->common.subsampling_y, + if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { + if (vp9_realloc_frame_buffer(&cpi->svc.empty_frame.img, + SMALL_FRAME_WIDTH, SMALL_FRAME_HEIGHT, + cpi->common.subsampling_x, + cpi->common.subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cpi->common.use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cpi->common.byte_alignment, NULL, NULL, NULL)) - vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, - "Failed to allocate empty frame for multiple frame " - "contexts"); + vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, + "Failed to allocate empty frame for multiple frame " + "contexts"); - memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80, - cpi->svc.empty_frame.img.buffer_alloc_sz); - } + memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80, + cpi->svc.empty_frame.img.buffer_alloc_sz); } - for (layer = 0; layer < layer_end; ++layer) { - LAYER_CONTEXT *const lc = &svc->layer_context[layer]; - RATE_CONTROL *const lrc = &lc->rc; - int i; - lc->current_video_frame_in_layer = 0; - lc->layer_size = 0; - lc->frames_from_key_frame = 0; - lc->last_frame_type = FRAME_TYPES; - lrc->ni_av_qi = oxcf->worst_allowed_q; - lrc->total_actual_bits = 0; - lrc->total_target_vs_actual = 0; - lrc->ni_tot_qi = 0; - lrc->tot_q = 0.0; - lrc->avg_q = 0.0; - lrc->ni_frames = 0; - lrc->decimation_count = 0; - lrc->decimation_factor = 0; + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers); + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + int i; + lc->current_video_frame_in_layer = 0; + lc->layer_size = 0; + lc->frames_from_key_frame = 0; + lc->last_frame_type = FRAME_TYPES; + lrc->ni_av_qi = oxcf->worst_allowed_q; + lrc->total_actual_bits = 0; + lrc->total_target_vs_actual = 0; + lrc->ni_tot_qi = 0; + lrc->tot_q = 0.0; + lrc->avg_q = 0.0; + lrc->ni_frames = 0; + lrc->decimation_count = 0; + lrc->decimation_factor = 0; - for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { - lrc->rate_correction_factors[i] = 1.0; - } + for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { + lrc->rate_correction_factors[i] = 1.0; + } - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; - lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; - lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; - lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q; - } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; - lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q; - lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q; - lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q + - oxcf->best_allowed_q) / 2; - lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q + + if (cpi->oxcf.rc_mode == VPX_CBR) { + lc->target_bandwidth = oxcf->layer_target_bitrate[layer]; + lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; + lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; + lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q; + } else { + lc->target_bandwidth = oxcf->layer_target_bitrate[layer]; + lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q; + lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q; + lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2; - if (oxcf->ss_enable_auto_arf[layer]) - lc->alt_ref_idx = alt_ref_idx++; - else - lc->alt_ref_idx = INVALID_IDX; - lc->gold_ref_idx = INVALID_IDX; - } + lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q + + oxcf->best_allowed_q) / 2; + if (oxcf->ss_enable_auto_arf[sl]) + lc->alt_ref_idx = alt_ref_idx++; + else + lc->alt_ref_idx = INVALID_IDX; + lc->gold_ref_idx = INVALID_IDX; + } - lrc->buffer_level = oxcf->starting_buffer_level_ms * - lc->target_bandwidth / 1000; - lrc->bits_off_target = lrc->buffer_level; + lrc->buffer_level = oxcf->starting_buffer_level_ms * + lc->target_bandwidth / 1000; + lrc->bits_off_target = lrc->buffer_level; + } } // Still have extra buffer for base layer golden frame @@ -112,53 +108,100 @@ void vp9_update_layer_context_change_config(VP9_COMP *const cpi, SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const RATE_CONTROL *const rc = &cpi->rc; - int layer; - int layer_end; + int sl, tl, layer = 0, spatial_layer_target; float bitrate_alloc = 1.0; - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - layer_end = svc->number_temporal_layers; + if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { + spatial_layer_target = 0; + + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers); + svc->layer_context[layer].target_bandwidth = + oxcf->layer_target_bitrate[layer]; + } + + layer = LAYER_IDS_TO_IDX(sl, ((oxcf->ts_number_layers - 1) < 0 ? + 0 : (oxcf->ts_number_layers - 1)), oxcf->ts_number_layers); + spatial_layer_target = + svc->layer_context[layer].target_bandwidth = + oxcf->layer_target_bitrate[layer]; + + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + LAYER_CONTEXT *const lc = + &svc->layer_context[sl * oxcf->ts_number_layers + tl]; + RATE_CONTROL *const lrc = &lc->rc; + layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers); + + lc->spatial_layer_target_bandwidth = spatial_layer_target; + bitrate_alloc = (float)lc->target_bandwidth / spatial_layer_target; + lrc->starting_buffer_level = + (int64_t)(rc->starting_buffer_level * bitrate_alloc); + lrc->optimal_buffer_level = + (int64_t)(rc->optimal_buffer_level * bitrate_alloc); + lrc->maximum_buffer_size = + (int64_t)(rc->maximum_buffer_size * bitrate_alloc); + lrc->bits_off_target = + MIN(lrc->bits_off_target, lrc->maximum_buffer_size); + lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); + lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; + lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = rc->max_frame_bandwidth; + lrc->worst_quality = rc->worst_quality; + lrc->best_quality = rc->best_quality; + } + } } else { - layer_end = svc->number_spatial_layers; - } - - for (layer = 0; layer < layer_end; ++layer) { - LAYER_CONTEXT *const lc = &svc->layer_context[layer]; - RATE_CONTROL *const lrc = &lc->rc; + int layer_end; + float bitrate_alloc = 1.0; if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - lc->target_bandwidth = oxcf->ts_target_bitrate[layer]; + layer_end = svc->number_temporal_layers; } else { - lc->target_bandwidth = oxcf->ss_target_bitrate[layer]; + layer_end = svc->number_spatial_layers; } - bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; - // Update buffer-related quantities. - lrc->starting_buffer_level = - (int64_t)(rc->starting_buffer_level * bitrate_alloc); - lrc->optimal_buffer_level = - (int64_t)(rc->optimal_buffer_level * bitrate_alloc); - lrc->maximum_buffer_size = - (int64_t)(rc->maximum_buffer_size * bitrate_alloc); - lrc->bits_off_target = MIN(lrc->bits_off_target, lrc->maximum_buffer_size); - lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); - // Update framerate-related quantities. - if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { - lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; - } else { - lc->framerate = cpi->framerate; + + for (layer = 0; layer < layer_end; ++layer) { + LAYER_CONTEXT *const lc = &svc->layer_context[layer]; + RATE_CONTROL *const lrc = &lc->rc; + + lc->target_bandwidth = oxcf->layer_target_bitrate[layer]; + + bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; + // Update buffer-related quantities. + lrc->starting_buffer_level = + (int64_t)(rc->starting_buffer_level * bitrate_alloc); + lrc->optimal_buffer_level = + (int64_t)(rc->optimal_buffer_level * bitrate_alloc); + lrc->maximum_buffer_size = + (int64_t)(rc->maximum_buffer_size * bitrate_alloc); + lrc->bits_off_target = MIN(lrc->bits_off_target, + lrc->maximum_buffer_size); + lrc->buffer_level = MIN(lrc->buffer_level, lrc->maximum_buffer_size); + // Update framerate-related quantities. + if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { + lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; + } else { + lc->framerate = cpi->framerate; + } + lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = rc->max_frame_bandwidth; + // Update qp-related quantities. + lrc->worst_quality = rc->worst_quality; + lrc->best_quality = rc->best_quality; } - lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); - lrc->max_frame_bandwidth = rc->max_frame_bandwidth; - // Update qp-related quantities. - lrc->worst_quality = rc->worst_quality; - lrc->best_quality = rc->best_quality; } } static LAYER_CONTEXT *get_layer_context(VP9_COMP *const cpi) { - return (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ? - &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : - &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + if (is_one_pass_cbr_svc(cpi)) + return &cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id]; + else + return (cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.rc_mode == VPX_CBR) ? + &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : + &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; } void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { @@ -166,18 +209,22 @@ void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; LAYER_CONTEXT *const lc = get_layer_context(cpi); RATE_CONTROL *const lrc = &lc->rc; - const int layer = svc->temporal_layer_id; + // Index into spatial+temporal arrays. + const int st_idx = svc->spatial_layer_id * svc->number_temporal_layers + + svc->temporal_layer_id; + const int tl = svc->temporal_layer_id; - lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; + lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl]; lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; // Update the average layer frame size (non-cumulative per-frame-bw). - if (layer == 0) { + if (tl == 0) { lc->avg_frame_size = lrc->avg_frame_bandwidth; } else { const double prev_layer_framerate = - cpi->framerate / oxcf->ts_rate_decimator[layer - 1]; - const int prev_layer_target_bandwidth = oxcf->ts_target_bitrate[layer - 1]; + cpi->framerate / oxcf->ts_rate_decimator[tl - 1]; + const int prev_layer_target_bandwidth = + oxcf->layer_target_bitrate[st_idx - 1]; lc->avg_frame_size = (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / (lc->framerate - prev_layer_framerate)); @@ -243,9 +290,8 @@ void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { void vp9_inc_frame_in_layer(VP9_COMP *const cpi) { LAYER_CONTEXT *const lc = - (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ? - &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : - &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; + &cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers]; ++lc->current_video_frame_in_layer; ++lc->frames_from_key_frame; } @@ -253,10 +299,11 @@ void vp9_inc_frame_in_layer(VP9_COMP *const cpi) { int vp9_is_upper_layer_key_frame(const VP9_COMP *const cpi) { return is_two_pass_svc(cpi) && cpi->svc.spatial_layer_id > 0 && - cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame; + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id].is_key_frame; } -#if CONFIG_SPATIAL_SVC static void get_layer_resolution(const int width_org, const int height_org, const int num, const int den, int *width_out, int *height_out) { @@ -276,6 +323,201 @@ static void get_layer_resolution(const int width_org, const int height_org, *height_out = h; } +// The function sets proper ref_frame_flags, buffer indices, and buffer update +// variables for temporal layering mode 3 - that does 0-2-1-2 temporal layering +// scheme. +static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) { + int frame_num_within_temporal_struct = 0; + int spatial_id, temporal_id; + spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; + frame_num_within_temporal_struct = + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers].current_video_frame_in_layer % 4; + temporal_id = cpi->svc.temporal_layer_id = + (frame_num_within_temporal_struct & 1) ? 2 : + (frame_num_within_temporal_struct >> 1); + cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame = + cpi->ext_refresh_alt_ref_frame = 0; + if (!temporal_id) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_last_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else if (cpi->svc.layer_context[temporal_id].is_key_frame) { + // base layer is a key frame. + cpi->ref_frame_flags = VP9_GOLD_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } else if (temporal_id == 1) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_alt_ref_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } else { + if (frame_num_within_temporal_struct == 1) { + // the first tl2 picture + if (!spatial_id) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_alt_ref_frame = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else if (spatial_id < cpi->svc.number_spatial_layers - 1) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_alt_ref_frame = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } else { // Top layer + cpi->ext_refresh_frame_flags_pending = 0; + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } else { + // The second tl2 picture + if (!spatial_id) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG; + cpi->ext_refresh_last_frame = 1; + } else if (spatial_id < cpi->svc.number_spatial_layers - 1) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + cpi->ext_refresh_last_frame = 1; + } else { // top layer + cpi->ext_refresh_frame_flags_pending = 0; + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } + } + if (temporal_id == 0) { + cpi->lst_fb_idx = spatial_id; + if (spatial_id) + cpi->gld_fb_idx = spatial_id - 1; + else + cpi->gld_fb_idx = 0; + cpi->alt_fb_idx = 0; + } else if (temporal_id == 1) { + cpi->lst_fb_idx = spatial_id; + cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; + cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; + } else if (frame_num_within_temporal_struct == 1) { + cpi->lst_fb_idx = spatial_id; + cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; + cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; + } else { + cpi->lst_fb_idx = cpi->svc.number_spatial_layers + spatial_id; + cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; + cpi->alt_fb_idx = 0; + } +} + +// The function sets proper ref_frame_flags, buffer indices, and buffer update +// variables for temporal layering mode 2 - that does 0-1-0-1 temporal layering +// scheme. +static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) { + int spatial_id, temporal_id; + spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; + temporal_id = cpi->svc.temporal_layer_id = + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers].current_video_frame_in_layer & 1; + cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame = + cpi->ext_refresh_alt_ref_frame = 0; + if (!temporal_id) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_last_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else if (cpi->svc.layer_context[temporal_id].is_key_frame) { + // base layer is a key frame. + cpi->ref_frame_flags = VP9_GOLD_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } else if (temporal_id == 1) { + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_alt_ref_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + } + + if (temporal_id == 0) { + cpi->lst_fb_idx = spatial_id; + if (spatial_id) + cpi->gld_fb_idx = spatial_id - 1; + else + cpi->gld_fb_idx = 0; + cpi->alt_fb_idx = 0; + } else if (temporal_id == 1) { + cpi->lst_fb_idx = spatial_id; + cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; + cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; + } +} + +// The function sets proper ref_frame_flags, buffer indices, and buffer update +// variables for temporal layering mode 0 - that has no temporal layering. +static void set_flags_and_fb_idx_for_temporal_mode_noLayering( + VP9_COMP *const cpi) { + int spatial_id; + spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; + cpi->ext_refresh_last_frame = + cpi->ext_refresh_golden_frame = cpi->ext_refresh_alt_ref_frame = 0; + cpi->ext_refresh_frame_flags_pending = 1; + cpi->ext_refresh_last_frame = 1; + if (!spatial_id) { + cpi->ref_frame_flags = VP9_LAST_FLAG; + } else if (cpi->svc.layer_context[0].is_key_frame) { + cpi->ref_frame_flags = VP9_GOLD_FLAG; + } else { + cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; + } + cpi->lst_fb_idx = spatial_id; + if (spatial_id) + cpi->gld_fb_idx = spatial_id - 1; + else + cpi->gld_fb_idx = 0; +} + +int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { + int width = 0, height = 0; + LAYER_CONTEXT *lc = NULL; + + if (cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { + set_flags_and_fb_idx_for_temporal_mode3(cpi); + } else if (cpi->svc.temporal_layering_mode == + VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { + set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi); + } else if (cpi->svc.temporal_layering_mode == + VP9E_TEMPORAL_LAYERING_MODE_0101) { + set_flags_and_fb_idx_for_temporal_mode2(cpi); + } else if (cpi->svc.temporal_layering_mode == + VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { + // VP9E_TEMPORAL_LAYERING_MODE_BYPASS : + // if the code goes here, it means the encoder will be relying on the + // flags from outside for layering. + // However, since when spatial+temporal layering is used, the buffer indices + // cannot be derived automatically, the bypass mode will only work when the + // number of spatial layers equals 1. + assert(cpi->svc.number_spatial_layers == 1); + } + + lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id]; + + get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height, + lc->scaling_factor_num, lc->scaling_factor_den, + &width, &height); + + if (vp9_set_size_literal(cpi, width, height) != 0) + return VPX_CODEC_INVALID_PARAM; + + return 0; +} + +#if CONFIG_SPATIAL_SVC int vp9_svc_start_frame(VP9_COMP *const cpi) { int width = 0, height = 0; LAYER_CONTEXT *lc; @@ -386,11 +628,12 @@ int vp9_svc_start_frame(VP9_COMP *const cpi) { return 0; } +#endif + struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi, struct lookahead_ctx *ctx, int drain) { struct lookahead_entry *buf = NULL; - if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { buf = vp9_lookahead_peek(ctx, 0); if (buf != NULL) { @@ -400,7 +643,5 @@ struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi, } } } - return buf; } -#endif diff --git a/vp9/encoder/vp9_svc_layercontext.h b/vp9/encoder/vp9_svc_layercontext.h index 5063d521f..b6a5ea548 100644 --- a/vp9/encoder/vp9_svc_layercontext.h +++ b/vp9/encoder/vp9_svc_layercontext.h @@ -22,6 +22,7 @@ extern "C" { typedef struct { RATE_CONTROL rc; int target_bandwidth; + int spatial_layer_target_bandwidth; // Target for the spatial layer. double framerate; int avg_frame_size; int max_q; @@ -64,9 +65,11 @@ typedef struct { YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS]; // Layer context used for rate control in one pass temporal CBR mode or - // two pass spatial mode. Defined for temporal or spatial layers for now. - // Does not support temporal combined with spatial RC. - LAYER_CONTEXT layer_context[MAX(VPX_TS_MAX_LAYERS, VPX_SS_MAX_LAYERS)]; + // two pass spatial mode. + LAYER_CONTEXT layer_context[VPX_MAX_LAYERS]; + // Indicates what sort of temporal layering is used. + // Currently, this only works for CBR mode. + VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; } SVC; struct VP9_COMP; @@ -110,6 +113,8 @@ struct lookahead_entry *vp9_svc_lookahead_pop(struct VP9_COMP *const cpi, // Start a frame and initialize svc parameters int vp9_svc_start_frame(struct VP9_COMP *const cpi); +int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi); + #ifdef __cplusplus } // extern "C" #endif diff --git a/vp9/encoder/vp9_temporal_filter.c b/vp9/encoder/vp9_temporal_filter.c index 10c67ff49..24b6203cb 100644 --- a/vp9/encoder/vp9_temporal_filter.c +++ b/vp9/encoder/vp9_temporal_filter.c @@ -682,7 +682,7 @@ void vp9_temporal_filter(VP9_COMP *cpi, int distance) { if (frames_to_blur > 0) { // Setup scaling factors. Scaling on each of the arnr frames is not // supported. - if (is_two_pass_svc(cpi)) { + if (cpi->use_svc) { // In spatial svc the scaling factors might be less then 1/2. // So we will use non-normative scaling. int frame_used = 0; diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index cba15e693..01b97bd60 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -176,15 +176,23 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS); RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); + if (cfg->ss_number_layers * cfg->ts_number_layers > VPX_MAX_LAYERS) + ERROR("ss_number_layers * ts_number_layers is out of range"); if (cfg->ts_number_layers > 1) { - unsigned int i; - for (i = 1; i < cfg->ts_number_layers; ++i) - if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i - 1]) + unsigned int sl, tl; + for (sl = 1; sl < cfg->ss_number_layers; ++sl) { + for (tl = 1; tl < cfg->ts_number_layers; ++tl) { + const int layer = + LAYER_IDS_TO_IDX(sl, tl, cfg->ts_number_layers); + if (cfg->layer_target_bitrate[layer] < + cfg->layer_target_bitrate[layer - 1]) ERROR("ts_target_bitrate entries are not increasing"); + } + } RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers - 1], 1, 1); - for (i = cfg->ts_number_layers - 2; i > 0; --i) - if (cfg->ts_rate_decimator[i - 1] != 2 * cfg->ts_rate_decimator[i]) + for (tl = cfg->ts_number_layers - 2; tl > 0; --tl) + if (cfg->ts_rate_decimator[tl - 1] != 2 * cfg->ts_rate_decimator[tl]) ERROR("ts_rate_decimator factors are not powers of 2"); } @@ -360,6 +368,7 @@ static vpx_codec_err_t set_encoder_config( const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) { const int is_vbr = cfg->rc_end_usage == VPX_VBR; + int sl, tl; oxcf->profile = cfg->g_profile; oxcf->max_threads = (int)cfg->g_threads; oxcf->width = cfg->g_w; @@ -460,35 +469,33 @@ static vpx_codec_err_t set_encoder_config( oxcf->frame_periodic_boost = extra_cfg->frame_periodic_boost; oxcf->ss_number_layers = cfg->ss_number_layers; + oxcf->ts_number_layers = cfg->ts_number_layers; + oxcf->temporal_layering_mode = (enum vp9e_temporal_layering_mode) + cfg->temporal_layering_mode; - if (oxcf->ss_number_layers > 1) { - int i; - for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { - oxcf->ss_target_bitrate[i] = 1000 * cfg->ss_target_bitrate[i]; + for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { #if CONFIG_SPATIAL_SVC - oxcf->ss_enable_auto_arf[i] = cfg->ss_enable_auto_alt_ref[i]; + oxcf->ss_enable_auto_arf[sl] = cfg->ss_enable_auto_alt_ref[sl]; #endif + for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { + oxcf->layer_target_bitrate[sl * oxcf->ts_number_layers + tl] = + 1000 * cfg->layer_target_bitrate[sl * oxcf->ts_number_layers + tl]; } - } else if (oxcf->ss_number_layers == 1) { + } + if (oxcf->ss_number_layers == 1 && oxcf->pass != 0) { oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth; #if CONFIG_SPATIAL_SVC oxcf->ss_enable_auto_arf[0] = extra_cfg->enable_auto_alt_ref; #endif } - - oxcf->ts_number_layers = cfg->ts_number_layers; - if (oxcf->ts_number_layers > 1) { - int i; - for (i = 0; i < VPX_TS_MAX_LAYERS; ++i) { - oxcf->ts_target_bitrate[i] = 1000 * cfg->ts_target_bitrate[i]; - oxcf->ts_rate_decimator[i] = cfg->ts_rate_decimator[i]; + for (tl = 0; tl < VPX_TS_MAX_LAYERS; ++tl) { + oxcf->ts_rate_decimator[tl] = cfg->ts_rate_decimator[tl] ? + cfg->ts_rate_decimator[tl] : 1; } } else if (oxcf->ts_number_layers == 1) { - oxcf->ts_target_bitrate[0] = (int)oxcf->target_bandwidth; oxcf->ts_rate_decimator[0] = 1; } - /* printf("Current VP9 Settings: \n"); printf("target_bandwidth: %d\n", oxcf->target_bandwidth); @@ -902,11 +909,12 @@ static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi, unsigned int lib_flags) { vpx_codec_frame_flags_t flags = lib_flags << 16; - if (lib_flags & FRAMEFLAGS_KEY -#if CONFIG_SPATIAL_SVC - || (is_two_pass_svc(cpi) && cpi->svc.layer_context[0].is_key_frame) -#endif - ) + if (lib_flags & FRAMEFLAGS_KEY || + (cpi->use_svc && + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers + + cpi->svc.temporal_layer_id].is_key_frame) + ) flags |= VPX_FRAME_IS_KEY; if (cpi->droppable) @@ -1022,16 +1030,15 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, vpx_codec_cx_pkt_t pkt; #if CONFIG_SPATIAL_SVC - if (is_two_pass_svc(cpi)) - cpi->svc.layer_context[cpi->svc.spatial_layer_id].layer_size += size; + if (cpi->use_svc) + cpi->svc.layer_context[cpi->svc.spatial_layer_id * + cpi->svc.number_temporal_layers].layer_size += size; #endif // Pack invisible frames with the next visible frame - if (!cpi->common.show_frame -#if CONFIG_SPATIAL_SVC - || (is_two_pass_svc(cpi) && - cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1) -#endif + if (!cpi->common.show_frame || + (cpi->use_svc && + cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1) ) { if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data; @@ -1089,24 +1096,26 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, pkt.data.frame.partition_id = -1; if(ctx->output_cx_pkt_cb.output_cx_pkt) - ctx->output_cx_pkt_cb.output_cx_pkt(&pkt, ctx->output_cx_pkt_cb.user_priv); + ctx->output_cx_pkt_cb.output_cx_pkt(&pkt, + ctx->output_cx_pkt_cb.user_priv); else vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); cx_data += size; cx_data_sz -= size; #if CONFIG_SPATIAL_SVC - if (is_two_pass_svc(cpi) && !ctx->output_cx_pkt_cb.output_cx_pkt) { + if (cpi->use_svc && !ctx->output_cx_pkt_cb.output_cx_pkt) { vpx_codec_cx_pkt_t pkt_sizes, pkt_psnr; - int i; + int sl; vp9_zero(pkt_sizes); vp9_zero(pkt_psnr); pkt_sizes.kind = VPX_CODEC_SPATIAL_SVC_LAYER_SIZES; pkt_psnr.kind = VPX_CODEC_SPATIAL_SVC_LAYER_PSNR; - for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[i]; - pkt_sizes.data.layer_sizes[i] = lc->layer_size; - pkt_psnr.data.layer_psnr[i] = lc->psnr_pkt; + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + LAYER_CONTEXT *lc = + &cpi->svc.layer_context[sl * cpi->svc.number_temporal_layers]; + pkt_sizes.data.layer_sizes[sl] = lc->layer_size; + pkt_psnr.data.layer_psnr[sl] = lc->psnr_pkt; lc->layer_size = 0; } @@ -1115,6 +1124,11 @@ static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt_psnr); } #endif + if (is_one_pass_cbr_svc(cpi) && + (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { + // Encoded all spatial layers; exit loop. + break; + } } } } @@ -1292,16 +1306,20 @@ static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) { int data = va_arg(args, int); const vpx_codec_enc_cfg_t *cfg = &ctx->cfg; + // Both one-pass and two-pass RC are supported now. + // User setting this has to make sure of the following. + // In two-pass setting: either (but not both) + // cfg->ss_number_layers > 1, or cfg->ts_number_layers > 1 + // In one-pass setting: + // either or both cfg->ss_number_layers > 1, or cfg->ts_number_layers > 1 vp9_set_svc(ctx->cpi, data); - // CBR or two pass mode for SVC with both temporal and spatial layers - // not yet supported. + if (data == 1 && - (cfg->rc_end_usage == VPX_CBR || - cfg->g_pass == VPX_RC_FIRST_PASS || + (cfg->g_pass == VPX_RC_FIRST_PASS || cfg->g_pass == VPX_RC_LAST_PASS) && - cfg->ss_number_layers > 1 && - cfg->ts_number_layers > 1) { + cfg->ss_number_layers > 1 && + cfg->ts_number_layers > 1) { return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_OK; @@ -1347,15 +1365,21 @@ static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, va_list args) { VP9_COMP *const cpi = ctx->cpi; vpx_svc_extra_cfg_t *const params = va_arg(args, vpx_svc_extra_cfg_t *); - int i; + int sl, tl; - for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { - LAYER_CONTEXT *lc = &cpi->svc.layer_context[i]; - - lc->max_q = params->max_quantizers[i]; - lc->min_q = params->min_quantizers[i]; - lc->scaling_factor_num = params->scaling_factor_num[i]; - lc->scaling_factor_den = params->scaling_factor_den[i]; + // Number of temporal layers and number of spatial layers have to be set + // properly before calling this control function. + for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { + for (tl = 0; tl < cpi->svc.number_temporal_layers; ++tl) { + const int layer = + LAYER_IDS_TO_IDX(sl, tl, cpi->svc.number_temporal_layers); + LAYER_CONTEXT *lc = + &cpi->svc.layer_context[layer]; + lc->max_q = params->max_quantizers[sl]; + lc->min_q = params->min_quantizers[sl]; + lc->scaling_factor_num = params->scaling_factor_num[sl]; + lc->scaling_factor_den = params->scaling_factor_den[sl]; + } } return VPX_CODEC_OK; @@ -1495,6 +1519,8 @@ static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { {0}, // ts_rate_decimator 0, // ts_periodicity {0}, // ts_layer_id + {0}, // layer_taget_bitrate + 0 // temporal_layering_mode } }, }; diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c index e711cf909..9a3cd8f33 100644 --- a/vpx/src/svc_encodeframe.c +++ b/vpx/src/svc_encodeframe.c @@ -302,31 +302,79 @@ void assign_layer_bitrates(const SvcContext *svc_ctx, vpx_codec_enc_cfg_t *const enc_cfg) { int i; const SvcInternal_t *const si = get_const_svc_internal(svc_ctx); + int sl, tl, spatial_layer_target; - if (si->bitrates[0] != 0) { - enc_cfg->rc_target_bitrate = 0; - for (i = 0; i < svc_ctx->spatial_layers; ++i) { - enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i]; - enc_cfg->rc_target_bitrate += si->bitrates[i]; - } - } else { - float total = 0; - float alloc_ratio[VPX_SS_MAX_LAYERS] = {0}; + if (svc_ctx->temporal_layering_mode != 0) { + if (si->bitrates[0] != 0) { + enc_cfg->rc_target_bitrate = 0; + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + enc_cfg->ss_target_bitrate[sl*svc_ctx->temporal_layers] = 0; + for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { + enc_cfg->ss_target_bitrate[sl*svc_ctx->temporal_layers] + += (unsigned int)si->bitrates[sl * svc_ctx->temporal_layers + tl]; + enc_cfg->layer_target_bitrate[sl*svc_ctx->temporal_layers + tl] + = si->bitrates[sl * svc_ctx->temporal_layers + tl]; + } + } + } else { + float total = 0; + float alloc_ratio[VPX_MAX_LAYERS] = {0}; - for (i = 0; i < svc_ctx->spatial_layers; ++i) { - if (si->svc_params.scaling_factor_den[i] > 0) { - alloc_ratio[i] = (float)(si->svc_params.scaling_factor_num[i] * 1.0 / - si->svc_params.scaling_factor_den[i]); + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + if (si->svc_params.scaling_factor_den[sl] > 0) { + alloc_ratio[sl] = (float)(si->svc_params.scaling_factor_num[sl] * + 1.0 / si->svc_params.scaling_factor_den[sl]); + total += alloc_ratio[sl]; + } + } - alloc_ratio[i] *= alloc_ratio[i]; - total += alloc_ratio[i]; + for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { + enc_cfg->ss_target_bitrate[sl] = spatial_layer_target = + (unsigned int)(enc_cfg->rc_target_bitrate * + alloc_ratio[sl] / total); + if (svc_ctx->temporal_layering_mode == 3) { + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = + spatial_layer_target >> 1; + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = + (spatial_layer_target >> 1) + (spatial_layer_target >> 2); + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] = + spatial_layer_target; + } else if (svc_ctx->temporal_layering_mode == 2) { + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = + spatial_layer_target * 2 / 3; + enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = + spatial_layer_target; + } else { + // User should explicitly assign bitrates in this case. + assert(0); + } } } + } else { + if (si->bitrates[0] != 0) { + enc_cfg->rc_target_bitrate = 0; + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i]; + enc_cfg->rc_target_bitrate += si->bitrates[i]; + } + } else { + float total = 0; + float alloc_ratio[VPX_MAX_LAYERS] = {0}; - for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { - if (total > 0) { - enc_cfg->ss_target_bitrate[i] = (unsigned int) - (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total); + for (i = 0; i < svc_ctx->spatial_layers; ++i) { + if (si->svc_params.scaling_factor_den[i] > 0) { + alloc_ratio[i] = (float)(si->svc_params.scaling_factor_num[i] * 1.0 / + si->svc_params.scaling_factor_den[i]); + + alloc_ratio[i] *= alloc_ratio[i]; + total += alloc_ratio[i]; + } + } + for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { + if (total > 0) { + enc_cfg->layer_target_bitrate[i] = (unsigned int) + (enc_cfg->rc_target_bitrate * alloc_ratio[i] / total); + } } } } @@ -365,6 +413,14 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, return VPX_CODEC_INVALID_PARAM; } + // Note: temporal_layering_mode only applies to one-pass CBR + // si->svc_params.temporal_layering_mode = svc_ctx->temporal_layering_mode; + if (svc_ctx->temporal_layering_mode == 3) { + svc_ctx->temporal_layers = 3; + } else if (svc_ctx->temporal_layering_mode == 2) { + svc_ctx->temporal_layers = 2; + } + for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { si->svc_params.max_quantizers[i] = MAX_QUANTIZER; si->svc_params.min_quantizers[i] = 0; @@ -387,6 +443,14 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, if (svc_ctx->temporal_layers > VPX_TS_MAX_LAYERS) svc_ctx->temporal_layers = VPX_TS_MAX_LAYERS; + if (svc_ctx->temporal_layers * svc_ctx->spatial_layers > VPX_MAX_LAYERS) { + svc_log(svc_ctx, SVC_LOG_ERROR, + "spatial layers * temporal layers exceeds the maximum number of " + "allowed layers of %d\n", + svc_ctx->spatial_layers * svc_ctx->temporal_layers, + (int) VPX_MAX_LAYERS); + return VPX_CODEC_INVALID_PARAM; + } assign_layer_bitrates(svc_ctx, enc_cfg); #if CONFIG_SPATIAL_SVC @@ -403,10 +467,24 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, } } - // modify encoder configuration + if (svc_ctx->threads) + enc_cfg->g_threads = svc_ctx->threads; + + // Modify encoder configuration enc_cfg->ss_number_layers = svc_ctx->spatial_layers; enc_cfg->ts_number_layers = svc_ctx->temporal_layers; + if (enc_cfg->rc_end_usage == VPX_CBR) { + enc_cfg->rc_resize_allowed = 0; + enc_cfg->rc_min_quantizer = 2; + enc_cfg->rc_max_quantizer = 63; + enc_cfg->rc_undershoot_pct = 50; + enc_cfg->rc_overshoot_pct = 50; + enc_cfg->rc_buf_initial_sz = 20; + enc_cfg->rc_buf_optimal_sz = 600; + enc_cfg->rc_buf_sz = 1000; + } + if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0) enc_cfg->g_error_resilient = 1; @@ -554,7 +632,7 @@ const char *vpx_svc_dump_statistics(SvcContext *svc_ctx) { mse[1], mse[2], mse[3]); bytes_total += si->bytes_sum[i]; - // clear sums for next time + // Clear sums for next time. si->bytes_sum[i] = 0; for (j = 0; j < COMPONENTS; ++j) { si->psnr_sum[i][j] = 0; diff --git a/vpx/svc_context.h b/vpx/svc_context.h index cf791bdeb..a09651cc9 100644 --- a/vpx/svc_context.h +++ b/vpx/svc_context.h @@ -33,10 +33,13 @@ typedef struct { // public interface to svc_command options int spatial_layers; // number of spatial layers int temporal_layers; // number of temporal layers + int temporal_layering_mode; SVC_LOG_LEVEL log_level; // amount of information to display int log_print; // when set, printf log messages instead of returning the // message with svc_get_message - + int output_rc_stat; // for outputting rc stats + int speed; // speed setting for codec + int threads; // private storage for vpx_svc_encode void *internal; } SvcContext; diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index db31d42af..90edc03fa 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -511,6 +511,17 @@ enum vp8e_enc_control_id { */ VP9E_SET_COLOR_SPACE, + /*!\brief Codec control function to set temporal layering mode. + * \note Valid ranges: 0..3, default is "0" (VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING). + * 0 = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING + * 1 = VP9E_TEMPORAL_LAYERING_MODE_BYPASS + * 2 = VP9E_TEMPORAL_LAYERING_MODE_0101 + * 3 = VP9E_TEMPORAL_LAYERING_MODE_0212 + * + * Supported in codecs: VP9 + */ + VP9E_SET_TEMPORAL_LAYERING_MODE, + /*!\brief Codec control function to get an Active map back from the encoder. * * Supported in codecs: VP9 @@ -529,6 +540,32 @@ typedef enum vpx_scaling_mode_1d { VP8E_ONETWO = 3 } VPX_SCALING_MODE; +/*!\brief Temporal layering mode enum for VP9 SVC. + * + * This set of macros define the different temporal layering modes. + * Supported codecs: VP9 (in SVC mode) + * + */ +typedef enum vp9e_temporal_layering_mode { + /*!\brief No temporal layering. + * Used when only spatial layering is used. + */ + VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING = 0, + + /*!\brief Bypass mode. + * Used when application needs to control temporal layering. + * This will only work when the number of spatial layers equals 1. + */ + VP9E_TEMPORAL_LAYERING_MODE_BYPASS = 1, + + /*!\brief 0-1-0-1... temporal layering scheme with two temporal layers. + */ + VP9E_TEMPORAL_LAYERING_MODE_0101 = 2, + + /*!\brief 0-2-1-2... temporal layering scheme with three temporal layers. + */ + VP9E_TEMPORAL_LAYERING_MODE_0212 = 3 +} VP9E_TEMPORAL_LAYERING_MODE; /*!\brief vpx region of interest map * diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h index bf75584d5..5892a63ce 100644 --- a/vpx/vpx_encoder.h +++ b/vpx/vpx_encoder.h @@ -42,8 +42,11 @@ extern "C" { /*!\deprecated Use #VPX_TS_MAX_PERIODICITY instead. */ #define MAX_PERIODICITY VPX_TS_MAX_PERIODICITY - /*!\deprecated Use #VPX_TS_MAX_LAYERS instead. */ -#define MAX_LAYERS VPX_TS_MAX_LAYERS +/*! Temporal+Spatial Scalability: Maximum number of coding layers */ +#define VPX_MAX_LAYERS 12 // 3 temporal + 4 spatial layers are allowed. + +/*!\deprecated Use #VPX_MAX_LAYERS instead. */ +#define MAX_LAYERS VPX_MAX_LAYERS // 3 temporal + 4 spatial layers allowed. /*! Spatial Scalability: Maximum number of coding layers */ #define VPX_SS_MAX_LAYERS 5 @@ -729,6 +732,22 @@ extern "C" { * ts_periodicity=8, then ts_layer_id = (0,1,0,1,0,1,0,1). */ unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY]; + + /*!\brief Target bitrate for each spatial/temporal layer. + * + * These values specify the target coding bitrate to be used for each + * spatial/temporal layer. + * + */ + unsigned int layer_target_bitrate[VPX_MAX_LAYERS]; + + /*!\brief Temporal layering mode indicating which temporal layering scheme to use. + * + * The value (refer to VP9E_TEMPORAL_LAYERING_MODE) specifies the + * temporal layering mode to use. + * + */ + int temporal_layering_mode; } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */ /*!\brief vp9 svc extra configure parameters @@ -737,10 +756,11 @@ extern "C" { * */ typedef struct vpx_svc_parameters { - int max_quantizers[VPX_SS_MAX_LAYERS]; /**< Max Q for each layer */ - int min_quantizers[VPX_SS_MAX_LAYERS]; /**< Min Q for each layer */ - int scaling_factor_num[VPX_SS_MAX_LAYERS]; /**< Scaling factor-numerator*/ - int scaling_factor_den[VPX_SS_MAX_LAYERS]; /**< Scaling factor-denominator*/ + int max_quantizers[VPX_MAX_LAYERS]; /**< Max Q for each layer */ + int min_quantizers[VPX_MAX_LAYERS]; /**< Min Q for each layer */ + int scaling_factor_num[VPX_MAX_LAYERS]; /**< Scaling factor-numerator */ + int scaling_factor_den[VPX_MAX_LAYERS]; /**< Scaling factor-denominator */ + int temporal_layering_mode; /**< Temporal layering mode */ } vpx_svc_extra_cfg_t;