From 4864ab21b0679270bd41c280489a8493bcf8c4ee Mon Sep 17 00:00:00 2001 From: Marco Paniconi Date: Thu, 6 Feb 2014 09:23:17 -0800 Subject: [PATCH] Layer based rate control for CBR mode. This patch adds a buffer-based rate control for temporal layers, under CBR mode. Added vpx_temporal_scalable_patterns.c encoder for testing temporal layers, for both vp9 and vp8 (replaces the old vp8_scalable_patterns). Updated datarate unittest with tests for temporal layer rate-targeting. Change-Id: I8900a854288b9354d9c697cfeb0243a9fd6790b1 --- examples.mk | 8 +- examples/vpx_temporal_scalable_patterns.c | 548 +++++++++++++++++ test/datarate_test.cc | 206 ++++++- test/encode_test_driver.h | 5 + vp8_scalable_patterns.c | 694 ---------------------- vp9/common/vp9_onyx.h | 8 +- vp9/encoder/vp9_onyx_if.c | 147 ++++- vp9/encoder/vp9_onyx_int.h | 25 +- vp9/encoder/vp9_ratectrl.c | 85 ++- vp9/vp9_cx_iface.c | 67 ++- vpx/src/svc_encodeframe.c | 4 +- vpx/vp8cx.h | 12 +- vpx/vpx_encoder.h | 39 +- 13 files changed, 1067 insertions(+), 781 deletions(-) create mode 100644 examples/vpx_temporal_scalable_patterns.c delete mode 100644 vp8_scalable_patterns.c diff --git a/examples.mk b/examples.mk index b2bdf6815..f3e75073c 100644 --- a/examples.mk +++ b/examples.mk @@ -54,9 +54,6 @@ vpxenc.SRCS += third_party/libmkv/EbmlWriter.h vpxenc.SRCS += $(LIBYUV_SRCS) vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 vpxenc.DESCRIPTION = Full featured encoder -UTILS-$(CONFIG_VP8_ENCODER) += vp8_scalable_patterns.c -vp8_scalable_patterns.GUID = 0D6A210B-F482-4D6F-8570-4A9C01ACC88C -vp8_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder UTILS-$(CONFIG_VP9_ENCODER) += vp9_spatial_scalable_encoder.c vp9_spatial_scalable_encoder.SRCS += args.c args.h vp9_spatial_scalable_encoder.SRCS += ivfenc.c ivfenc.h @@ -73,6 +70,11 @@ endif #example_xma.GUID = A955FC4A-73F1-44F7-135E-30D84D32F022 #example_xma.DESCRIPTION = External Memory Allocation mode usage +EXAMPLES-$(CONFIG_ENCODERS) += 
vpx_temporal_scalable_patterns.c +vpx_temporal_scalable_patterns.SRCS += ivfenc.c ivfenc.h +vpx_temporal_scalable_patterns.SRCS += tools_common.c tools_common.h +vpx_temporal_scalable_patterns.GUID = B18C08F2-A439-4502-A78E-849BE3D60947 +vpx_temporal_scalable_patterns.DESCRIPTION = Temporal Scalability Encoder EXAMPLES-$(CONFIG_VP8_DECODER) += simple_decoder.c simple_decoder.GUID = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC simple_decoder.SRCS += ivfdec.h ivfdec.c diff --git a/examples/vpx_temporal_scalable_patterns.c b/examples/vpx_temporal_scalable_patterns.c new file mode 100644 index 000000000..f91d33ce2 --- /dev/null +++ b/examples/vpx_temporal_scalable_patterns.c @@ -0,0 +1,548 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// This is an example demonstrating how to implement a multi-layer VP9 +// encoding scheme based on temporal scalability for video applications +// that benefit from a scalable bitstream. + +#include +#include +#include +#include + +#define VPX_CODEC_DISABLE_COMPAT 1 +#include "./ivfenc.h" +#include "./tools_common.h" +#include "./vpx_config.h" +#include "vpx/vp8cx.h" +#include "vpx/vpx_encoder.h" + +static const char *exec_name; + +void usage_exit() { + exit(EXIT_FAILURE); +} + +static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3}; + +// Temporal scaling parameters: +// NOTE: The 3 prediction frames cannot be used interchangeably due to +// differences in the way they are handled throughout the code. The +// frames should be allocated to layers in the order LAST, GF, ARF. +// Other combinations work, but may produce slightly inferior results. 
+static void set_temporal_layer_pattern(int layering_mode, + vpx_codec_enc_cfg_t *cfg, + int *layer_flags, + int *flag_periodicity) { + switch (layering_mode) { + case 0: { + // 1-layer. + int ids[1] = {0}; + cfg->ts_periodicity = 1; + *flag_periodicity = 1; + cfg->ts_number_layers = 1; + cfg->ts_rate_decimator[0] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // Update L only. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + break; + } + case 1: { + // 2-layers, 2-frame period. + int ids[2] = {0, 1}; + cfg->ts_periodicity = 2; + *flag_periodicity = 2; + cfg->ts_number_layers = 2; + cfg->ts_rate_decimator[0] = 2; + cfg->ts_rate_decimator[1] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); +#if 1 + // 0=L, 1=GF, Intra-layer prediction enabled. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; + layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_REF_ARF; +#else + // 0=L, 1=GF, Intra-layer prediction disabled. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; + layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST; +#endif + break; + } + case 2: { + // 2-layers, 3-frame period. + int ids[3] = {0, 1, 1}; + cfg->ts_periodicity = 3; + *flag_periodicity = 3; + cfg->ts_number_layers = 2; + cfg->ts_rate_decimator[0] = 3; + cfg->ts_rate_decimator[1] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, Intra-layer prediction enabled. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[1] = + layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + break; + } + case 3: { + // 3-layers, 6-frame period. 
+ int ids[6] = {0, 2, 2, 1, 2, 2}; + cfg->ts_periodicity = 6; + *flag_periodicity = 6; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 6; + cfg->ts_rate_decimator[1] = 3; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST; + layer_flags[1] = + layer_flags[2] = + layer_flags[4] = + layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST; + break; + } + case 4: { + // 3-layers, 4-frame period. + int ids[4] = {0, 2, 1, 2}; + cfg->ts_periodicity = 4; + *flag_periodicity = 4; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + layer_flags[1] = + layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + break; + } + case 5: { + // 3-layers, 4-frame period. + int ids[4] = {0, 2, 1, 2}; + cfg->ts_periodicity = 4; + *flag_periodicity = 4; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, disabled + // in layer 2. 
+ layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ARF; + layer_flags[1] = + layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + break; + } + case 6: { + // 3-layers, 4-frame period. + int ids[4] = {0, 2, 1, 2}; + cfg->ts_periodicity = 4; + *flag_periodicity = 4; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ARF; + layer_flags[1] = + layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; + break; + } + case 7: { + // NOTE: Probably of academic interest only. + // 5-layers, 16-frame period. 
+ int ids[16] = {0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4}; + cfg->ts_periodicity = 16; + *flag_periodicity = 16; + cfg->ts_number_layers = 5; + cfg->ts_rate_decimator[0] = 16; + cfg->ts_rate_decimator[1] = 8; + cfg->ts_rate_decimator[2] = 4; + cfg->ts_rate_decimator[3] = 2; + cfg->ts_rate_decimator[4] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + layer_flags[0] = VPX_EFLAG_FORCE_KF; + layer_flags[1] = + layer_flags[3] = + layer_flags[5] = + layer_flags[7] = + layer_flags[9] = + layer_flags[11] = + layer_flags[13] = + layer_flags[15] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + layer_flags[2] = + layer_flags[6] = + layer_flags[10] = + layer_flags[14] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF; + layer_flags[4] = + layer_flags[12] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_ARF; + layer_flags[8] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF; + break; + } + case 8: { + // 2-layers, with sync point at first frame of layer 1. + int ids[2] = {0, 1}; + cfg->ts_periodicity = 2; + *flag_periodicity = 8; + cfg->ts_number_layers = 2; + cfg->ts_rate_decimator[0] = 2; + cfg->ts_rate_decimator[1] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF. + // ARF is used as predictor for all frames, and is only updated on + // key frame. Sync point every 8 frames. + + // Layer 0: predict from L and ARF, update L and G. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_UPD_ARF; + // Layer 1: sync point: predict from L and ARF, and update G. + layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ARF; + // Layer 0, predict from L and ARF, update L. + layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + // Layer 1: predict from L, G and ARF, and update G. + layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ENTROPY; + // Layer 0. + layer_flags[4] = layer_flags[2]; + // Layer 1. 
+ layer_flags[5] = layer_flags[3]; + // Layer 0. + layer_flags[6] = layer_flags[4]; + // Layer 1. + layer_flags[7] = layer_flags[5]; + break; + } + case 9: { + // 3-layers: Sync points for layer 1 and 2 every 8 frames. + int ids[4] = {0, 2, 1, 2}; + cfg->ts_periodicity = 4; + *flag_periodicity = 8; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | + VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; + layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; + layer_flags[3] = + layer_flags[5] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; + layer_flags[4] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | + VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; + layer_flags[6] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ARF; + layer_flags[7] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_ENTROPY; + break; + } + case 10: { + // 3-layers structure where ARF is used as predictor for all frames, + // and is only updated on key frame. + // Sync points for layer 1 and 2 every 8 frames. + + int ids[4] = {0, 2, 1, 2}; + cfg->ts_periodicity = 4; + *flag_periodicity = 8; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF. + // Layer 0: predict from L and ARF; update L and G. + layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF; + // Layer 2: sync point: predict from L and ARF; update none. 
+ layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ENTROPY; + // Layer 1: sync point: predict from L and ARF; update G. + layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST; + // Layer 2: predict from L, G, ARF; update none. + layer_flags[3] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; + // Layer 0: predict from L and ARF; update L. + layer_flags[4] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF; + // Layer 2: predict from L, G, ARF; update none. + layer_flags[5] = layer_flags[3]; + // Layer 1: predict from L, G, ARF; update G. + layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + // Layer 2: predict from L, G, ARF; update none. + layer_flags[7] = layer_flags[3]; + break; + } + case 11: + default: { + // 3-layers structure as in case 10, but no sync/refresh points for + // layer 1 and 2. + int ids[4] = {0, 2, 1, 2}; + cfg->ts_periodicity = 4; + *flag_periodicity = 8; + cfg->ts_number_layers = 3; + cfg->ts_rate_decimator[0] = 4; + cfg->ts_rate_decimator[1] = 2; + cfg->ts_rate_decimator[2] = 1; + memcpy(cfg->ts_layer_id, ids, sizeof(ids)); + // 0=L, 1=GF, 2=ARF. + // Layer 0: predict from L and ARF; update L. + layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF; + layer_flags[4] = layer_flags[0]; + // Layer 1: predict from L, G, ARF; update G. + layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + layer_flags[6] = layer_flags[2]; + // Layer 2: predict from L, G, ARF; update none. 
+ layer_flags[1] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; + layer_flags[3] = layer_flags[1]; + layer_flags[5] = layer_flags[1]; + layer_flags[7] = layer_flags[1]; + break; + } + } +} + +int main(int argc, char **argv) { + FILE *outfile[VPX_TS_MAX_LAYERS]; + vpx_codec_ctx_t codec; + vpx_codec_enc_cfg_t cfg; + int frame_cnt = 0; + vpx_image_t raw; + vpx_codec_err_t res; + unsigned int width; + unsigned int height; + int frame_avail; + int got_data; + int flags = 0; + int i; + int pts = 0; // PTS starts at 0. + int frame_duration = 1; // 1 timebase tick per frame. + int layering_mode = 0; + int frames_in_layer[VPX_TS_MAX_LAYERS] = {0}; + int layer_flags[VPX_TS_MAX_PERIODICITY] = {0}; + int flag_periodicity = 1; + int max_intra_size_pct; + vpx_svc_layer_id_t layer_id = {0, 0}; + char *codec_type; + const vpx_codec_iface_t *(*interface)(void); + unsigned int fourcc; + struct VpxInputContext input_ctx = {0}; + + exec_name = argv[0]; + // Check usage and arguments. + if (argc < 10) { + die("Usage: %s " + " ... 
\n", + argv[0]); + } + + codec_type = argv[3]; + if (strncmp(codec_type, "vp9", 3) == 0) { +#if CONFIG_VP9_ENCODER + interface = vpx_codec_vp9_cx; + fourcc = 0x30395056; +#else + die("Encoder vp9 selected but not configured"); +#endif + } else { +#if CONFIG_VP8_ENCODER + interface = vpx_codec_vp8_cx; + fourcc = 0x30385056; +#else + die("Encoder vp8 selected but not configured"); +#endif + } + printf("Using %s\n", vpx_codec_iface_name(interface())); + + width = strtol(argv[4], NULL, 0); + height = strtol(argv[5], NULL, 0); + if (width < 16 || width % 2 || height < 16 || height % 2) { + die("Invalid resolution: %d x %d", width, height); + } + + layering_mode = strtol(argv[8], NULL, 0); + if (layering_mode < 0 || layering_mode > 11) { + die("Invalid mode (0..11) %s", argv[8]); + } + + if (argc != 9 + mode_to_num_layers[layering_mode]) { + die("Invalid number of arguments"); + } + + if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) { + die("Failed to allocate image", width, height); + } + + // Populate encoder configuration. + res = vpx_codec_enc_config_default(interface(), &cfg, 0); + if (res) { + printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); + return EXIT_FAILURE; + } + + // Update the default configuration with our settings. + cfg.g_w = width; + cfg.g_h = height; + + // Timebase format e.g. 30fps: numerator=1, denominator = 30. + cfg.g_timebase.num = strtol(argv[6], NULL, 0); + cfg.g_timebase.den = strtol(argv[7], NULL, 0); + + for (i = 9; i < 9 + mode_to_num_layers[layering_mode]; ++i) { + cfg.ts_target_bitrate[i-9] = strtol(argv[i], NULL, 0); + } + + // Real time parameters. + cfg.rc_dropframe_thresh = 0; + cfg.rc_end_usage = VPX_CBR; + cfg.rc_resize_allowed = 0; + cfg.rc_min_quantizer = 2; + cfg.rc_max_quantizer = 56; + cfg.rc_undershoot_pct = 100; + cfg.rc_overshoot_pct = 15; + cfg.rc_buf_initial_sz = 500; + cfg.rc_buf_optimal_sz = 600; + cfg.rc_buf_sz = 1000; + + // Enable error resilient mode. 
+ cfg.g_error_resilient = 1; + cfg.g_lag_in_frames = 0; + cfg.kf_mode = VPX_KF_DISABLED; + + // Disable automatic keyframe placement. + cfg.kf_min_dist = cfg.kf_max_dist = 3000; + + // Default setting for bitrate: used in special case of 1 layer (case 0). + cfg.rc_target_bitrate = cfg.ts_target_bitrate[0]; + + set_temporal_layer_pattern(layering_mode, + &cfg, + layer_flags, + &flag_periodicity); + + // Open input file. + input_ctx.filename = argv[1]; + if (!(input_ctx.file = fopen(input_ctx.filename, "rb"))) { + die("Failed to open %s for reading", argv[1]); + } + + // Open an output file for each stream. + for (i = 0; i < cfg.ts_number_layers; ++i) { + char file_name[512]; + snprintf(file_name, sizeof(file_name), "%s_%d.ivf", argv[2], i); + if (!(outfile[i] = fopen(file_name, "wb"))) + die("Failed to open %s for writing", file_name); + ivf_write_file_header(outfile[i], &cfg, fourcc, 0); + } + // No spatial layers in this encoder. + cfg.ss_number_layers = 1; + + // Initialize codec. + if (vpx_codec_enc_init(&codec, interface(), &cfg, 0)) + die_codec(&codec, "Failed to initialize encoder"); + + vpx_codec_control(&codec, VP8E_SET_CPUUSED, -6); + vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 1); + if (strncmp(codec_type, "vp9", 3) == 0) { + vpx_codec_control(&codec, VP8E_SET_CPUUSED, 3); + vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, 0); + if (vpx_codec_control(&codec, VP9E_SET_SVC, 1)) { + die_codec(&codec, "Failed to set SVC"); + } + } + vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); + vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1); + max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5) + * ((double) cfg.g_timebase.den / cfg.g_timebase.num) / 10.0); + vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct); + + frame_avail = 1; + while (frame_avail || got_data) { + vpx_codec_iter_t iter = NULL; + const vpx_codec_cx_pkt_t *pkt; + // Update the temporal layer_id. No spatial layers in this test. 
+ layer_id.spatial_layer_id = 0; + layer_id.temporal_layer_id = + cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; + vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id); + flags = layer_flags[frame_cnt % flag_periodicity]; + frame_avail = !read_yuv_frame(&input_ctx, &raw); + if (vpx_codec_encode(&codec, frame_avail? &raw : NULL, pts, 1, flags, + VPX_DL_REALTIME)) { + die_codec(&codec, "Failed to encode frame"); + } + // Reset KF flag. + if (layering_mode != 7) { + layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; + } + got_data = 0; + while ( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) { + got_data = 1; + switch (pkt->kind) { + case VPX_CODEC_CX_FRAME_PKT: + for (i = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; + i < cfg.ts_number_layers; ++i) { + ivf_write_frame_header(outfile[i], pts, pkt->data.frame.sz); + (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, + outfile[i]); + ++frames_in_layer[i]; + } + break; + default: + break; + } + } + ++frame_cnt; + pts += frame_duration; + } + fclose(input_ctx.file); + printf("Processed %d frames: \n", frame_cnt-1); + if (vpx_codec_destroy(&codec)) { + die_codec(&codec, "Failed to destroy codec"); + } + // Try to rewrite the output file headers with the actual frame count. + for (i = 0; i < cfg.ts_number_layers; ++i) { + if (!fseek(outfile[i], 0, SEEK_SET)) + ivf_write_file_header(outfile[i], &cfg, fourcc, frame_cnt); + fclose(outfile[i]); + } + return EXIT_SUCCESS; +} diff --git a/test/datarate_test.cc b/test/datarate_test.cc index db7dfdb53..4bcb0b2fd 100644 --- a/test/datarate_test.cc +++ b/test/datarate_test.cc @@ -200,21 +200,102 @@ class DatarateTestVP9 : public ::libvpx_test::EncoderTest, frame_number_ = 0; first_drop_ = 0; num_drops_ = 0; - bits_total_ = 0; - duration_ = 0.0; + // For testing up to 3 layers. + for (int i = 0; i < 3; ++i) { + bits_total_[i] = 0; + } + } + + // + // Frame flags and layer id for temporal layers. + // + + // For two layers, test pattern is: + // 1 3 + // 0 2 ..... 
+ // For three layers, test pattern is: + // 1 3 5 7 + // 2 6 + // 0 4 .... + // LAST is always updated on base/layer 0, GOLDEN is updated on layer 1. + // For this 3 layer example, the 2nd enhancement layer (layer 2) does not + // update any reference frames. + int SetFrameFlags(int frame_num, int num_temp_layers) { + int frame_flags = 0; + if (num_temp_layers == 2) { + if (frame_num % 2 == 0) { + // Layer 0: predict from L and ARF, update L. + frame_flags = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | + VP8_EFLAG_NO_UPD_ARF; + } else { + // Layer 1: predict from L, G and ARF, and update G. + frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | + VP8_EFLAG_NO_UPD_ENTROPY; + } + } else if (num_temp_layers == 3) { + if (frame_num % 4 == 0) { + // Layer 0: predict from L and ARF; update L. + frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_REF_GF; + } else if ((frame_num - 2) % 4 == 0) { + // Layer 1: predict from L, G, ARF; update G. + frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; + } else if ((frame_num - 1) % 2 == 0) { + // Layer 2: predict from L, G, ARF; update none. 
+ frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | + VP8_EFLAG_NO_UPD_LAST; + } + } + return frame_flags; + } + + int SetLayerId(int frame_num, int num_temp_layers) { + int layer_id = 0; + if (num_temp_layers == 2) { + if (frame_num % 2 == 0) { + layer_id = 0; + } else { + layer_id = 1; + } + } else if (num_temp_layers == 3) { + if (frame_num % 4 == 0) { + layer_id = 0; + } else if ((frame_num - 2) % 4 == 0) { + layer_id = 1; + } else if ((frame_num - 1) % 2 == 0) { + layer_id = 2; + } + } + return layer_id; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, - ::libvpx_test::Encoder *encoder) { + ::libvpx_test::Encoder *encoder) { if (video->frame() == 1) { encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); } + if (cfg_.ts_number_layers > 1) { + if (video->frame() == 1) { + encoder->Control(VP9E_SET_SVC, 1); + } + vpx_svc_layer_id_t layer_id = {0, 0}; + layer_id.spatial_layer_id = 0; + frame_flags_ = SetFrameFlags(video->frame(), cfg_.ts_number_layers); + layer_id.temporal_layer_id = SetLayerId(video->frame(), + cfg_.ts_number_layers); + if (video->frame() > 0) { + encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id); + } + } const vpx_rational_t tb = video->timebase(); timebase_ = static_cast(tb.num) / tb.den; duration_ = 0; } + virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { + int layer = SetLayerId(frame_number_, cfg_.ts_number_layers); + // Time since last timestamp = duration. vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; @@ -227,7 +308,12 @@ class DatarateTestVP9 : public ::libvpx_test::EncoderTest, << pkt->data.frame.pts; const size_t frame_size_in_bits = pkt->data.frame.sz * 8; - bits_total_ += frame_size_in_bits; + + // Update the total encoded bits. For temporal layers, update the cumulative + // encoded bits per layer. + for (int i = layer; i < static_cast(cfg_.ts_number_layers); ++i) { + bits_total_[i] += frame_size_in_bits; + } // If first drop not set and we have a drop set it to this time. 
if (!first_drop_ && duration > 1) @@ -244,19 +330,22 @@ class DatarateTestVP9 : public ::libvpx_test::EncoderTest, } virtual void EndPassHook(void) { - if (bits_total_) { + for (int layer = 0; layer < static_cast(cfg_.ts_number_layers); + ++layer) { duration_ = (last_pts_ + 1) * timebase_; - // Effective file datarate: - effective_datarate_ = ((bits_total_) / 1000.0) / duration_; + if (bits_total_[layer]) { + // Effective file datarate: + effective_datarate_[layer] = (bits_total_[layer] / 1000.0) / duration_; + } } } vpx_codec_pts_t last_pts_; double timebase_; int frame_number_; - int64_t bits_total_; + int64_t bits_total_[3]; double duration_; - double effective_datarate_; + double effective_datarate_[3]; int set_cpu_used_; int64_t bits_in_buffer_model_; vpx_codec_pts_t first_drop_; @@ -272,6 +361,7 @@ TEST_P(DatarateTestVP9, BasicRateTargeting) { cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 140); @@ -279,12 +369,10 @@ TEST_P(DatarateTestVP9, BasicRateTargeting) { cfg_.rc_target_bitrate = i; ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(static_cast(cfg_.rc_target_bitrate), - effective_datarate_ * 0.85) - << " The datarate for the file exceeds the target by too much!"; - ASSERT_LE(static_cast(cfg_.rc_target_bitrate), - effective_datarate_ * 1.15) - << " The datarate for the file missed the target!"; + ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) + << " The datarate for the file is greater than target by too much!"; } } @@ -309,10 +397,10 @@ TEST_P(DatarateTestVP9, BasicRateTargeting444) { ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(static_cast(cfg_.rc_target_bitrate), - effective_datarate_ * 0.85) + 
effective_datarate_[0] * 0.85) << " The datarate for the file exceeds the target by too much!"; ASSERT_LE(static_cast(cfg_.rc_target_bitrate), - effective_datarate_ * 1.15) + effective_datarate_[0] * 1.15) << " The datarate for the file missed the target!" << cfg_.rc_target_bitrate << " "<< effective_datarate_; } @@ -334,6 +422,7 @@ TEST_P(DatarateTestVP9, ChangingDropFrameThresh) { cfg_.rc_max_quantizer = 50; cfg_.rc_end_usage = VPX_CBR; cfg_.rc_target_bitrate = 200; + cfg_.g_lag_in_frames = 0; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 140); @@ -345,10 +434,10 @@ TEST_P(DatarateTestVP9, ChangingDropFrameThresh) { cfg_.rc_dropframe_thresh = i; ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); - ASSERT_GE(effective_datarate_, cfg_.rc_target_bitrate * 0.85) - << " The datarate for the file is lower than target by too much!"; - ASSERT_LE(effective_datarate_, cfg_.rc_target_bitrate * 1.15) - << " The datarate for the file is greater than target by too much!"; + ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) + << " The datarate for the file is lower than target by too much!"; + ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) + << " The datarate for the file is greater than target by too much!"; ASSERT_LE(first_drop_, last_drop) << " The first dropped frame for drop_thresh " << i << " > first dropped frame for drop_thresh " @@ -362,6 +451,81 @@ TEST_P(DatarateTestVP9, ChangingDropFrameThresh) { } } +// Check basic rate targeting for 2 temporal layers. +TEST_P(DatarateTestVP9, BasicRateTargeting2TemporalLayers) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + + // 2 Temporal layers, no spatial layers: Framerate decimation (2, 1). 
+ cfg_.ss_number_layers = 1; + cfg_.ts_number_layers = 2; + cfg_.ts_rate_decimator[0] = 2; + cfg_.ts_rate_decimator[1] = 1; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 200); + for (int i = 200; i <= 800; i += 200) { + cfg_.rc_target_bitrate = i; + ResetModel(); + // 60-40 bitrate allocation for 2 temporal layers. + cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100; + cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { + ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85) + << " The datarate for the file is lower than target by too much, " + "for layer: " << j; + ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15) + << " The datarate for the file is greater than target by too much, " + "for layer: " << j; + } + } +} + +// Check basic rate targeting for 3 temporal layers. +TEST_P(DatarateTestVP9, BasicRateTargeting3TemporalLayers) { + cfg_.rc_buf_initial_sz = 500; + cfg_.rc_buf_optimal_sz = 500; + cfg_.rc_buf_sz = 1000; + cfg_.rc_dropframe_thresh = 1; + cfg_.rc_min_quantizer = 0; + cfg_.rc_max_quantizer = 63; + cfg_.rc_end_usage = VPX_CBR; + cfg_.g_lag_in_frames = 0; + + // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1). + cfg_.ss_number_layers = 1; + cfg_.ts_number_layers = 3; + cfg_.ts_rate_decimator[0] = 4; + cfg_.ts_rate_decimator[1] = 2; + cfg_.ts_rate_decimator[2] = 1; + + ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, + 30, 1, 0, 200); + for (int i = 200; i <= 800; i += 200) { + cfg_.rc_target_bitrate = i; + ResetModel(); + // 40-20-40 bitrate allocation for 3 temporal layers. 
+ cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100; + cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100; + cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate; + ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); + for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { + ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.85) + << " The datarate for the file is lower than target by too much, " + "for layer: " << j; + ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.15) + << " The datarate for the file is greater than target by too much, " + "for layer: " << j; + } + } +} VP8_INSTANTIATE_TEST_CASE(DatarateTest, ALL_TEST_MODES); VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9, ::testing::Values(::libvpx_test::kOnePassGood), diff --git a/test/encode_test_driver.h b/test/encode_test_driver.h index 4dabcd5b4..8017a2a06 100644 --- a/test/encode_test_driver.h +++ b/test/encode_test_driver.h @@ -123,6 +123,11 @@ class Encoder { ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } + void Control(int ctrl_id, struct vpx_svc_layer_id *arg) { + const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); + ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); + } + void set_deadline(unsigned long deadline) { deadline_ = deadline; } diff --git a/vp8_scalable_patterns.c b/vp8_scalable_patterns.c deleted file mode 100644 index 870edf161..000000000 --- a/vp8_scalable_patterns.c +++ /dev/null @@ -1,694 +0,0 @@ -/* - * Copyright (c) 2012 The WebM project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - - -/* - * This is an example demonstrating how to implement a multi-layer VP8 - * encoding scheme based on temporal scalability for video applications - * that benefit from a scalable bitstream. - */ -#include <stdio.h> -#include <stdlib.h> -#include <stdarg.h> -#include <string.h> -#define VPX_CODEC_DISABLE_COMPAT 1 -#include "vpx/vpx_encoder.h" -#include "vpx/vp8cx.h" -#define interface (vpx_codec_vp8_cx()) -#define fourcc 0x30385056 - -#define IVF_FILE_HDR_SZ (32) -#define IVF_FRAME_HDR_SZ (12) - -static void mem_put_le16(char *mem, unsigned int val) { - mem[0] = val; - mem[1] = val>>8; -} - -static void mem_put_le32(char *mem, unsigned int val) { - mem[0] = val; - mem[1] = val>>8; - mem[2] = val>>16; - mem[3] = val>>24; -} - -static void die(const char *fmt, ...) { - va_list ap; - - va_start(ap, fmt); - vprintf(fmt, ap); - if(fmt[strlen(fmt)-1] != '\n') - printf("\n"); - exit(EXIT_FAILURE); -} - -static void die_codec(vpx_codec_ctx_t *ctx, const char *s) { - const char *detail = vpx_codec_error_detail(ctx); - - printf("%s: %s\n", s, vpx_codec_error(ctx)); - if(detail) - printf(" %s\n",detail); - exit(EXIT_FAILURE); -} - -static int read_frame(FILE *f, vpx_image_t *img) { - size_t nbytes, to_read; - int res = 1; - - to_read = img->w*img->h*3/2; - nbytes = fread(img->planes[0], 1, to_read, f); - if(nbytes != to_read) { - res = 0; - if(nbytes > 0) - printf("Warning: Read partial frame.
Check your width & height!\n"); - } - return res; -} - -static void write_ivf_file_header(FILE *outfile, - const vpx_codec_enc_cfg_t *cfg, - int frame_cnt) { - char header[32]; - - if(cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) - return; - header[0] = 'D'; - header[1] = 'K'; - header[2] = 'I'; - header[3] = 'F'; - mem_put_le16(header+4, 0); /* version */ - mem_put_le16(header+6, 32); /* headersize */ - mem_put_le32(header+8, fourcc); /* headersize */ - mem_put_le16(header+12, cfg->g_w); /* width */ - mem_put_le16(header+14, cfg->g_h); /* height */ - mem_put_le32(header+16, cfg->g_timebase.den); /* rate */ - mem_put_le32(header+20, cfg->g_timebase.num); /* scale */ - mem_put_le32(header+24, frame_cnt); /* length */ - mem_put_le32(header+28, 0); /* unused */ - - (void) fwrite(header, 1, 32, outfile); -} - - -static void write_ivf_frame_header(FILE *outfile, - const vpx_codec_cx_pkt_t *pkt) -{ - char header[12]; - vpx_codec_pts_t pts; - - if(pkt->kind != VPX_CODEC_CX_FRAME_PKT) - return; - - pts = pkt->data.frame.pts; - mem_put_le32(header, pkt->data.frame.sz); - mem_put_le32(header+4, pts&0xFFFFFFFF); - mem_put_le32(header+8, pts >> 32); - - (void) fwrite(header, 1, 12, outfile); -} - -static int mode_to_num_layers[12] = {1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3}; - -int main(int argc, char **argv) { - FILE *infile, *outfile[VPX_TS_MAX_LAYERS]; - vpx_codec_ctx_t codec; - vpx_codec_enc_cfg_t cfg; - int frame_cnt = 0; - vpx_image_t raw; - vpx_codec_err_t res; - unsigned int width; - unsigned int height; - int frame_avail; - int got_data; - int flags = 0; - int i; - int pts = 0; /* PTS starts at 0 */ - int frame_duration = 1; /* 1 timebase tick per frame */ - - int layering_mode = 0; - int frames_in_layer[VPX_TS_MAX_LAYERS] = {0}; - int layer_flags[VPX_TS_MAX_PERIODICITY] = {0}; - int flag_periodicity; - int max_intra_size_pct; - - /* Check usage and arguments */ - if (argc < 9) - die("Usage: %s " - " ... 
\n", argv[0]); - - width = strtol (argv[3], NULL, 0); - height = strtol (argv[4], NULL, 0); - if (width < 16 || width%2 || height <16 || height%2) - die ("Invalid resolution: %d x %d", width, height); - - if (!sscanf(argv[7], "%d", &layering_mode)) - die ("Invalid mode %s", argv[7]); - if (layering_mode<0 || layering_mode>11) - die ("Invalid mode (0..11) %s", argv[7]); - - if (argc != 8+mode_to_num_layers[layering_mode]) - die ("Invalid number of arguments"); - - if (!vpx_img_alloc (&raw, VPX_IMG_FMT_I420, width, height, 32)) - die ("Failed to allocate image", width, height); - - printf("Using %s\n",vpx_codec_iface_name(interface)); - - /* Populate encoder configuration */ - res = vpx_codec_enc_config_default(interface, &cfg, 0); - if(res) { - printf("Failed to get config: %s\n", vpx_codec_err_to_string(res)); - return EXIT_FAILURE; - } - - /* Update the default configuration with our settings */ - cfg.g_w = width; - cfg.g_h = height; - - /* Timebase format e.g. 30fps: numerator=1, demoninator=30 */ - if (!sscanf (argv[5], "%d", &cfg.g_timebase.num )) - die ("Invalid timebase numerator %s", argv[5]); - if (!sscanf (argv[6], "%d", &cfg.g_timebase.den )) - die ("Invalid timebase denominator %s", argv[6]); - - for (i=8; i<8+mode_to_num_layers[layering_mode]; i++) - if (!sscanf(argv[i], "%ud", &cfg.ts_target_bitrate[i-8])) - die ("Invalid data rate %s", argv[i]); - - /* Real time parameters */ - cfg.rc_dropframe_thresh = 0; - cfg.rc_end_usage = VPX_CBR; - cfg.rc_resize_allowed = 0; - cfg.rc_min_quantizer = 2; - cfg.rc_max_quantizer = 56; - cfg.rc_undershoot_pct = 100; - cfg.rc_overshoot_pct = 15; - cfg.rc_buf_initial_sz = 500; - cfg.rc_buf_optimal_sz = 600; - cfg.rc_buf_sz = 1000; - - /* Enable error resilient mode */ - cfg.g_error_resilient = 1; - cfg.g_lag_in_frames = 0; - cfg.kf_mode = VPX_KF_DISABLED; - - /* Disable automatic keyframe placement */ - cfg.kf_min_dist = cfg.kf_max_dist = 3000; - - /* Default setting for bitrate: used in special case of 1 layer (case 
0). */ - cfg.rc_target_bitrate = cfg.ts_target_bitrate[0]; - - /* Temporal scaling parameters: */ - /* NOTE: The 3 prediction frames cannot be used interchangeably due to - * differences in the way they are handled throughout the code. The - * frames should be allocated to layers in the order LAST, GF, ARF. - * Other combinations work, but may produce slightly inferior results. - */ - switch (layering_mode) - { - case 0: - { - /* 1-layer */ - int ids[1] = {0}; - cfg.ts_number_layers = 1; - cfg.ts_periodicity = 1; - cfg.ts_rate_decimator[0] = 1; - memcpy(cfg.ts_layer_id, ids, sizeof(ids)); - - flag_periodicity = cfg.ts_periodicity; - - // Update L only. - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - break; - } - case 1: - { - /* 2-layers, 2-frame period */ - int ids[2] = {0,1}; - cfg.ts_number_layers = 2; - cfg.ts_periodicity = 2; - cfg.ts_rate_decimator[0] = 2; - cfg.ts_rate_decimator[1] = 1; - memcpy(cfg.ts_layer_id, ids, sizeof(ids)); - - flag_periodicity = cfg.ts_periodicity; -#if 1 - /* 0=L, 1=GF, Intra-layer prediction enabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; - layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_REF_ARF; -#else - /* 0=L, 1=GF, Intra-layer prediction disabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; - layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST; -#endif - break; - } - - case 2: - { - /* 2-layers, 3-frame period */ - int ids[3] = {0,1,1}; - cfg.ts_number_layers = 2; - cfg.ts_periodicity = 3; - cfg.ts_rate_decimator[0] = 3; - cfg.ts_rate_decimator[1] = 1; - memcpy(cfg.ts_layer_id, ids, sizeof(ids)); - - flag_periodicity = cfg.ts_periodicity; - - /* 0=L, 1=GF, Intra-layer prediction enabled */ - layer_flags[0] = 
VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - layer_flags[1] = - layer_flags[2] = VP8_EFLAG_NO_REF_GF | - VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_UPD_LAST; - break; - } - - case 3: - { - /* 3-layers, 6-frame period */ - int ids[6] = {0,2,2,1,2,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 6; - cfg.ts_rate_decimator[0] = 6; - cfg.ts_rate_decimator[1] = 3; - cfg.ts_rate_decimator[2] = 1; - memcpy(cfg.ts_layer_id, ids, sizeof(ids)); - - flag_periodicity = cfg.ts_periodicity; - - /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_UPD_LAST; - layer_flags[1] = - layer_flags[2] = - layer_flags[4] = - layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST; - break; - } - - case 4: - { - /* 3-layers, 4-frame period */ - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 4; - cfg.ts_rate_decimator[0] = 4; - cfg.ts_rate_decimator[1] = 2; - cfg.ts_rate_decimator[2] = 1; - memcpy(cfg.ts_layer_id, ids, sizeof(ids)); - - flag_periodicity = cfg.ts_periodicity; - - /* 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_UPD_LAST; - layer_flags[1] = - layer_flags[3] = VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF; - break; - } - - case 5: - { - /* 3-layers, 4-frame period */ - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 4; - cfg.ts_rate_decimator[0] = 4; - cfg.ts_rate_decimator[1] = 2; - cfg.ts_rate_decimator[2] = 1; - 
memcpy(cfg.ts_layer_id, ids, sizeof(ids)); - - flag_periodicity = cfg.ts_periodicity; - - /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, - * disabled in layer 2 - */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - layer_flags[2] = VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; - layer_flags[1] = - layer_flags[3] = VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF; - break; - } - - case 6: - { - /* 3-layers, 4-frame period */ - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 4; - cfg.ts_rate_decimator[0] = 4; - cfg.ts_rate_decimator[1] = 2; - cfg.ts_rate_decimator[2] = 1; - memcpy(cfg.ts_layer_id, ids, sizeof(ids)); - - flag_periodicity = cfg.ts_periodicity; - - /* 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - layer_flags[2] = VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; - layer_flags[1] = - layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; - break; - } - - case 7: - { - /* NOTE: Probably of academic interest only */ - - /* 5-layers, 16-frame period */ - int ids[16] = {0,4,3,4,2,4,3,4,1,4,3,4,2,4,3,4}; - cfg.ts_number_layers = 5; - cfg.ts_periodicity = 16; - cfg.ts_rate_decimator[0] = 16; - cfg.ts_rate_decimator[1] = 8; - cfg.ts_rate_decimator[2] = 4; - cfg.ts_rate_decimator[3] = 2; - cfg.ts_rate_decimator[4] = 1; - memcpy(cfg.ts_layer_id, ids, sizeof(ids)); - - flag_periodicity = cfg.ts_periodicity; - - layer_flags[0] = VPX_EFLAG_FORCE_KF; - layer_flags[1] = - layer_flags[3] = - layer_flags[5] = - layer_flags[7] = - layer_flags[9] = - layer_flags[11] = - layer_flags[13] = - layer_flags[15] = VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF; - layer_flags[2] = - 
layer_flags[6] = - layer_flags[10] = - layer_flags[14] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF; - layer_flags[4] = - layer_flags[12] = VP8_EFLAG_NO_REF_LAST | - VP8_EFLAG_NO_UPD_ARF; - layer_flags[8] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF; - break; - } - - case 8: - { - /* 2-layers, with sync point at first frame of layer 1. */ - int ids[2] = {0,1}; - cfg.ts_number_layers = 2; - cfg.ts_periodicity = 2; - cfg.ts_rate_decimator[0] = 2; - cfg.ts_rate_decimator[1] = 1; - memcpy(cfg.ts_layer_id, ids, sizeof(ids)); - - flag_periodicity = 8; - - /* 0=L, 1=GF */ - // ARF is used as predictor for all frames, and is only updated on - // key frame. Sync point every 8 frames. - - // Layer 0: predict from L and ARF, update L and G. - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | - VP8_EFLAG_NO_UPD_ARF; - - // Layer 1: sync point: predict from L and ARF, and update G. - layer_flags[1] = VP8_EFLAG_NO_REF_GF | - VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_UPD_ARF; - - // Layer 0, predict from L and ARF, update L. - layer_flags[2] = VP8_EFLAG_NO_REF_GF | - VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF; - - // Layer 1: predict from L, G and ARF, and update G. - layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_UPD_ENTROPY; - - // Layer 0 - layer_flags[4] = layer_flags[2]; - - // Layer 1 - layer_flags[5] = layer_flags[3]; - - // Layer 0 - layer_flags[6] = layer_flags[4]; - - // Layer 1 - layer_flags[7] = layer_flags[5]; - break; - } - - case 9: - { - /* 3-layers */ - // Sync points for layer 1 and 2 every 8 frames. 
- - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 4; - cfg.ts_rate_decimator[0] = 4; - cfg.ts_rate_decimator[1] = 2; - cfg.ts_rate_decimator[2] = 1; - memcpy(cfg.ts_layer_id, ids, sizeof(ids)); - - flag_periodicity = 8; - - /* 0=L, 1=GF, 2=ARF */ - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; - layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; - layer_flags[3] = - layer_flags[5] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; - layer_flags[4] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; - layer_flags[6] = VP8_EFLAG_NO_REF_ARF | - VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; - layer_flags[7] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_UPD_ENTROPY; - break; - } - case 10: - { - // 3-layers structure where ARF is used as predictor for all frames, - // and is only updated on key frame. - // Sync points for layer 1 and 2 every 8 frames. - - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 4; - cfg.ts_rate_decimator[0] = 4; - cfg.ts_rate_decimator[1] = 2; - cfg.ts_rate_decimator[2] = 1; - memcpy(cfg.ts_layer_id, ids, sizeof(ids)); - - flag_periodicity = 8; - - /* 0=L, 1=GF, 2=ARF */ - - // Layer 0: predict from L and ARF; update L and G. - layer_flags[0] = VPX_EFLAG_FORCE_KF | - VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF; - - // Layer 2: sync point: predict from L and ARF; update none. - layer_flags[1] = VP8_EFLAG_NO_REF_GF | - VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_UPD_ENTROPY; - - // Layer 1: sync point: predict from L and ARF; update G. 
- layer_flags[2] = VP8_EFLAG_NO_REF_GF | - VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_UPD_LAST; - - // Layer 2: predict from L, G, ARF; update none. - layer_flags[3] = VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_UPD_ENTROPY; - - // Layer 0: predict from L and ARF; update L. - layer_flags[4] = VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF; - - // Layer 2: predict from L, G, ARF; update none. - layer_flags[5] = layer_flags[3]; - - // Layer 1: predict from L, G, ARF; update G. - layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_UPD_LAST; - - // Layer 2: predict from L, G, ARF; update none. - layer_flags[7] = layer_flags[3]; - break; - } - case 11: - default: - { - // 3-layers structure as in case 10, but no sync/refresh points for - // layer 1 and 2. - - int ids[4] = {0,2,1,2}; - cfg.ts_number_layers = 3; - cfg.ts_periodicity = 4; - cfg.ts_rate_decimator[0] = 4; - cfg.ts_rate_decimator[1] = 2; - cfg.ts_rate_decimator[2] = 1; - memcpy(cfg.ts_layer_id, ids, sizeof(ids)); - - flag_periodicity = 8; - - /* 0=L, 1=GF, 2=ARF */ - - // Layer 0: predict from L and ARF; update L. - layer_flags[0] = VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_REF_GF; - layer_flags[4] = layer_flags[0]; - - // Layer 1: predict from L, G, ARF; update G. - layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_UPD_LAST; - layer_flags[6] = layer_flags[2]; - - // Layer 2: predict from L, G, ARF; update none. 
- layer_flags[1] = VP8_EFLAG_NO_UPD_GF | - VP8_EFLAG_NO_UPD_ARF | - VP8_EFLAG_NO_UPD_LAST | - VP8_EFLAG_NO_UPD_ENTROPY; - layer_flags[3] = layer_flags[1]; - layer_flags[5] = layer_flags[1]; - layer_flags[7] = layer_flags[1]; - break; - } - } - - /* Open input file */ - if(!(infile = fopen(argv[1], "rb"))) - die("Failed to open %s for reading", argv[1]); - - /* Open an output file for each stream */ - for (i=0; i<(int)cfg.ts_number_layers; i++) - { - char file_name[512]; - sprintf (file_name, "%s_%d.ivf", argv[2], i); - if (!(outfile[i] = fopen(file_name, "wb"))) - die("Failed to open %s for writing", file_name); - write_ivf_file_header(outfile[i], &cfg, 0); - } - - /* Initialize codec */ - if (vpx_codec_enc_init (&codec, interface, &cfg, 0)) - die_codec (&codec, "Failed to initialize encoder"); - - /* Cap CPU & first I-frame size */ - vpx_codec_control (&codec, VP8E_SET_CPUUSED, -6); - vpx_codec_control (&codec, VP8E_SET_STATIC_THRESHOLD, 1); - vpx_codec_control (&codec, VP8E_SET_NOISE_SENSITIVITY, 1); - vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1); - - max_intra_size_pct = (int) (((double)cfg.rc_buf_optimal_sz * 0.5) - * ((double) cfg.g_timebase.den / cfg.g_timebase.num) - / 10.0); - /* printf ("max_intra_size_pct=%d\n", max_intra_size_pct); */ - - vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, - max_intra_size_pct); - - frame_avail = 1; - while (frame_avail || got_data) { - vpx_codec_iter_t iter = NULL; - const vpx_codec_cx_pkt_t *pkt; - - flags = layer_flags[frame_cnt % flag_periodicity]; - - frame_avail = read_frame(infile, &raw); - if (vpx_codec_encode(&codec, frame_avail? 
&raw : NULL, pts, - 1, flags, VPX_DL_REALTIME)) - die_codec(&codec, "Failed to encode frame"); - - /* Reset KF flag */ - if (layering_mode != 7) - layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; - - got_data = 0; - while ( (pkt = vpx_codec_get_cx_data(&codec, &iter)) ) { - got_data = 1; - switch (pkt->kind) { - case VPX_CODEC_CX_FRAME_PKT: - for (i=cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; - i<(int)cfg.ts_number_layers; i++) - { - write_ivf_frame_header(outfile[i], pkt); - (void) fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, - outfile[i]); - frames_in_layer[i]++; - } - break; - default: - break; - } - } - frame_cnt++; - pts += frame_duration; - } - fclose (infile); - - printf ("Processed %d frames.\n",frame_cnt-1); - if (vpx_codec_destroy(&codec)) - die_codec (&codec, "Failed to destroy codec"); - - /* Try to rewrite the output file headers with the actual frame count */ - for (i=0; i<(int)cfg.ts_number_layers; i++) - { - if (!fseek(outfile[i], 0, SEEK_SET)) - write_ivf_file_header (outfile[i], &cfg, frames_in_layer[i]); - fclose (outfile[i]); - } - - return EXIT_SUCCESS; -} diff --git a/vp9/common/vp9_onyx.h b/vp9/common/vp9_onyx.h index 564e4195f..ac39a98fd 100644 --- a/vp9/common/vp9_onyx.h +++ b/vp9/common/vp9_onyx.h @@ -147,8 +147,12 @@ extern "C" { // END DATARATE CONTROL OPTIONS // ---------------------------------------------------------------- - // Spatial scalability - int ss_number_layers; + // Spatial and temporal scalability. + int ss_number_layers; // Number of spatial layers. + int ts_number_layers; // Number of temporal layers. + // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. + int ts_target_bitrate[VPX_TS_MAX_LAYERS]; + int ts_rate_decimator[VPX_TS_MAX_LAYERS]; // these parameters aren't to be used in final build don't use!!! 
int play_alternate; diff --git a/vp9/encoder/vp9_onyx_if.c b/vp9/encoder/vp9_onyx_if.c index b4972be64..92344f6a6 100644 --- a/vp9/encoder/vp9_onyx_if.c +++ b/vp9/encoder/vp9_onyx_if.c @@ -1144,6 +1144,109 @@ static int64_t rescale(int val, int64_t num, int denom) { return (llval * llnum / llden); } +// Initialize layer context data from init_config(). +static void init_layer_context(VP9_COMP *const cpi) { + const VP9_CONFIG *const oxcf = &cpi->oxcf; + int temporal_layer = 0; + cpi->svc.spatial_layer_id = 0; + cpi->svc.temporal_layer_id = 0; + for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; + ++temporal_layer) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + RATE_CONTROL *const lrc = &lc->rc; + lrc->active_worst_quality = q_trans[oxcf->worst_allowed_q]; + lrc->avg_frame_qindex[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; + lrc->last_q[INTER_FRAME] = q_trans[oxcf->worst_allowed_q]; + lrc->ni_av_qi = lrc->active_worst_quality; + lrc->total_actual_bits = 0; + lrc->total_target_vs_actual = 0; + lrc->ni_tot_qi = 0; + lrc->tot_q = 0.0; + lrc->ni_frames = 0; + lrc->rate_correction_factor = 1.0; + lrc->key_frame_rate_correction_factor = 1.0; + lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * + 1000; + lrc->buffer_level = rescale((int)(oxcf->starting_buffer_level), + lc->target_bandwidth, 1000); + lrc->bits_off_target = lrc->buffer_level; + } +} + +// Update the layer context from a change_config() call. 
+static void update_layer_context_change_config(VP9_COMP *const cpi, + const int target_bandwidth) { + const VP9_CONFIG *const oxcf = &cpi->oxcf; + const RATE_CONTROL *const rc = &cpi->rc; + int temporal_layer = 0; + float bitrate_alloc = 1.0; + for (temporal_layer = 0; temporal_layer < cpi->svc.number_temporal_layers; + ++temporal_layer) { + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + RATE_CONTROL *const lrc = &lc->rc; + lc->target_bandwidth = oxcf->ts_target_bitrate[temporal_layer] * 1000; + bitrate_alloc = (float)lc->target_bandwidth / (float)target_bandwidth; + // Update buffer-related quantities. + lc->starting_buffer_level = oxcf->starting_buffer_level * bitrate_alloc; + lc->optimal_buffer_level = oxcf->optimal_buffer_level * bitrate_alloc; + lc->maximum_buffer_size = oxcf->maximum_buffer_size * bitrate_alloc; + lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); + lrc->buffer_level = MIN(lrc->buffer_level, lc->maximum_buffer_size); + // Update framerate-related quantities. + lc->framerate = oxcf->framerate / oxcf->ts_rate_decimator[temporal_layer]; + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); + lrc->max_frame_bandwidth = rc->max_frame_bandwidth; + // Update qp-related quantities. + lrc->worst_quality = rc->worst_quality; + lrc->best_quality = rc->best_quality; + lrc->active_worst_quality = rc->active_worst_quality; + } +} + +// Prior to encoding the frame, update framerate-related quantities +// for the current layer. 
+static void update_layer_framerate(VP9_COMP *const cpi) { + int temporal_layer = cpi->svc.temporal_layer_id; + LAYER_CONTEXT *const lc = &cpi->svc.layer_context[temporal_layer]; + RATE_CONTROL *const lrc = &lc->rc; + lc->framerate = cpi->oxcf.framerate / + cpi->oxcf.ts_rate_decimator[temporal_layer]; + lrc->av_per_frame_bandwidth = (int)(lc->target_bandwidth / + lc->framerate); + lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; +} + +// Prior to encoding the frame, set the layer context, for the current layer +// to be encoded, to the cpi struct. +static void restore_layer_context(VP9_COMP *const cpi) { + int temporal_layer = cpi->svc.temporal_layer_id; + LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + int frame_since_key = cpi->rc.frames_since_key; + int frame_to_key = cpi->rc.frames_to_key; + cpi->rc = lc->rc; + cpi->oxcf.target_bandwidth = lc->target_bandwidth; + cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; + cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; + cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; + cpi->output_framerate = lc->framerate; + // Reset the frames_since_key and frames_to_key counters to their values + // before the layer restore. Keep these defined for the stream (not layer). + cpi->rc.frames_since_key = frame_since_key; + cpi->rc.frames_to_key = frame_to_key; +} + +// Save the layer context after encoding the frame. 
+static void save_layer_context(VP9_COMP *const cpi) { + int temporal_layer = cpi->svc.temporal_layer_id; + LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + lc->rc = cpi->rc; + lc->target_bandwidth = cpi->oxcf.target_bandwidth; + lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; + lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; + lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; + lc->framerate = cpi->output_framerate; +} + static void set_tile_limits(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; @@ -1170,6 +1273,16 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cm->subsampling_y = 0; vp9_alloc_compressor_data(cpi); + // Spatial scalability. + cpi->svc.number_spatial_layers = oxcf->ss_number_layers; + // Temporal scalability. + cpi->svc.number_temporal_layers = oxcf->ts_number_layers; + + if (cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + init_layer_context(cpi); + } + // change includes all joint functionality vp9_change_config(ptr, oxcf); @@ -1210,9 +1323,6 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->gld_fb_idx = 1; cpi->alt_fb_idx = 2; - cpi->current_layer = 0; - cpi->use_svc = 0; - set_tile_limits(cpi); cpi->fixed_divide[0] = 0; @@ -1220,7 +1330,6 @@ static void init_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->fixed_divide[i] = 0x80000 / i; } - void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { VP9_COMP *cpi = (VP9_COMP *)(ptr); VP9_COMMON *const cm = &cpi->common; @@ -1312,10 +1421,10 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { cpi->oxcf.target_bandwidth, 1000); // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. 
- if (cpi->rc.bits_off_target > cpi->oxcf.maximum_buffer_size) { - cpi->rc.bits_off_target = cpi->oxcf.maximum_buffer_size; - cpi->rc.buffer_level = cpi->rc.bits_off_target; - } + cpi->rc.bits_off_target = MIN(cpi->rc.bits_off_target, + cpi->oxcf.maximum_buffer_size); + cpi->rc.buffer_level = MIN(cpi->rc.buffer_level, + cpi->oxcf.maximum_buffer_size); // Set up frame rate and related parameters rate control values. vp9_new_framerate(cpi, cpi->oxcf.framerate); @@ -1350,6 +1459,11 @@ void vp9_change_config(VP9_PTR ptr, VP9_CONFIG *oxcf) { } update_frame_size(cpi); + if (cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + update_layer_context_change_config(cpi, cpi->oxcf.target_bandwidth); + } + cpi->speed = cpi->oxcf.cpu_used; if (cpi->oxcf.lag_in_frames == 0) { @@ -1573,6 +1687,8 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { vp9_create_common(cm); + cpi->use_svc = 0; + init_config((VP9_PTR)cpi, oxcf); init_pick_mode_context(cpi); @@ -1588,9 +1704,6 @@ VP9_PTR vp9_create_compressor(VP9_CONFIG *oxcf) { cpi->alt_is_last = 0; cpi->gold_is_alt = 0; - // Spatial scalability - cpi->number_spatial_layers = oxcf->ss_number_layers; - // Create the encoder segmentation map and set all entries to 0 CHECK_MEM_ERROR(cm, cpi->segmentation_map, vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); @@ -3504,6 +3617,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, adjust_frame_rate(cpi); } + if (cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + update_layer_framerate(cpi); + restore_layer_context(cpi); + } + // start with a 0 size frame *size = 0; @@ -3579,6 +3698,12 @@ int vp9_get_compressed_data(VP9_PTR ptr, unsigned int *frame_flags, cpi->droppable = !frame_is_reference(cpi); } + // Save layer specific state. 
+ if (cpi->svc.number_temporal_layers > 1 && + cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + save_layer_context(cpi); + } + vpx_usec_timer_mark(&cmptimer); cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer); diff --git a/vp9/encoder/vp9_onyx_int.h b/vp9/encoder/vp9_onyx_int.h index d0ca962db..09211aef7 100644 --- a/vp9/encoder/vp9_onyx_int.h +++ b/vp9/encoder/vp9_onyx_int.h @@ -407,6 +407,15 @@ typedef struct { int super_fast_rtc; } SPEED_FEATURES; +typedef struct { + RATE_CONTROL rc; + int target_bandwidth; + int64_t starting_buffer_level; + int64_t optimal_buffer_level; + int64_t maximum_buffer_size; + double framerate; +} LAYER_CONTEXT; + typedef struct VP9_COMP { DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); @@ -451,9 +460,6 @@ typedef struct VP9_COMP { int gld_fb_idx; int alt_fb_idx; - int current_layer; - int use_svc; - #if CONFIG_MULTIPLE_ARF int alt_ref_fb_idx[REF_FRAMES - 3]; #endif @@ -669,7 +675,18 @@ typedef struct VP9_COMP { int initial_width; int initial_height; - int number_spatial_layers; + int use_svc; + + struct svc { + int spatial_layer_id; + int temporal_layer_id; + int number_spatial_layers; + int number_temporal_layers; + // Layer context used for rate control in CBR mode, only defined for + // temporal layers for now. + LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS]; + } svc; + int enable_encode_breakout; // Default value is 1. From first pass stats, // encode_breakout may be disabled. diff --git a/vp9/encoder/vp9_ratectrl.c b/vp9/encoder/vp9_ratectrl.c index abbf39b81..4bff9947b 100644 --- a/vp9/encoder/vp9_ratectrl.c +++ b/vp9/encoder/vp9_ratectrl.c @@ -241,6 +241,26 @@ int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { return target; } + +// Update the buffer level for higher layers, given the encoded current layer. 
+static void update_layer_buffer_level(VP9_COMP *const cpi, + int encoded_frame_size) { + int temporal_layer = 0; + int current_temporal_layer = cpi->svc.temporal_layer_id; + for (temporal_layer = current_temporal_layer + 1; + temporal_layer < cpi->svc.number_temporal_layers; ++temporal_layer) { + LAYER_CONTEXT *lc = &cpi->svc.layer_context[temporal_layer]; + RATE_CONTROL *lrc = &lc->rc; + int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate - + encoded_frame_size); + lrc->bits_off_target += bits_off_for_this_layer; + + // Clip buffer level to maximum buffer size for the layer. + lrc->bits_off_target = MIN(lrc->bits_off_target, lc->maximum_buffer_size); + lrc->buffer_level = lrc->bits_off_target; + } +} + // Update the buffer level: leaky bucket model. static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { const VP9_COMMON *const cm = &cpi->common; @@ -255,14 +275,18 @@ static void update_buffer_level(VP9_COMP *cpi, int encoded_frame_size) { } // Clip the buffer level to the maximum specified buffer size. - rc->buffer_level = MIN(rc->bits_off_target, oxcf->maximum_buffer_size); + rc->bits_off_target = MIN(rc->bits_off_target, oxcf->maximum_buffer_size); + rc->buffer_level = rc->bits_off_target; + + if (cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + update_layer_buffer_level(cpi, encoded_frame_size); + } } int vp9_rc_drop_frame(VP9_COMP *cpi) { const VP9_CONFIG *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; - if (!oxcf->drop_frames_water_mark) { return 0; } else { @@ -273,7 +297,7 @@ int vp9_rc_drop_frame(VP9_COMP *cpi) { // If buffer is below drop_mark, for now just drop every other frame // (starting with the next frame) until it increases back over drop_mark. 
int drop_mark = (int)(oxcf->drop_frames_water_mark * - oxcf->optimal_buffer_level / 100); + oxcf->optimal_buffer_level / 100); if ((rc->buffer_level > drop_mark) && (rc->decimation_factor > 0)) { --rc->decimation_factor; @@ -301,7 +325,8 @@ static double get_rate_correction_factor(const VP9_COMP *cpi) { if (cpi->common.frame_type == KEY_FRAME) { return cpi->rc.key_frame_rate_correction_factor; } else { - if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) + if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) return cpi->rc.gf_rate_correction_factor; else return cpi->rc.rate_correction_factor; @@ -312,7 +337,8 @@ static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { if (cpi->common.frame_type == KEY_FRAME) { cpi->rc.key_frame_rate_correction_factor = factor; } else { - if (cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) + if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && + !(cpi->use_svc && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) cpi->rc.gf_rate_correction_factor = factor; else cpi->rc.rate_correction_factor = factor; @@ -538,7 +564,12 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi, if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) { active_best_quality = cpi->cq_target_quality; } else { - active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]]; + // Use the lower of active_worst_quality and recent/average Q. + if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) + active_best_quality = inter_minq[rc->avg_frame_qindex[INTER_FRAME]]; + else + active_best_quality = inter_minq[active_worst_quality]; + // // For the constrained quality mode we don't want // q to fall below the cq level. 
if ((oxcf->end_usage == USAGE_CONSTRAINED_QUALITY) && @@ -574,7 +605,6 @@ static int rc_pick_q_and_adjust_q_bounds_one_pass(const VP9_COMP *cpi, *top_index = (active_worst_quality + active_best_quality) / 2; } #endif - if (oxcf->end_usage == USAGE_CONSTANT_QUALITY) { q = active_best_quality; // Special case code to try and match quality with forced key frames @@ -1002,21 +1032,6 @@ void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) { cpi->rc.frames_to_key--; } -void vp9_rc_get_svc_params(VP9_COMP *cpi) { - VP9_COMMON *const cm = &cpi->common; - if ((cm->current_video_frame == 0) || - (cm->frame_flags & FRAMEFLAGS_KEY) || - (cpi->oxcf.auto_key && (cpi->rc.frames_since_key % - cpi->key_frame_frequency == 0))) { - cm->frame_type = KEY_FRAME; - cpi->rc.source_alt_ref_active = 0; - } else { - cm->frame_type = INTER_FRAME; - } - cpi->rc.frames_till_gf_update_due = INT_MAX; - cpi->rc.baseline_gf_interval = INT_MAX; -} - static int test_for_kf_one_pass(VP9_COMP *cpi) { // Placeholder function for auto key frame return 0; @@ -1171,6 +1186,32 @@ static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { return target; } +void vp9_rc_get_svc_params(VP9_COMP *cpi) { + VP9_COMMON *const cm = &cpi->common; + int target = cpi->rc.av_per_frame_bandwidth; + if ((cm->current_video_frame == 0) || + (cm->frame_flags & FRAMEFLAGS_KEY) || + (cpi->oxcf.auto_key && (cpi->rc.frames_since_key % + cpi->key_frame_frequency == 0))) { + cm->frame_type = KEY_FRAME; + cpi->rc.source_alt_ref_active = 0; + if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + target = calc_iframe_target_size_one_pass_cbr(cpi); + cpi->rc.active_worst_quality = cpi->rc.worst_quality; + } + } else { + cm->frame_type = INTER_FRAME; + if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { + target = calc_pframe_target_size_one_pass_cbr(cpi); + cpi->rc.active_worst_quality = + calc_active_worst_quality_one_pass_cbr(cpi); + } + } + vp9_rc_set_frame_target(cpi, 
target); + cpi->rc.frames_till_gf_update_due = INT_MAX; + cpi->rc.baseline_gf_interval = INT_MAX; +} + void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; int target; diff --git a/vp9/vp9_cx_iface.c b/vp9/vp9_cx_iface.c index 35d202036..b601fa25c 100644 --- a/vp9/vp9_cx_iface.c +++ b/vp9/vp9_cx_iface.c @@ -175,6 +175,23 @@ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS); /*Spatial layers max */ + + RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); + if (cfg->ts_number_layers > 1) { + int i; + for (i = 1; i < cfg->ts_number_layers; ++i) { + if (cfg->ts_target_bitrate[i] < cfg->ts_target_bitrate[i-1]) { + ERROR("ts_target_bitrate entries are not increasing"); + } + } + RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers-1], 1, 1); + for (i = cfg->ts_number_layers-2; i > 0; --i) { + if (cfg->ts_rate_decimator[i-1] != 2*cfg->ts_rate_decimator[i]) { + ERROR("ts_rate_decimator factors are not powers of 2"); + } + } + } + /* VP8 does not support a lower bound on the keyframe interval in * automatic keyframe placement mode. 
*/ @@ -342,6 +359,19 @@ static vpx_codec_err_t set_vp9e_config(VP9_CONFIG *oxcf, oxcf->aq_mode = vp8_cfg.aq_mode; oxcf->ss_number_layers = cfg.ss_number_layers; + + oxcf->ts_number_layers = cfg.ts_number_layers; + + if (oxcf->ts_number_layers > 1) { + memcpy(oxcf->ts_target_bitrate, cfg.ts_target_bitrate, + sizeof(cfg.ts_target_bitrate)); + memcpy(oxcf->ts_rate_decimator, cfg.ts_rate_decimator, + sizeof(cfg.ts_rate_decimator)); + } else if (oxcf->ts_number_layers == 1) { + oxcf->ts_target_bitrate[0] = oxcf->target_bandwidth; + oxcf->ts_rate_decimator[0] = 1; + } + /* printf("Current VP9 Settings: \n"); printf("target_bandwidth: %d\n", oxcf->target_bandwidth); @@ -1012,6 +1042,32 @@ static vpx_codec_err_t vp9e_set_svc(vpx_codec_alg_priv_t *ctx, int ctr_id, va_list args) { int data = va_arg(args, int); - vp9_set_svc(ctx->cpi, data); + // CBR mode for SVC with both temporal and spatial layers not yet supported. + // Reject it before vp9_set_svc() so a failed call leaves SVC state untouched. + if (data == 1 && ctx->cfg.rc_end_usage == VPX_CBR && + ctx->cfg.ss_number_layers > 1 && ctx->cfg.ts_number_layers > 1) { + return VPX_CODEC_INVALID_PARAM; + } + vp9_set_svc(ctx->cpi, data); + return VPX_CODEC_OK; +} + +static vpx_codec_err_t vp9e_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, + int ctr_id, + va_list args) { + vpx_svc_layer_id_t *data = va_arg(args, vpx_svc_layer_id_t *); + VP9_COMP *cpi = (VP9_COMP *)ctx->cpi; + // Validate both layer ids before storing them: a rejected request must not + // leave an out-of-range id in cpi->svc.
+ if (data->temporal_layer_id < 0 || + data->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) { + return VPX_CODEC_INVALID_PARAM; + } + if (data->spatial_layer_id < 0 || + data->spatial_layer_id >= (int)ctx->cfg.ss_number_layers) { + return VPX_CODEC_INVALID_PARAM; + } + cpi->svc.spatial_layer_id = data->spatial_layer_id; + cpi->svc.temporal_layer_id = data->temporal_layer_id; return VPX_CODEC_OK; } @@ -1027,7 +1083,9 @@ static vpx_codec_err_t vp9e_set_svc_parameters(vpx_codec_alg_priv_t *ctx, params = *(vpx_svc_parameters_t *)data; - cpi->current_layer = params.layer; + cpi->svc.spatial_layer_id = params.spatial_layer; + cpi->svc.temporal_layer_id = params.temporal_layer; + cpi->lst_fb_idx = params.lst_fb_idx; cpi->gld_fb_idx = params.gld_fb_idx; cpi->alt_fb_idx = params.alt_fb_idx; @@ -1076,6 +1134,7 @@ static vpx_codec_ctrl_fn_map_t vp9e_ctf_maps[] = { {VP9_GET_REFERENCE, get_reference}, {VP9E_SET_SVC, vp9e_set_svc}, {VP9E_SET_SVC_PARAMETERS, vp9e_set_svc_parameters}, + {VP9E_SET_SVC_LAYER_ID, vp9e_set_svc_layer_id}, { -1, NULL}, }; @@ -1126,7 +1185,11 @@ static vpx_codec_enc_cfg_map_t vp9e_usage_cfg_map[] = { 9999, /* kf_max_dist */ VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */ - + 1, /* ts_number_layers */ + {0}, /* ts_target_bitrate */ + {0}, /* ts_rate_decimator */ + 0, /* ts_periodicity */ + {0}, /* ts_layer_id */ #if VPX_ENCODER_ABI_VERSION == (1 + VPX_CODEC_ABI_VERSION) "vp8.fpf" /* first pass filename */ #endif diff --git a/vpx/src/svc_encodeframe.c b/vpx/src/svc_encodeframe.c index 810e881c8..12f7166c3 100644 --- a/vpx/src/svc_encodeframe.c +++ b/vpx/src/svc_encodeframe.c @@ -499,6 +499,7 @@ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, // modify encoder configuration enc_cfg->ss_number_layers = si->layers; + enc_cfg->ts_number_layers = 1; // Temporal layers not used in this encoder.
enc_cfg->kf_mode = VPX_KF_DISABLED; enc_cfg->g_pass = VPX_RC_ONE_PASS; // Lag in frames not currently supported @@ -691,7 +692,8 @@ static void set_svc_parameters(SvcContext *svc_ctx, SvcInternal *const si = get_svc_internal(svc_ctx); memset(&svc_params, 0, sizeof(svc_params)); - svc_params.layer = si->layer; + svc_params.temporal_layer = 0; + svc_params.spatial_layer = si->layer; svc_params.flags = si->enc_frame_flags; layer = si->layer; diff --git a/vpx/vp8cx.h b/vpx/vp8cx.h index 829490f73..d0ac1afc8 100644 --- a/vpx/vp8cx.h +++ b/vpx/vp8cx.h @@ -194,7 +194,8 @@ enum vp8e_enc_control_id { VP9E_SET_AQ_MODE, VP9E_SET_SVC, - VP9E_SET_SVC_PARAMETERS + VP9E_SET_SVC_PARAMETERS, + VP9E_SET_SVC_LAYER_ID }; /*!\brief vpx 1-D scaling mode @@ -285,7 +286,8 @@ typedef enum { typedef struct vpx_svc_parameters { unsigned int width; /**< width of current spatial layer */ unsigned int height; /**< height of current spatial layer */ - int layer; /**< current layer number - 0 = base */ + int spatial_layer; /**< current spatial layer number - 0 = base */ + int temporal_layer; /**< current temporal layer number - 0 = base */ int flags; /**< encode frame flags */ int max_quantizer; /**< max quantizer for current layer */ int min_quantizer; /**< min quantizer for current layer */ @@ -295,6 +297,11 @@ typedef struct vpx_svc_parameters { int alt_fb_idx; /**< alt reference frame frame buffer index */ } vpx_svc_parameters_t; +typedef struct vpx_svc_layer_id { + int spatial_layer_id; /**< spatial layer id, valid in [0, ss_number_layers) */ + int temporal_layer_id; /**< temporal layer id, valid in [0, ts_number_layers) */ +} vpx_svc_layer_id_t; + /*!\brief VP8 encoder control function parameter type * * Defines the data types that VP8E control functions take.
Note that @@ -316,6 +323,7 @@ VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *) VPX_CTRL_USE_TYPE(VP9E_SET_SVC, int) VPX_CTRL_USE_TYPE(VP9E_SET_SVC_PARAMETERS, vpx_svc_parameters_t *) +VPX_CTRL_USE_TYPE(VP9E_SET_SVC_LAYER_ID, vpx_svc_layer_id_t *) VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED, int) VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF, unsigned int) diff --git a/vpx/vpx_encoder.h b/vpx/vpx_encoder.h index 347388551..1d9f0c9b7 100644 --- a/vpx/vpx_encoder.h +++ b/vpx/vpx_encoder.h @@ -604,47 +604,48 @@ extern "C" { * Spatial scalability settings (ss) */ - /*!\brief Number of coding layers (spatial) + /*!\brief Number of spatial coding layers. * - * This value specifies the number of coding layers to be used. + * This value specifies the number of spatial coding layers to be used. */ unsigned int ss_number_layers; - /*!\brief Number of coding layers + /*!\brief Number of temporal coding layers. * - * This value specifies the number of coding layers to be used. + * This value specifies the number of temporal layers to be used. */ unsigned int ts_number_layers; - /*!\brief Target bitrate for each layer + /*!\brief Target bitrate for each temporal layer. * - * These values specify the target coding bitrate for each coding layer. + * These values specify the target coding bitrate to be used for each + * temporal layer. */ unsigned int ts_target_bitrate[VPX_TS_MAX_LAYERS]; - /*!\brief Frame rate decimation factor for each layer + /*!\brief Frame rate decimation factor for each temporal layer. * * These values specify the frame rate decimation factors to apply - * to each layer. + * to each temporal layer. */ unsigned int ts_rate_decimator[VPX_TS_MAX_LAYERS]; - /*!\brief Length of the sequence defining frame layer membership + /*!\brief Length of the sequence defining frame temporal layer membership. * * This value specifies the length of the sequence that defines the - * membership of frames to layers. 
For example, if ts_periodicity=8 then - * frames are assigned to coding layers with a repeated sequence of - * length 8. - */ + * membership of frames to temporal layers. For example, if the + * ts_periodicity = 8, then the frames are assigned to coding layers with a + * repeated sequence of length 8. + */ unsigned int ts_periodicity; - /*!\brief Template defining the membership of frames to coding layers + /*!\brief Template defining the membership of frames to temporal layers. * - * This array defines the membership of frames to coding layers. For a - * 2-layer encoding that assigns even numbered frames to one layer (0) - * and odd numbered frames to a second layer (1) with ts_periodicity=8, - * then ts_layer_id = (0,1,0,1,0,1,0,1). - */ + * This array defines the membership of frames to temporal coding layers. + * For a 2-layer encoding that assigns even numbered frames to one temporal + * layer (0) and odd numbered frames to a second temporal layer (1) with + * ts_periodicity=8, then ts_layer_id = (0,1,0,1,0,1,0,1). + */ unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY]; } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */