/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "AudioConverter.h"
#include <speex/speex_resampler.h>
#include <string.h>
#include <cmath>

/*
 *  Parts derived from MythTV AudioConvert Class
 *  Created by Jean-Yves Avenard.
 *
 *  Copyright (C) Bubblestuff Pty Ltd 2013
 *  Copyright (C) foobum@gmail.com 2010
 */

namespace mozilla {

// Builds a converter from the aIn configuration to the aOut configuration.
// Sample format and interleaving must be identical on both sides; the output
// must be interleaved; and, unless the channel counts match, the output must
// be mono or stereo. A resampler is only created when the rates differ.
AudioConverter::AudioConverter(const AudioConfig& aIn, const AudioConfig& aOut)
    : mIn(aIn), mOut(aOut), mResampler(nullptr) {
  MOZ_DIAGNOSTIC_ASSERT(
      aIn.Format() == aOut.Format() && aIn.Interleaved() == aOut.Interleaved(),
      "No format or rate conversion is supported at this stage");
  MOZ_DIAGNOSTIC_ASSERT(
      aOut.Channels() <= 2 || aIn.Channels() == aOut.Channels(),
      "Only down/upmixing to mono or stereo is supported at this stage");
  MOZ_DIAGNOSTIC_ASSERT(aOut.Interleaved(),
                        "planar audio format not supported");
  // Fills mChannelOrderMap, which ReOrderInterleavedChannels() consumes.
  mIn.Layout().MappingTable(mOut.Layout(), &mChannelOrderMap);
  if (aIn.Rate() != aOut.Rate()) {
    RecreateResampler();
  }
}

// Releases the speex resampler, if one was created.
AudioConverter::~AudioConverter() {
  if (mResampler) {
    speex_resampler_destroy(mResampler);
    mResampler = nullptr;
  }
}

// Returns true when the conversion may be run with the output buffer being the
// same memory as the input buffer.
bool AudioConverter::CanWorkInPlace() const {
  bool needDownmix = mIn.Channels() > mOut.Channels();
  bool needUpmix = mIn.Channels() < mOut.Channels();
  bool canDownmixInPlace =
      mIn.Channels() * AudioConfig::SampleSize(mIn.Format()) >=
      mOut.Channels() * AudioConfig::SampleSize(mOut.Format());
  bool needResample = mIn.Rate() != mOut.Rate();
  bool canResampleInPlace = mIn.Rate() >= mOut.Rate();
  // We can work in place as long as the output never takes more space than
  // the input it was produced from. As we downmix before resampling we can't
  // perform any upsampling in place: resampling in place requires
  // incoming rate >= outgoing rate. Upmixing is never done in place.
  return !needUpmix && (!needDownmix || canDownmixInPlace) &&
         (!needResample || canResampleInPlace);
}

// Performs the channel conversion (downmix, upmix, reorder or plain copy) of
// aFrames frames from aIn into aOut. No resampling is done here.
// Returns the number of frames written to aOut.
size_t AudioConverter::ProcessInternal(void* aOut, const void* aIn,
                                       size_t aFrames) {
  if (!aFrames) {
    return 0;
  }
  if (mIn.Channels() > mOut.Channels()) {
    return DownmixAudio(aOut, aIn, aFrames);
  } else if (mIn.Channels() < mOut.Channels()) {
    return UpmixAudio(aOut, aIn, aFrames);
  } else if (mIn.Layout() != mOut.Layout() && CanReorderAudio()) {
    ReOrderInterleavedChannels(aOut, aIn, aFrames);
  } else if (aIn != aOut) {
    memmove(aOut, aIn, FramesOutToBytes(aFrames));
  }
  return aFrames;
}

// Reorder interleaved channels.
// Can work in place (e.g aOut == aIn): each frame is first gathered into a
// temporary before being written back.
// aChannelOrderMap[j] is the index, within an input frame, of the sample that
// ends up at position j of the output frame.
template <class AudioDataType>
void _ReOrderInterleavedChannels(AudioDataType* aOut, const AudioDataType* aIn,
                                 uint32_t aFrames, uint32_t aChannels,
                                 const uint8_t* aChannelOrderMap) {
  MOZ_DIAGNOSTIC_ASSERT(aChannels <= AudioConfig::ChannelLayout::MAX_CHANNELS);
  AudioDataType val[AudioConfig::ChannelLayout::MAX_CHANNELS];
  for (uint32_t i = 0; i < aFrames; i++) {
    for (uint32_t j = 0; j < aChannels; j++) {
      val[j] = aIn[aChannelOrderMap[j]];
    }
    for (uint32_t j = 0; j < aChannels; j++) {
      aOut[j] = val[j];
    }
    aOut += aChannels;
    aIn += aChannels;
  }
}

// Reorders the channels of aFrames interleaved frames from the input layout to
// the output layout, dispatching on the sample width of the output format.
// Falls back to a plain copy when there is nothing to reorder.
void AudioConverter::ReOrderInterleavedChannels(void* aOut, const void* aIn,
                                                size_t aFrames) const {
  MOZ_DIAGNOSTIC_ASSERT(mIn.Channels() == mOut.Channels());
  MOZ_DIAGNOSTIC_ASSERT(CanReorderAudio());

  if (mChannelOrderMap.IsEmpty() || mOut.Channels() == 1 ||
      mOut.Layout() == mIn.Layout()) {
    // If channel count is 1, planar and non-planar formats are the same or
    // there's nothing to reorder, or if we don't know how to re-order.
    if (aOut != aIn) {
      memmove(aOut, aIn, FramesOutToBytes(aFrames));
    }
    return;
  }

  uint32_t bits = AudioConfig::FormatToBits(mOut.Format());
  switch (bits) {
    case 8:
      _ReOrderInterleavedChannels(
          static_cast<uint8_t*>(aOut), static_cast<const uint8_t*>(aIn),
          aFrames, mIn.Channels(), mChannelOrderMap.Elements());
      break;
    case 16:
      _ReOrderInterleavedChannels(
          static_cast<int16_t*>(aOut), static_cast<const int16_t*>(aIn),
          aFrames, mIn.Channels(), mChannelOrderMap.Elements());
      break;
    default:
      // Every remaining format is expected to use 4-byte samples; they are
      // moved around as opaque 32-bit values.
      MOZ_DIAGNOSTIC_ASSERT(AudioConfig::SampleSize(mOut.Format()) == 4);
      _ReOrderInterleavedChannels(
          static_cast<int32_t*>(aOut), static_cast<const int32_t*>(aIn),
          aFrames, mIn.Channels(), mChannelOrderMap.Elements());
      break;
  }
}

// Saturates a 32-bit value to the int16_t range [-32768, 32767].
static inline int16_t clipTo15(int32_t aX) {
  return aX < -32768 ? -32768 : aX <= 32767 ? aX : 32767;
}

// Channel-count conversion that knows nothing about layouts: the first
// min(aInChannels, aOutChannels) channels of every frame are copied verbatim,
// extra input channels are dropped and extra output channels are filled with
// silence.
//
// In-place use (aIn == aOut):
//  - same channel count: nothing to do;
//  - downmix: handled, the forward loop never writes a sample it still has to
//    read because the write index is always <= the read index;
//  - upmix: not supported by this forward loop (it would overwrite unread
//    input), the buffer is left untouched.
template <typename TYPE>
static void dumbUpDownMix(TYPE* aOut, int32_t aOutChannels, const TYPE* aIn,
                          int32_t aInChannels, int32_t aFrames) {
  if (aIn == aOut && aInChannels <= aOutChannels) {
    return;
  }
  const int32_t commonChannels = std::min(aInChannels, aOutChannels);

  for (int32_t i = 0; i < aFrames; i++) {
    for (int32_t j = 0; j < commonChannels; j++) {
      aOut[j] = aIn[j];
    }
    // Silence the output channels that have no input counterpart. This range
    // is empty when downmixing.
    for (int32_t j = commonChannels; j < aOutChannels; j++) {
      aOut[j] = 0;
    }
    aOut += aOutChannels;
    aIn += aInChannels;
  }
}

// Downmixes aFrames interleaved frames to stereo or mono.
// Inputs with more than two channels are first folded to stereo using a fixed
// coefficient matrix, then, if the output is mono, the two channels are
// averaged. Only FORMAT_FLT and FORMAT_S16 samples are handled.
// When the input has more than two channels and the output is mono, the
// intermediate stereo data is written to aOut, so aOut must have room for two
// samples per frame.
// Returns the number of frames written (always aFrames).
size_t AudioConverter::DownmixAudio(void* aOut, const void* aIn,
                                    size_t aFrames) const {
  MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
             mIn.Format() == AudioConfig::FORMAT_FLT);
  MOZ_ASSERT(mIn.Channels() >= mOut.Channels());
  MOZ_ASSERT(mOut.Layout() == AudioConfig::ChannelLayout(2) ||
             mOut.Layout() == AudioConfig::ChannelLayout(1));

  // The downmix matrices below only describe inputs of 3 to 8 channels.
  static const uint32_t kMaxMatrixChannels = 8;

  uint32_t channels = mIn.Channels();

  if (channels == 1 && mOut.Channels() == 1) {
    if (aOut != aIn) {
      memmove(aOut, aIn, FramesOutToBytes(aFrames));
    }
    return aFrames;
  }

  if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid() ||
      channels > kMaxMatrixChannels) {
    // Dumb copy dropping extra channels. Also used when the input has more
    // channels than the matrices cover, rather than indexing past their end.
    if (mIn.Format() == AudioConfig::FORMAT_FLT) {
      dumbUpDownMix(static_cast<float*>(aOut), mOut.Channels(),
                    static_cast<const float*>(aIn), mIn.Channels(), aFrames);
    } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
      dumbUpDownMix(static_cast<int16_t*>(aOut), mOut.Channels(),
                    static_cast<const int16_t*>(aIn), mIn.Channels(), aFrames);
    } else {
      MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
    }
    return aFrames;
  }

  MOZ_ASSERT(
      mIn.Layout() == AudioConfig::ChannelLayout::SMPTEDefault(mIn.Layout()),
      "Can only downmix input data in SMPTE layout");
  if (channels > 2) {
    if (mIn.Format() == AudioConfig::FORMAT_FLT) {
      // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows
      // 5-8.
      // Indexed as [input channel count - 3][input channel][0 = L, 1 = R].
      static const float dmatrix[6][8][2] = {
          /*3*/ {{0.5858f, 0}, {0, 0.5858f}, {0.4142f, 0.4142f}},
          /*4*/
          {{0.4226f, 0}, {0, 0.4226f}, {0.366f, 0.2114f}, {0.2114f, 0.366f}},
          /*5*/
          {{0.6510f, 0},
           {0, 0.6510f},
           {0.4600f, 0.4600f},
           {0.5636f, 0.3254f},
           {0.3254f, 0.5636f}},
          /*6*/
          {{0.5290f, 0},
           {0, 0.5290f},
           {0.3741f, 0.3741f},
           {0.3741f, 0.3741f},
           {0.4582f, 0.2645f},
           {0.2645f, 0.4582f}},
          /*7*/
          {{0.4553f, 0},
           {0, 0.4553f},
           {0.3220f, 0.3220f},
           {0.3220f, 0.3220f},
           {0.2788f, 0.2788f},
           {0.3943f, 0.2277f},
           {0.2277f, 0.3943f}},
          /*8*/
          {{0.3886f, 0},
           {0, 0.3886f},
           {0.2748f, 0.2748f},
           {0.2748f, 0.2748f},
           {0.3366f, 0.1943f},
           {0.1943f, 0.3366f},
           {0.3366f, 0.1943f},
           {0.1943f, 0.3366f}},
      };
      // Re-write the buffer with downmixed data
      const float* in = static_cast<const float*>(aIn);
      float* out = static_cast<float*>(aOut);
      const auto& coeffs = dmatrix[channels - 3];
      for (uint32_t i = 0; i < aFrames; i++) {
        float sampL = 0.0;
        float sampR = 0.0;
        for (uint32_t j = 0; j < channels; j++) {
          sampL += in[i * channels + j] * coeffs[j][0];
          sampR += in[i * channels + j] * coeffs[j][1];
        }
        *out++ = sampL;
        *out++ = sampR;
      }
    } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
      // Downmix matrix. Per-row normalization 1 for rows 3,4 and 2 for rows
      // 5-8.
      // Coefficients in Q14.
      // Indexed as [input channel count - 3][input channel][0 = L, 1 = R].
      static const int16_t dmatrix[6][8][2] = {
          /*3*/ {{9598, 0}, {0, 9598}, {6786, 6786}},
          /*4*/ {{6925, 0}, {0, 6925}, {5997, 3462}, {3462, 5997}},
          /*5*/
          {{10663, 0}, {0, 10663}, {7540, 7540}, {9234, 5331}, {5331, 9234}},
          /*6*/
          {{8668, 0},
           {0, 8668},
           {6129, 6129},
           {6129, 6129},
           {7507, 4335},
           {4335, 7507}},
          /*7*/
          {{7459, 0},
           {0, 7459},
           {5275, 5275},
           {5275, 5275},
           {4568, 4568},
           {6460, 3731},
           {3731, 6460}},
          /*8*/
          {{6368, 0},
           {0, 6368},
           {4502, 4502},
           {4502, 4502},
           {5514, 3184},
           {3184, 5514},
           {5514, 3184},
           {3184, 5514}}};
      // Re-write the buffer with downmixed data
      const int16_t* in = static_cast<const int16_t*>(aIn);
      int16_t* out = static_cast<int16_t*>(aOut);
      const auto& coeffs = dmatrix[channels - 3];
      for (uint32_t i = 0; i < aFrames; i++) {
        int32_t sampL = 0;
        int32_t sampR = 0;
        for (uint32_t j = 0; j < channels; j++) {
          sampL += in[i * channels + j] * coeffs[j][0];
          sampR += in[i * channels + j] * coeffs[j][1];
        }
        // Round (add half of 1 << 14) and drop the Q14 fractional bits.
        *out++ = clipTo15((sampL + 8192) >> 14);
        *out++ = clipTo15((sampR + 8192) >> 14);
      }
    } else {
      MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
    }

    // If we are to continue downmixing to mono, start working on the output
    // buffer.
    aIn = aOut;
    channels = 2;
  }

  if (mOut.Channels() == 1) {
    // Stereo to mono: average the two channels. This may run in place, the
    // write index never gets ahead of the read index.
    if (mIn.Format() == AudioConfig::FORMAT_FLT) {
      const float* in = static_cast<const float*>(aIn);
      float* out = static_cast<float*>(aOut);
      for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
        // The sample of the buffer would be interleaved.
        *out++ = (in[fIdx * channels] + in[fIdx * channels + 1]) * 0.5f;
      }
    } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
      const int16_t* in = static_cast<const int16_t*>(aIn);
      int16_t* out = static_cast<int16_t*>(aOut);
      for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
        // The sample of the buffer would be interleaved.
        // The sum is computed as int (no overflow), the division truncates
        // toward zero and the result always fits an int16_t.
        const int32_t sample =
            (in[fIdx * channels] + in[fIdx * channels + 1]) / 2;
        *out++ = static_cast<int16_t>(sample);
      }
    } else {
      MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
    }
  }
  return aFrames;
}

// Resamples aFrames interleaved frames from aIn into aOut using the speex
// resampler. Returns the number of frames produced, or 0 if there is no
// resampler or if resampling failed (in which case the resampler is
// destroyed).
size_t AudioConverter::ResampleAudio(void* aOut, const void* aIn,
                                     size_t aFrames) {
  if (!mResampler) {
    return 0;
  }
  // Both counts are in/out parameters for speex: on return they hold the
  // number of frames actually consumed and produced.
  uint32_t outframes = ResampleRecipientFrames(aFrames);
  uint32_t inframes = aFrames;

  int error;
  if (mOut.Format() == AudioConfig::FORMAT_FLT) {
    const float* in = static_cast<const float*>(aIn);
    float* out = static_cast<float*>(aOut);
    error = speex_resampler_process_interleaved_float(mResampler, in, &inframes,
                                                      out, &outframes);
  } else if (mOut.Format() == AudioConfig::FORMAT_S16) {
    const int16_t* in = static_cast<const int16_t*>(aIn);
    int16_t* out = static_cast<int16_t*>(aOut);
    error = speex_resampler_process_interleaved_int(mResampler, in, &inframes,
                                                    out, &outframes);
  } else {
    MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
    error = RESAMPLER_ERR_ALLOC_FAILED;
  }
  MOZ_ASSERT(error == RESAMPLER_ERR_SUCCESS);
  if (error != RESAMPLER_ERR_SUCCESS) {
    speex_resampler_destroy(mResampler);
    mResampler = nullptr;
    return 0;
  }
  MOZ_ASSERT(inframes == aFrames, "Some frames will be dropped");
  return outframes;
}

// Destroys the current resampler, if any, and creates a fresh one for the
// output channel count and the in/out rates. On failure mResampler is left
// null and a warning is emitted.
void AudioConverter::RecreateResampler() {
  if (mResampler) {
    speex_resampler_destroy(mResampler);
  }
  int error;
  // The resampler runs after the channel conversion, hence it is configured
  // with the output channel count.
  mResampler = speex_resampler_init(mOut.Channels(), mIn.Rate(), mOut.Rate(),
                                    SPEEX_RESAMPLER_QUALITY_DEFAULT, &error);

  if (error == RESAMPLER_ERR_SUCCESS) {
    speex_resampler_skip_zeros(mResampler);
  } else {
    NS_WARNING("Failed to initialize resampler.");
    mResampler = nullptr;
  }
}

// Flushes the frames still held by the resampler into aOut by feeding it
// get_input_latency() frames taken from a scratch buffer, then resets the
// resampler. Returns the number of frames written to aOut, 0 if there is no
// resampler or the scratch buffer could not be allocated.
size_t AudioConverter::DrainResampler(void* aOut) {
  if (!mResampler) {
    return 0;
  }
  int frames = speex_resampler_get_input_latency(mResampler);
  // NOTE(review): the drain relies on this buffer holding silence; presumably
  // AlignedByteBuffer zero-initialises its content -- confirm.
  AlignedByteBuffer buffer(FramesOutToBytes(frames));
  if (!buffer) {
    // OOM
    return 0;
  }
  frames = ResampleAudio(aOut, buffer.Data(), frames);
  // Tore down the resampler as it's easier than handling follow-up.
  RecreateResampler();
  return frames;
}

// Upmixes aFrames interleaved frames. Only mono to stereo is expected (see the
// assertions); any other combination, or an unknown layout, falls back to
// copying the existing channels and silencing the extra ones.
// The loops below write ahead of where they read, so this must not be called
// with aOut == aIn; CanWorkInPlace() returns false whenever an upmix is
// needed.
// Returns the number of frames written (always aFrames).
size_t AudioConverter::UpmixAudio(void* aOut, const void* aIn,
                                  size_t aFrames) const {
  MOZ_ASSERT(mIn.Format() == AudioConfig::FORMAT_S16 ||
             mIn.Format() == AudioConfig::FORMAT_FLT);
  MOZ_ASSERT(mIn.Channels() < mOut.Channels());
  MOZ_ASSERT(mIn.Channels() == 1, "Can only upmix mono for now");
  MOZ_ASSERT(mOut.Channels() == 2, "Can only upmix to stereo for now");

  if (!mIn.Layout().IsValid() || !mOut.Layout().IsValid() ||
      mOut.Channels() != 2) {
    // Dumb copy the channels and insert silence for the extra channels.
    if (mIn.Format() == AudioConfig::FORMAT_FLT) {
      dumbUpDownMix(static_cast<float*>(aOut), mOut.Channels(),
                    static_cast<const float*>(aIn), mIn.Channels(), aFrames);
    } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
      dumbUpDownMix(static_cast<int16_t*>(aOut), mOut.Channels(),
                    static_cast<const int16_t*>(aIn), mIn.Channels(), aFrames);
    } else {
      MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
    }
    return aFrames;
  }

  // Upmix mono to stereo.
  // This is a very dumb mono to stereo upmixing, power levels are preserved
  // following the calculation: left = right = -3dB*mono.
  if (mIn.Format() == AudioConfig::FORMAT_FLT) {
    const float m3db = std::sqrt(0.5);  // -3dB = sqrt(1/2)
    const float* in = static_cast<const float*>(aIn);
    float* out = static_cast<float*>(aOut);
    for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
      float sample = in[fIdx] * m3db;
      // The samples of the buffer would be interleaved.
      *out++ = sample;
      *out++ = sample;
    }
  } else if (mIn.Format() == AudioConfig::FORMAT_S16) {
    const int16_t* in = static_cast<const int16_t*>(aIn);
    int16_t* out = static_cast<int16_t*>(aOut);
    for (size_t fIdx = 0; fIdx < aFrames; ++fIdx) {
      // 11585 / 2^14 is close enough to sqrt(0.5).
      int16_t sample = (static_cast<int32_t>(in[fIdx]) * 11585) >> 14;
      // The samples of the buffer would be interleaved.
      *out++ = sample;
      *out++ = sample;
    }
  } else {
    MOZ_DIAGNOSTIC_ASSERT(false, "Unsupported data type");
  }

  return aFrames;
}

// Returns an upper bound on the number of frames resampling aFrames input
// frames will produce. With aFrames == 0 and differing rates, the bound is
// computed for a drain instead (0 if there is no resampler).
size_t AudioConverter::ResampleRecipientFrames(size_t aFrames) const {
  if (!aFrames && mIn.Rate() != mOut.Rate()) {
    if (!mResampler) {
      return 0;
    }
    // We drain by pushing in get_input_latency() samples of 0
    aFrames = speex_resampler_get_input_latency(mResampler);
  }
  // 64-bit intermediate so the product can't wrap; +1 covers the rounding.
  return static_cast<uint64_t>(aFrames) * mOut.Rate() / mIn.Rate() + 1;
}

// Number of samples held by aFrames frames of output.
size_t AudioConverter::FramesOutToSamples(size_t aFrames) const {
  return aFrames * mOut.Channels();
}

// Number of whole input frames made of aSamples samples.
size_t AudioConverter::SamplesInToFrames(size_t aSamples) const {
  return aSamples / mIn.Channels();
}

// Number of bytes occupied by aFrames frames of output.
size_t AudioConverter::FramesOutToBytes(size_t aFrames) const {
  return FramesOutToSamples(aFrames) * AudioConfig::SampleSize(mOut.Format());
}

}  // namespace mozilla