/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "AudioSegment.h"
|
|
|
|
#include "AudioStream.h"
|
|
#include "AudioMixer.h"
|
|
#include "AudioChannelFormat.h"
|
|
#include "Latency.h"
|
|
#include <speex/speex_resampler.h>
|
|
|
|
namespace mozilla {

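// Interleaves planar source channels into aOutput, scaling each sample by
// aVolume and converting from SrcT to DestT on the way. The output is
// frame-major: all channels of frame 0, then all channels of frame 1, etc.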
template <class SrcT, class DestT>
static void
InterleaveAndConvertBuffer(const SrcT** aSourceChannels,
                           int32_t aLength, float aVolume,
                           int32_t aChannels,
                           DestT* aOutput)
{
  DestT* output = aOutput;
  for (int32_t i = 0; i < aLength; ++i) {
    for (int32_t channel = 0; channel < aChannels; ++channel) {
      float v = AudioSampleToFloat(aSourceChannels[channel][i])*aVolume;
      *output = FloatToAudioSample<DestT>(v);
      ++output;
    }
  }
}

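// Non-template wrapper that dispatches on the runtime sample format to the
// right instantiation of the template above.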
void
InterleaveAndConvertBuffer(const void** aSourceChannels,
                           AudioSampleFormat aSourceFormat,
                           int32_t aLength, float aVolume,
                           int32_t aChannels,
                           AudioDataValue* aOutput)
{
  switch (aSourceFormat) {
  case AUDIO_FORMAT_FLOAT32:
    InterleaveAndConvertBuffer(reinterpret_cast<const float**>(aSourceChannels),
                               aLength, aVolume, aChannels, aOutput);
    break;
  case AUDIO_FORMAT_S16:
    InterleaveAndConvertBuffer(reinterpret_cast<const int16_t**>(aSourceChannels),
                               aLength, aVolume, aChannels, aOutput);
    break;
  case AUDIO_FORMAT_SILENCE:
    // Nothing to do here.
    break;
  }
}

void
AudioSegment::ApplyVolume(float aVolume)
{
  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    ci->mVolume *= aVolume;
  }
}

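// gZeroChannel is a block of zeroes large enough to serve as the backing
// store for one silent channel when up-mixing in Mix() and WriteTo() below.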
static const int AUDIO_PROCESSING_FRAMES = 640; /* > 10ms of 48kHz audio */
static const uint8_t gZeroChannel[MAX_AUDIO_SAMPLE_SIZE*AUDIO_PROCESSING_FRAMES] = {0};

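// Converts the source channels to float if needed, down-mixes them to
// aOutputChannels, and interleaves the result into aOutput. Note that the
// intermediate down-mix buffer is only written when the input has more
// channels than the output, so this should only be called to down-mix.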
void
DownmixAndInterleave(const nsTArray<const void*>& aChannelData,
                     AudioSampleFormat aSourceFormat, int32_t aDuration,
                     float aVolume, uint32_t aOutputChannels,
                     AudioDataValue* aOutput)
{
  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
  nsAutoTArray<float,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> downmixConversionBuffer;
  nsAutoTArray<float,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> downmixOutputBuffer;

  channelData.SetLength(aChannelData.Length());
  if (aSourceFormat != AUDIO_FORMAT_FLOAT32) {
    NS_ASSERTION(aSourceFormat == AUDIO_FORMAT_S16, "unknown format");
    // Convert S16 input to float, channel by channel, so the down-mix can
    // operate on float samples.
    downmixConversionBuffer.SetLength(aDuration*aChannelData.Length());
    for (uint32_t i = 0; i < aChannelData.Length(); ++i) {
      float* conversionBuf = downmixConversionBuffer.Elements() + (i*aDuration);
      const int16_t* sourceBuf = static_cast<const int16_t*>(aChannelData[i]);
      for (uint32_t j = 0; j < (uint32_t)aDuration; ++j) {
        conversionBuf[j] = AudioSampleToFloat(sourceBuf[j]);
      }
      channelData[i] = conversionBuf;
    }
  } else {
    for (uint32_t i = 0; i < aChannelData.Length(); ++i) {
      channelData[i] = aChannelData[i];
    }
  }

  downmixOutputBuffer.SetLength(aDuration*aOutputChannels);
  nsAutoTArray<float*,GUESS_AUDIO_CHANNELS> outputChannelBuffers;
  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> outputChannelData;
  outputChannelBuffers.SetLength(aOutputChannels);
  outputChannelData.SetLength(aOutputChannels);
  for (uint32_t i = 0; i < (uint32_t)aOutputChannels; ++i) {
    outputChannelData[i] = outputChannelBuffers[i] =
      downmixOutputBuffer.Elements() + aDuration*i;
  }
  if (channelData.Length() > aOutputChannels) {
    AudioChannelsDownMix(channelData, outputChannelBuffers.Elements(),
                         aOutputChannels, aDuration);
  }
  InterleaveAndConvertBuffer(outputChannelData.Elements(), AUDIO_FORMAT_FLOAT32,
                             aDuration, aVolume, aOutputChannels, aOutput);
}

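// A minimal caller sketch (hypothetical, for illustration only): resampling a
// segment from 44100Hz to 48000Hz with a caller-owned Speex resampler.
//
//   SpeexResamplerState* resampler =
//     speex_resampler_init(channelCount, 44100, 48000,
//                          SPEEX_RESAMPLER_QUALITY_DEFAULT, nullptr);
//   segment.ResampleChunks(resampler, 44100, 48000);
//   speex_resampler_destroy(resampler);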
void AudioSegment::ResampleChunks(SpeexResamplerState* aResampler, uint32_t aInRate, uint32_t aOutRate)
{
  if (mChunks.IsEmpty()) {
    return;
  }

  MOZ_ASSERT(aResampler || IsNull(), "We can only be here without a resampler if this segment is null.");

  // Find the sample format of the segment: any non-silent chunk determines it.
  AudioSampleFormat format = AUDIO_FORMAT_SILENCE;
  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    if (ci->mBufferFormat != AUDIO_FORMAT_SILENCE) {
      format = ci->mBufferFormat;
    }
  }

  switch (format) {
  // If the format is still silence at this point, all the chunks are silent.
  // The sample type we instantiate then does not matter; resampling only
  // changes the chunks' duration.
  case AUDIO_FORMAT_SILENCE:
  case AUDIO_FORMAT_FLOAT32:
    Resample<float>(aResampler, aInRate, aOutRate);
    break;
  case AUDIO_FORMAT_S16:
    Resample<int16_t>(aResampler, aInRate, aOutRate);
    break;
  default:
    MOZ_ASSERT(false);
    break;
  }
}

// This helps to safely get a pointer to the position we want to start
// writing a planar audio buffer, depending on the channel and the offset in
// the buffer.
static AudioDataValue*
PointerForOffsetInChannel(AudioDataValue* aData, size_t aLengthSamples,
                          uint32_t aChannelCount, uint32_t aChannel,
                          uint32_t aOffsetSamples)
{
  size_t samplesPerChannel = aLengthSamples / aChannelCount;
  size_t beginningOfChannel = samplesPerChannel * aChannel;
  MOZ_ASSERT(aChannel * samplesPerChannel + aOffsetSamples < aLengthSamples,
             "Offset request out of bounds.");
  return aData + beginningOfChannel + aOffsetSamples;
}

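// A minimal usage sketch (hypothetical caller; AudioMixer setup elided):
//
//   AudioMixer mixer;
//   AudioSegment segment;
//   // ... append audio chunks to the segment ...
//   segment.ApplyVolume(0.5f);
//   segment.Mix(mixer, 2, 48000); // mix to stereo at 48kHz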
void
AudioSegment::Mix(AudioMixer& aMixer, uint32_t aOutputChannels,
                  uint32_t aSampleRate)
{
  nsAutoTArray<AudioDataValue, AUDIO_PROCESSING_FRAMES * GUESS_AUDIO_CHANNELS> buf;
  nsAutoTArray<const void*, GUESS_AUDIO_CHANNELS> channelData;
  uint32_t offsetSamples = 0;
  uint32_t duration = GetDuration();

  if (duration <= 0) {
    MOZ_ASSERT(duration == 0);
    return;
  }

  uint32_t outBufferLength = duration * aOutputChannels;
  buf.SetLength(outBufferLength);

  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    AudioChunk& c = *ci;
    uint32_t frames = c.mDuration;

    // If the chunk is silent, simply write the right amount of silence in
    // the buffers.
    if (c.mBufferFormat == AUDIO_FORMAT_SILENCE) {
      for (uint32_t channel = 0; channel < aOutputChannels; channel++) {
        AudioDataValue* ptr =
          PointerForOffsetInChannel(buf.Elements(), outBufferLength,
                                    aOutputChannels, channel, offsetSamples);
        PodZero(ptr, frames);
      }
    } else {
      // Otherwise, we need to upmix or downmix appropriately, depending on
      // the desired input and output channels.
      channelData.SetLength(c.mChannelData.Length());
      for (uint32_t i = 0; i < channelData.Length(); ++i) {
        channelData[i] = c.mChannelData[i];
      }
      if (channelData.Length() < aOutputChannels) {
        // Up-mix.
        AudioChannelsUpMix(&channelData, aOutputChannels, gZeroChannel);
        for (uint32_t channel = 0; channel < aOutputChannels; channel++) {
          AudioDataValue* ptr =
            PointerForOffsetInChannel(buf.Elements(), outBufferLength,
                                      aOutputChannels, channel, offsetSamples);
          PodCopy(ptr, reinterpret_cast<const AudioDataValue*>(channelData[channel]),
                  frames);
        }
        MOZ_ASSERT(channelData.Length() == aOutputChannels);
      } else if (channelData.Length() > aOutputChannels) {
        // Down-mix directly into the output buffer, at the current offset.
        nsAutoTArray<AudioDataValue*, GUESS_AUDIO_CHANNELS> outChannelPtrs;
        outChannelPtrs.SetLength(aOutputChannels);
        for (uint32_t channel = 0; channel < aOutputChannels; channel++) {
          outChannelPtrs[channel] =
            PointerForOffsetInChannel(buf.Elements(), outBufferLength,
                                      aOutputChannels, channel, offsetSamples);
        }
        AudioChannelsDownMix(channelData, outChannelPtrs.Elements(),
                             aOutputChannels, frames);
      } else {
        // The channel count is already what we want; just copy it over.
        for (uint32_t channel = 0; channel < aOutputChannels; channel++) {
          AudioDataValue* ptr =
            PointerForOffsetInChannel(buf.Elements(), outBufferLength,
                                      aOutputChannels, channel, offsetSamples);
          PodCopy(ptr, reinterpret_cast<const AudioDataValue*>(channelData[channel]),
                  frames);
        }
      }
    }
    offsetSamples += frames;
  }

  if (offsetSamples) {
    MOZ_ASSERT(offsetSamples == outBufferLength / aOutputChannels,
               "We forgot to write some samples?");
    aMixer.Mix(buf.Elements(), aOutputChannels, offsetSamples, aSampleRate);
  }
}

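// Unlike Mix() above, which writes a planar buffer, WriteTo() interleaves the
// chunks while up- or down-mixing them to aOutputChannels, and also logs
// per-chunk latency through AsyncLatencyLogger.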
void
AudioSegment::WriteTo(uint64_t aID, AudioMixer& aMixer, uint32_t aOutputChannels, uint32_t aSampleRate)
{
  nsAutoTArray<AudioDataValue,AUDIO_PROCESSING_FRAMES*GUESS_AUDIO_CHANNELS> buf;
  nsAutoTArray<const void*,GUESS_AUDIO_CHANNELS> channelData;
  // Offset in the buffer that will end up sent to the AudioStream, in samples.
  uint32_t offset = 0;

  if (GetDuration() <= 0) {
    MOZ_ASSERT(GetDuration() == 0);
    return;
  }

  uint32_t outBufferLength = GetDuration() * aOutputChannels;
  buf.SetLength(outBufferLength);

  for (ChunkIterator ci(*this); !ci.IsEnded(); ci.Next()) {
    AudioChunk& c = *ci;
    uint32_t frames = c.mDuration;

    // If we have written data in the past, or we have real (non-silent) data
    // to write, we can proceed. Otherwise, it means we just started the
    // AudioStream, and we don't have real data to write to it (just silence).
    // To avoid overbuffering in the AudioStream, we simply drop the silence
    // here. The stream will underrun and output silence anyway.
    if (c.mBuffer && c.mBufferFormat != AUDIO_FORMAT_SILENCE) {
      channelData.SetLength(c.mChannelData.Length());
      for (uint32_t i = 0; i < channelData.Length(); ++i) {
        channelData[i] = c.mChannelData[i];
      }
      if (channelData.Length() < aOutputChannels) {
        // Up-mix. Note that this might actually make channelData have more
        // than aOutputChannels temporarily.
        AudioChannelsUpMix(&channelData, aOutputChannels, gZeroChannel);
      }
      if (channelData.Length() > aOutputChannels) {
        // Down-mix.
        DownmixAndInterleave(channelData, c.mBufferFormat, frames,
                             c.mVolume, aOutputChannels, buf.Elements() + offset);
      } else {
        InterleaveAndConvertBuffer(channelData.Elements(), c.mBufferFormat,
                                   frames, c.mVolume,
                                   aOutputChannels,
                                   buf.Elements() + offset);
      }
    } else {
      // Assumes that a bit pattern of zeroes == 0.0f
      memset(buf.Elements() + offset, 0, aOutputChannels * frames * sizeof(AudioDataValue));
    }

    offset += frames * aOutputChannels;

#if !defined(MOZILLA_XPCOMRT_API)
    if (!c.mTimeStamp.IsNull()) {
      TimeStamp now = TimeStamp::Now();
      // It would be more efficient to convert c.mTimeStamp to milliseconds at
      // creation time and pass that here.
      LogTime(AsyncLatencyLogger::AudioMediaStreamTrack, aID,
              (now - c.mTimeStamp).ToMilliseconds(), c.mTimeStamp);
    }
#endif // !defined(MOZILLA_XPCOMRT_API)
  }

  if (offset) {
    aMixer.Mix(buf.Elements(), aOutputChannels, offset / aOutputChannels, aSampleRate);
  }
}

} // namespace mozilla