bug 1474222 change ConvolverNode output to mono for single channel convolution r=padenot

This also returns to using a single convolver for processing of mono input,
which introduces complexity in up-mixing the state of the convolver when a
second channel is added.

MozReview-Commit-ID: KeBrAswQbtF

--HG--
extra : rebase_source : d793bd967e0291069e4e6cc418de53c4b4cf3253
Karl Tomlinson 2018-08-06 21:24:15 +12:00
Parent 53de4f8c06
Commit 7ee8880ce6
2 changed files with 240 additions and 39 deletions

View file

@@ -30,13 +30,64 @@ class ConvolverNodeEngine final : public AudioNodeEngine
public:
ConvolverNodeEngine(AudioNode* aNode, bool aNormalize)
: AudioNodeEngine(aNode)
, mLeftOverData(INT32_MIN)
, mSampleRate(0.0f)
, mUseBackgroundThreads(!aNode->Context()->IsOffline())
, mNormalize(aNormalize)
{
}
// Indicates how the right output channel is generated.
enum class RightConvolverMode {
// A right convolver is always used when there is more than one impulse
// response channel.
Always,
// With a single response channel, the mode may be either Direct or
// Difference. The decision on which to use is made when stereo input is
// received. Once the right convolver is in use, convolver state is
// suitable only for the selected mode, and so the mode cannot change
// until the right convolver contains only silent history.
//
// With Direct mode, each convolver processes a corresponding channel.
// This mode is selected when input is initially stereo or
// channelInterpretation is "discrete" at the time of starting the right
// convolver when input changes from non-silent mono to stereo.
Direct,
// Difference mode is selected if channelInterpretation is "speakers" at
// the time of starting the right convolver, when the input changes from
// mono to stereo.
//
// When non-silent input is initially mono, with a single response
// channel, the right output channel is not produced until input becomes
// stereo. Only a single convolver is used for mono processing. When
// stereo input arrives after mono input, output must be as if the mono
// signal remaining in the left convolver is up-mixed, but the right
// convolver has not been initialized with the history of the mono input.
// Copying the state of the left convolver into the right convolver is not
// desirable, because there is considerable state to copy, and the
// different convolvers are intended to process out of phase, which means
// that state from one convolver would not directly map to state in
// another convolver.
//
// Instead the distributive property of convolution is used to generate
// the right output channel using information in the left output channel.
// Using l and r to denote the left and right channel input signals, g the
// impulse response, and * convolution, the convolution of the right
// channel can be given by
//
// r * g = (l + (r - l)) * g
// = l * g + (r - l) * g
//
// The left convolver continues to process the left channel l to produce
// l * g. The right convolver processes the difference of input channel
// signals r - l to produce (r - l) * g. The outputs of the two
// convolvers are added to generate the right channel output r * g.
//
// The benefit of doing this is that the history of the r - l input for a
// "speakers" up-mixed mono signal is zero, and so an empty convolver
// already has exactly the right history for mixing the previous mono
// signal with the new stereo signal.
Difference
};
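For reference, the identity that Difference mode relies on can be checked numerically with a naive direct convolution. The sketch below is illustrative only and is not part of this patch; all names in it are hypothetical.

// Illustrative check of r * g == l * g + (r - l) * g for short sequences.
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// Naive O(n*m) convolution of a signal with an impulse response.
static std::vector<float> NaiveConvolve(const std::vector<float>& aSignal,
                                        const std::vector<float>& aResponse)
{
  std::vector<float> out(aSignal.size() + aResponse.size() - 1, 0.0f);
  for (size_t i = 0; i < aSignal.size(); ++i) {
    for (size_t j = 0; j < aResponse.size(); ++j) {
      out[i + j] += aSignal[i] * aResponse[j];
    }
  }
  return out;
}

int main()
{
  std::vector<float> l = { 0.5f, -0.25f, 1.0f, 0.0f };   // left input
  std::vector<float> r = { 1.0f, 0.75f, -0.5f, 0.25f };  // right input
  std::vector<float> g = { 0.9f, 0.3f, 0.1f };           // impulse response

  // The signal fed to the right convolver in Difference mode: r - l.
  std::vector<float> diff(l.size());
  for (size_t i = 0; i < l.size(); ++i) {
    diff[i] = r[i] - l[i];
  }

  std::vector<float> rg = NaiveConvolve(r, g);     // r * g
  std::vector<float> lg = NaiveConvolve(l, g);     // l * g
  std::vector<float> dg = NaiveConvolve(diff, g);  // (r - l) * g

  // l * g + (r - l) * g matches r * g sample for sample.
  for (size_t i = 0; i < rg.size(); ++i) {
    assert(std::fabs(rg[i] - (lg[i] + dg[i])) < 1e-5f);
  }
  return 0;
}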
enum Parameters {
SAMPLE_RATE,
NORMALIZE
@@ -73,17 +124,45 @@ public:
// Very large FFTs will have worse phase errors. Given these constraints 32768 is a good compromise.
const size_t MaxFFTSize = 32768;
mLeftOverData = INT32_MIN; // reset
// Reset.
mRemainingLeftOutput = INT32_MIN;
mRemainingRightOutput = 0;
mRemainingRightHistory = 0;
if (aBuffer.IsNull() || !mSampleRate) {
mReverb = nullptr;
return;
}
// Assume for now that convolution of channel difference is not required.
// Direct may change to Difference during processing.
mRightConvolverMode =
aBuffer.ChannelCount() == 1 ? RightConvolverMode::Direct
: RightConvolverMode::Always;
mReverb = new WebCore::Reverb(aBuffer, MaxFFTSize, mUseBackgroundThreads,
mNormalize, mSampleRate);
}
void AllocateReverbInput(const AudioBlock& aInput,
uint32_t aTotalChannelCount)
{
uint32_t inputChannelCount = aInput.ChannelCount();
MOZ_ASSERT(inputChannelCount <= aTotalChannelCount);
mReverbInput.AllocateChannels(aTotalChannelCount);
// Pre-multiply the input's volume
for (uint32_t i = 0; i < inputChannelCount; ++i) {
const float* src = static_cast<const float*>(aInput.mChannelData[i]);
float* dest = mReverbInput.ChannelFloatsForWrite(i);
AudioBlockCopyChannelWithScale(src, aInput.mVolume, dest);
}
// Fill remaining channels with silence
for (uint32_t i = inputChannelCount; i < aTotalChannelCount; ++i) {
float* dest = mReverbInput.ChannelFloatsForWrite(i);
std::fill_n(dest, WEBAUDIO_BLOCK_SIZE, 0.0f);
}
}
void ProcessBlock(AudioNodeStream* aStream,
GraphTime aFrom,
const AudioBlock& aInput,
@@ -92,7 +171,7 @@ public:
bool IsActive() const override
{
return mLeftOverData != INT32_MIN;
return mRemainingLeftOutput != INT32_MIN;
}
size_t SizeOfExcludingThis(MallocSizeOf aMallocSizeOf) const override
@@ -117,12 +196,34 @@ private:
// Keeping mReverbInput across process calls avoids unnecessary reallocation.
AudioBlock mReverbInput;
nsAutoPtr<WebCore::Reverb> mReverb;
int32_t mLeftOverData;
float mSampleRate;
// Tracks samples of the tail remaining to be output. INT32_MIN is a
// special value to indicate that the end of any previous tail has been
// handled.
int32_t mRemainingLeftOutput = INT32_MIN;
// mRemainingRightOutput and mRemainingRightHistory are only used when
// mRightConvolverMode != Always. There is no special handling required at the
// end of tail times and so INT32_MIN is not used.
// mRemainingRightOutput tracks how much longer this node needs to continue
// to produce a right output channel.
int32_t mRemainingRightOutput = 0;
// mRemainingRightHistory tracks how much silent input would be required to
// drain the right convolver, which may sometimes be longer than the period
// a right output channel is required.
int32_t mRemainingRightHistory = 0;
float mSampleRate = 0.0f;
RightConvolverMode mRightConvolverMode = RightConvolverMode::Always;
bool mUseBackgroundThreads;
bool mNormalize;
};
static void
AddScaledLeftToRight(AudioBlock* aBlock, float aScale)
{
const float* left = static_cast<const float*>(aBlock->mChannelData[0]);
float* right = aBlock->ChannelFloatsForWrite(1);
AudioBlockAddChannelWithScale(left, aScale, right);
}
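AudioBlockAddChannelWithScale, used above, is Gecko's existing block helper for accumulating a scaled copy of one channel into another. A scalar sketch of that operation, assuming it computes aOutput[i] += aScale * aInput[i] over one 128-sample block (the real helper may be SIMD-optimized), could look like the hypothetical function below.

// Scalar sketch only; assumes the accumulate-with-scale semantics described
// above. A Web Audio render quantum (WEBAUDIO_BLOCK_SIZE) is 128 frames.
static void
AddChannelWithScaleScalar(const float aInput[128],
                          float aScale,
                          float aOutput[128])
{
  for (int i = 0; i < 128; ++i) {
    aOutput[i] += aScale * aInput[i];
  }
}

With aScale of -1.0f this forms the r - l difference signal in the reverb input before it reaches the second convolver; with 1.0f it adds the left output l * g back into the right channel after processing.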
void
ConvolverNodeEngine::ProcessBlock(AudioNodeStream* aStream,
GraphTime aFrom,
@@ -135,14 +236,16 @@ ConvolverNodeEngine::ProcessBlock(AudioNodeStream* aStream,
return;
}
uint32_t inputChannelCount = aInput.ChannelCount();
if (aInput.IsNull()) {
if (mLeftOverData > 0) {
mLeftOverData -= WEBAUDIO_BLOCK_SIZE;
mReverbInput.AllocateChannels(1);
WriteZeroesToAudioBlock(&mReverbInput, 0, WEBAUDIO_BLOCK_SIZE);
if (mRemainingLeftOutput > 0) {
mRemainingLeftOutput -= WEBAUDIO_BLOCK_SIZE;
AllocateReverbInput(aInput, 1); // floats for silence
} else {
if (mLeftOverData != INT32_MIN) {
mLeftOverData = INT32_MIN;
if (mRemainingLeftOutput != INT32_MIN) {
mRemainingLeftOutput = INT32_MIN;
MOZ_ASSERT(mRemainingRightOutput <= 0);
MOZ_ASSERT(mRemainingRightHistory <= 0);
aStream->ScheduleCheckForInactive();
RefPtr<PlayingRefChanged> refchanged =
new PlayingRefChanged(aStream, PlayingRefChanged::RELEASE);
@@ -153,31 +256,138 @@ ConvolverNodeEngine::ProcessBlock(AudioNodeStream* aStream,
return;
}
} else {
if (aInput.mVolume != 1.0f) {
// Pre-multiply the input's volume
uint32_t numChannels = aInput.ChannelCount();
mReverbInput.AllocateChannels(numChannels);
for (uint32_t i = 0; i < numChannels; ++i) {
const float* src = static_cast<const float*>(aInput.mChannelData[i]);
float* dest = mReverbInput.ChannelFloatsForWrite(i);
AudioBlockCopyChannelWithScale(src, aInput.mVolume, dest);
}
} else {
mReverbInput = aInput;
}
if (mLeftOverData <= 0) {
if (mRemainingLeftOutput <= 0) {
RefPtr<PlayingRefChanged> refchanged =
new PlayingRefChanged(aStream, PlayingRefChanged::ADDREF);
aStream->Graph()->
DispatchToMainThreadAfterStreamStateUpdate(refchanged.forget());
}
mLeftOverData = mReverb->impulseResponseLength();
MOZ_ASSERT(mLeftOverData > 0);
// Use mVolume as a flag to detect whether AllocateReverbInput() gets
// called.
mReverbInput.mVolume = 0.0f;
// Special handling of input channel count changes is used when there is
// only a single impulse response channel. See RightConvolverMode.
if (mRightConvolverMode != RightConvolverMode::Always) {
ChannelInterpretation channelInterpretation =
aStream->GetChannelInterpretation();
if (inputChannelCount == 2) {
if (mRemainingRightHistory <= 0) {
// Will start the second convolver. Choose to convolve the right
// channel directly if there is no left tail to up-mix or up-mixing
// is "discrete".
mRightConvolverMode =
(mRemainingLeftOutput <= 0 ||
channelInterpretation == ChannelInterpretation::Discrete) ?
RightConvolverMode::Direct : RightConvolverMode::Difference;
}
// The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
mRemainingRightOutput =
mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
mRemainingRightHistory = mRemainingRightOutput;
if (mRightConvolverMode == RightConvolverMode::Difference) {
AllocateReverbInput(aInput, 2);
// Subtract left from right.
AddScaledLeftToRight(&mReverbInput, -1.0f);
}
} else if (mRemainingRightHistory > 0) {
// There is one channel of input, but a second convolver also
// requires input. Up-mix appropriately for the second convolver.
if ((mRightConvolverMode == RightConvolverMode::Difference) ^
(channelInterpretation == ChannelInterpretation::Discrete)) {
MOZ_ASSERT(
(mRightConvolverMode == RightConvolverMode::Difference &&
channelInterpretation == ChannelInterpretation::Speakers) ||
(mRightConvolverMode == RightConvolverMode::Direct &&
channelInterpretation == ChannelInterpretation::Discrete));
// The state is one of the following combinations:
// 1) Difference and speakers.
// Up-mixing gives r = l.
// The input to the second convolver is r - l.
// 2) Direct and discrete.
// Up-mixing gives r = 0.
// The input to the second convolver is r.
//
// In each case the input for the second convolver is silence, which
// will drain the convolver.
AllocateReverbInput(aInput, 2);
} else {
if (channelInterpretation == ChannelInterpretation::Discrete) {
MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Difference);
// channelInterpretation has changed since the second convolver
// was added. "discrete" up-mixing of input would produce a
// silent right channel r = 0, but the second convolver needs
// r - l for RightConvolverMode::Difference.
AllocateReverbInput(aInput, 2);
AddScaledLeftToRight(&mReverbInput, -1.0f);
} else {
MOZ_ASSERT(channelInterpretation ==
ChannelInterpretation::Speakers);
MOZ_ASSERT(mRightConvolverMode == RightConvolverMode::Direct);
// The Reverb will essentially up-mix the single input channel by
// feeding it into both convolvers.
}
// The second convolver does not have silent input, and so it will
// not drain. It will need to continue processing up-mixed input
// because the next input block may be stereo, which would be mixed
// with the signal remaining in the convolvers.
// The extra WEBAUDIO_BLOCK_SIZE is subtracted below.
mRemainingRightHistory =
mReverb->impulseResponseLength() + WEBAUDIO_BLOCK_SIZE;
}
}
}
if (mReverbInput.mVolume == 0.0f) { // not yet set
if (aInput.mVolume != 1.0f) {
AllocateReverbInput(aInput, inputChannelCount); // pre-multiply
} else {
mReverbInput = aInput;
}
}
mRemainingLeftOutput = mReverb->impulseResponseLength();
MOZ_ASSERT(mRemainingLeftOutput > 0);
}
aOutput->AllocateChannels(2);
// "The ConvolverNode produces a mono output only in the single case where
// there is a single input channel and a single-channel buffer."
uint32_t outputChannelCount = 2;
uint32_t reverbOutputChannelCount = 2;
if (mRightConvolverMode != RightConvolverMode::Always) {
// When the input changes from stereo to mono, the output continues to be
// stereo for the length of the tail time, during which the two channels
// may differ.
if (mRemainingRightOutput > 0) {
MOZ_ASSERT(mRemainingRightHistory > 0);
mRemainingRightOutput -= WEBAUDIO_BLOCK_SIZE;
} else {
outputChannelCount = 1;
}
// The second convolver keeps processing until it drains.
if (mRemainingRightHistory > 0) {
mRemainingRightHistory -= WEBAUDIO_BLOCK_SIZE;
} else {
reverbOutputChannelCount = 1;
}
}
// If there are two convolvers, then they each need an output buffer, even
// if the second convolver is only processing to keep history of up-mixed
// input.
aOutput->AllocateChannels(reverbOutputChannelCount);
mReverb->process(&mReverbInput, aOutput);
if (mRightConvolverMode == RightConvolverMode::Difference &&
outputChannelCount == 2) {
// Add left to right.
AddScaledLeftToRight(aOutput, 1.0f);
} else {
// Trim if outputChannelCount < reverbOutputChannelCount
aOutput->mChannelData.TruncateLength(outputChannelCount);
}
}
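To summarize the branching above, the mode chosen when input first becomes stereo while the right convolver holds no history can be restated as a small standalone predicate. This is a hypothetical condensation for illustration only, not code from this patch; the enums are redeclared locally.

// Hypothetical condensation of the mode choice made in ProcessBlock.
enum class Interpretation { Speakers, Discrete };
enum class Mode { Direct, Difference };

static Mode
ChooseRightConvolverMode(bool aLeftTailRemaining, Interpretation aInterpretation)
{
  // Difference mode is only needed when a "speakers" up-mix of the mono
  // signal still ringing in the left convolver must be reproduced.
  if (!aLeftTailRemaining || aInterpretation == Interpretation::Discrete) {
    return Mode::Direct;
  }
  return Mode::Difference;
}

In ProcessBlock, aLeftTailRemaining corresponds to mRemainingLeftOutput > 0, and the interpretation comes from aStream->GetChannelInterpretation().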
ConvolverNode::ConvolverNode(AudioContext* aContext)

View file

@@ -1,10 +1,4 @@
[convolver-response-1-chan.html]
[X 1: Channel 1: Expected 0 for all values but found 1280 unexpected values: \n\tIndex\tActual\n\t[0\]\t-1.1920928955078125e-7\n\t[1\]\t-4.470348358154297e-8\n\t[2\]\t0.3311062455177307\n\t[3\]\t0.6248593926429749\n\t...and 1276 more errors.]
expected: FAIL
[< [1-channel input\] 1 out of 2 assertions were failed.]
expected: FAIL
[X 2: Channel 0 expected to be equal to the array [0,0,0.9458408951759338,0.8448333740234375,0.8210252523422241,0.8620985746383667,0.8430315852165222,0.855602502822876,0.7933436632156372,0.9865825176239014,0.3972480297088623,-0.7786127924919128,-0.9223549962043762,-0.7896472215652466,-0.8727429509162903,-0.8325281143188477...\] but differs in 966 places:\n\tIndex\tActual\t\t\tExpected\n\t[0\]\t2.9802322387695313e-8\t0.0000000000000000e+0\n\t[1\]\t-7.4505805969238281e-8\t0.0000000000000000e+0\n\t[2\]\t9.4584077596664429e-1\t9.4584089517593384e-1\n\t[3\]\t8.4483331441879272e-1\t8.4483337402343750e-1\n\t...and 962 more errors.]
expected: FAIL
@@ -41,10 +35,7 @@
[< [5.1-channel input\] 2 out of 2 assertions were failed.]
expected: FAIL
[# AUDIT TASK RUNNER FINISHED: 5 out of 6 tasks were failed.]
expected: FAIL
[X 1: Channel 1: Expected 0 for all values but found 1279 unexpected values: \n\tIndex\tActual\n\t[1\]\t-2.9802322387695312e-8\n\t[2\]\t0.33110618591308594\n\t[3\]\t0.6248594522476196\n\t[4\]\t0.8481202721595764\n\t...and 1275 more errors.]
[# AUDIT TASK RUNNER FINISHED: 4 out of 6 tasks were failed.]
expected: FAIL
[X 2: Channel 0 expected to be equal to the array [0,0,0.9458407163619995,0.844833254814148,0.821025013923645,0.8620984554290771,0.8430314660072327,0.8556023836135864,0.7933435440063477,0.9865822792053223,0.39724797010421753,-0.7786126136779785,-0.9223548769950867,-0.7896471619606018,-0.8727428317070007,-0.8325279355049133...\] but differs in 993 places:\n\tIndex\tActual\t\t\tExpected\n\t[0\]\t-2.0861625671386719e-7\t0.0000000000000000e+0\n\t[1\]\t-2.9802322387695313e-8\t0.0000000000000000e+0\n\t[2\]\t9.4584059715270996e-1\t9.4584071636199951e-1\n\t[4\]\t8.2102489471435547e-1\t8.2102501392364502e-1\n\t...and 989 more errors.]