diff --git a/accessible/ipc/win/HandlerProvider.cpp b/accessible/ipc/win/HandlerProvider.cpp index 9197b3d4fc85..046b3d965699 100644 --- a/accessible/ipc/win/HandlerProvider.cpp +++ b/accessible/ipc/win/HandlerProvider.cpp @@ -135,8 +135,8 @@ HandlerProvider::GetHandlerPayloadSize( MOZ_ASSERT(mscom::IsCurrentThreadMTA()); if (!IsTargetInterfaceCacheable()) { - *aOutPayloadSize = mscom::StructToStream::GetEmptySize(); - return S_OK; + // No handler, so no payload for this instance. + return E_NOTIMPL; } MutexAutoLock lock(mMutex); @@ -378,6 +378,11 @@ bool HandlerProvider::IsTargetInterfaceCacheable() { HRESULT HandlerProvider::WriteHandlerPayload(NotNull aInterceptor, NotNull aStream) { + if (!IsTargetInterfaceCacheable()) { + // No handler, so no payload for this instance. + return E_NOTIMPL; + } + MutexAutoLock lock(mMutex); if (!mSerializer || !(*mSerializer)) { @@ -435,6 +440,13 @@ HandlerProvider::GetEffectiveOutParamIid(REFIID aCallIid, ULONG aCallMethod) { return NEWEST_IA2_IID; } + // IAccessible::get_accSelection + if ((aCallIid == IID_IAccessible || aCallIid == IID_IAccessible2 || + aCallIid == IID_IAccessible2_2 || aCallIid == IID_IAccessible2_3) && + aCallMethod == 19) { + return IID_IEnumVARIANT; + } + MOZ_ASSERT(false); return IID_IUnknown; } diff --git a/browser/base/content/browser.js b/browser/base/content/browser.js index 2bbae7036d57..629c533fb078 100644 --- a/browser/base/content/browser.js +++ b/browser/base/content/browser.js @@ -4155,10 +4155,8 @@ const BrowserSearch = { * allowed values. * @param type * (string) Indicates how the user selected the search item. - * @param where - * (string) Where was the search link opened (e.g. new tab, current tab, ..). 
*/ - recordOneoffSearchInTelemetry(engine, source, type, where) { + recordOneoffSearchInTelemetry(engine, source, type) { try { const details = {type, isOneOff: true}; BrowserUsageTelemetry.recordSearch(gBrowser, engine, source, details); diff --git a/browser/base/content/urlbarBindings.xml b/browser/base/content/urlbarBindings.xml index 23b466251bfb..3997d6e4e066 100644 --- a/browser/base/content/urlbarBindings.xml +++ b/browser/base/content/urlbarBindings.xml @@ -699,8 +699,7 @@ file, You can obtain one at http://mozilla.org/MPL/2.0/. [url, postData] = this._parseAndRecordSearchEngineLoad(selectedOneOff.engine, this.oneOffSearchQuery, - event, where, - openUILinkParams); + event); } else if (action) { switch (action.type) { case "visiturl": @@ -776,8 +775,6 @@ file, You can obtain one at http://mozilla.org/MPL/2.0/. action.params.engineName, action.params.searchSuggestion || action.params.searchQuery, event, - where, - openUILinkParams, actionDetails ); break; @@ -904,8 +901,6 @@ file, You can obtain one at http://mozilla.org/MPL/2.0/. - - &contentBlocking.title; diff --git a/browser/components/search/content/search-one-offs.js b/browser/components/search/content/search-one-offs.js index 1ad2ef20b2e4..3db88e62abd2 100644 --- a/browser/components/search/content/search-one-offs.js +++ b/browser/components/search/content/search-one-offs.js @@ -987,13 +987,9 @@ class SearchOneOffs { * * @param {Event} aEvent * An event, like a click on a one-off button. - * @param {string} aOpenUILinkWhere - * The "where" passed to openUILink. - * @param {object} aOpenUILinkParams - * The "params" passed to openUILink. * @returns {boolean} True if telemetry was recorded and false if not. 
*/ - maybeRecordTelemetry(aEvent, aOpenUILinkWhere, aOpenUILinkParams) { + maybeRecordTelemetry(aEvent) { if (!aEvent) { return false; } @@ -1029,11 +1025,7 @@ class SearchOneOffs { source += "-" + this.telemetryOrigin; } - let tabBackground = aOpenUILinkWhere == "tab" && - aOpenUILinkParams && - aOpenUILinkParams.inBackground; - let where = tabBackground ? "tab-background" : aOpenUILinkWhere; - BrowserSearch.recordOneoffSearchInTelemetry(engine, source, type, where); + BrowserSearch.recordOneoffSearchInTelemetry(engine, source, type); return true; } diff --git a/browser/components/search/content/searchbar.js b/browser/components/search/content/searchbar.js index c0f3895090b5..e9deb020561b 100644 --- a/browser/components/search/content/searchbar.js +++ b/browser/components/search/content/searchbar.js @@ -304,7 +304,7 @@ class MozSearchbar extends MozXULElement { if (!selection || (selection.index == -1)) { oneOffRecorded = this.textbox.popup.oneOffButtons - .maybeRecordTelemetry(aEvent, aWhere, aParams); + .maybeRecordTelemetry(aEvent); if (!oneOffRecorded) { let source = "unknown"; let type = "unknown"; @@ -325,8 +325,7 @@ class MozSearchbar extends MozXULElement { if (!aEngine) { aEngine = this.currentEngine; } - BrowserSearch.recordOneoffSearchInTelemetry(aEngine, source, type, - aWhere); + BrowserSearch.recordOneoffSearchInTelemetry(aEngine, source, type); } } diff --git a/browser/components/urlbar/UrlbarView.jsm b/browser/components/urlbar/UrlbarView.jsm index 2bbe7f518884..d8f31fc49ab4 100644 --- a/browser/components/urlbar/UrlbarView.jsm +++ b/browser/components/urlbar/UrlbarView.jsm @@ -29,7 +29,7 @@ class UrlbarView { this._mainContainer = this.panel.querySelector(".urlbarView-body-inner"); this._rows = this.panel.querySelector(".urlbarView-results"); - this._rows.addEventListener("click", this); + this._rows.addEventListener("mouseup", this); // For the horizontal fade-out effect, set the overflow attribute on result // rows when they overflow. 
@@ -346,7 +346,12 @@ class UrlbarView { } } - _on_click(event) { + _on_mouseup(event) { + if (event.button == 2) { + // Ignore right clicks. + return; + } + let row = event.target; while (!row.classList.contains("urlbarView-row")) { row = row.parentNode; diff --git a/browser/locales/en-US/chrome/browser/browser.dtd b/browser/locales/en-US/chrome/browser/browser.dtd index e1c0531d4395..a6398c4f745a 100644 --- a/browser/locales/en-US/chrome/browser/browser.dtd +++ b/browser/locales/en-US/chrome/browser/browser.dtd @@ -878,6 +878,8 @@ you can use these alternative items. Otherwise, their values should be empty. - + + diff --git a/dom/base/DocumentInlines.h b/dom/base/DocumentInlines.h index cc40513c5f50..3661747b449b 100644 --- a/dom/base/DocumentInlines.h +++ b/dom/base/DocumentInlines.h @@ -19,8 +19,8 @@ inline HTMLBodyElement* Document::GetBodyElement() { } template -size_t Document::FindDocStyleSheetInsertionPoint( - const nsTArray& aDocSheets, const StyleSheet& aSheet) { +size_t Document::FindDocStyleSheetInsertionPoint(const nsTArray& aDocSheets, + const StyleSheet& aSheet) { nsStyleSheetService* sheetService = nsStyleSheetService::GetInstance(); // lowest index first diff --git a/dom/base/IdentifierMapEntry.h b/dom/base/IdentifierMapEntry.h index ec446603987e..bac6287fce97 100644 --- a/dom/base/IdentifierMapEntry.h +++ b/dom/base/IdentifierMapEntry.h @@ -31,7 +31,7 @@ namespace mozilla { namespace dom { class Document; class Element; -} +} // namespace dom /** * Right now our identifier map entries contain information for 'name' diff --git a/dom/bindings/RemoteObjectProxy.h b/dom/bindings/RemoteObjectProxy.h index 6df792b68a48..14bfc3f822cd 100644 --- a/dom/bindings/RemoteObjectProxy.h +++ b/dom/bindings/RemoteObjectProxy.h @@ -175,8 +175,8 @@ class RemoteObjectProxy : public RemoteObjectProxyBase { using RemoteObjectProxyBase::RemoteObjectProxyBase; private: - bool DefinePropertiesAndFunctions( - JSContext* aCx, JS::Handle aHolder) const final { + bool 
DefinePropertiesAndFunctions(JSContext* aCx, + JS::Handle aHolder) const final { return JS_DefineProperties(aCx, aHolder, P) && JS_DefineFunctions(aCx, aHolder, F); } diff --git a/dom/html/HTMLFormSubmission.cpp b/dom/html/HTMLFormSubmission.cpp index 06d5b9a9445a..58d1082f96e7 100644 --- a/dom/html/HTMLFormSubmission.cpp +++ b/dom/html/HTMLFormSubmission.cpp @@ -823,24 +823,24 @@ void GetEnumAttr(nsGenericHTMLElement* aContent, nsAtom* atom, rv = aForm->GetActionURL(getter_AddRefs(actionURL), aOriginatingElement); NS_ENSURE_SUCCESS(rv, rv); - // Check if CSP allows this form-action - nsCOMPtr csp; - rv = aForm->NodePrincipal()->GetCsp(getter_AddRefs(csp)); - NS_ENSURE_SUCCESS(rv, rv); - if (csp) { - bool permitsFormAction = true; + // Check if CSP allows this form-action + nsCOMPtr csp; + rv = aForm->NodePrincipal()->GetCsp(getter_AddRefs(csp)); + NS_ENSURE_SUCCESS(rv, rv); + if (csp) { + bool permitsFormAction = true; - // form-action is only enforced if explicitly defined in the - // policy - do *not* consult default-src, see: - // http://www.w3.org/TR/CSP2/#directive-default-src - rv = csp->Permits(aForm, nullptr /* nsICSPEventListener */, actionURL, - nsIContentSecurityPolicy::FORM_ACTION_DIRECTIVE, true, - &permitsFormAction); - NS_ENSURE_SUCCESS(rv, rv); - if (!permitsFormAction) { - return NS_ERROR_CSP_FORM_ACTION_VIOLATION; - } - } + // form-action is only enforced if explicitly defined in the + // policy - do *not* consult default-src, see: + // http://www.w3.org/TR/CSP2/#directive-default-src + rv = csp->Permits(aForm, nullptr /* nsICSPEventListener */, actionURL, + nsIContentSecurityPolicy::FORM_ACTION_DIRECTIVE, true, + &permitsFormAction); + NS_ENSURE_SUCCESS(rv, rv); + if (!permitsFormAction) { + return NS_ERROR_CSP_FORM_ACTION_VIOLATION; + } + } // Get target // The target is the originating element formtarget attribute if the element diff --git a/dom/html/HTMLInputElement.h b/dom/html/HTMLInputElement.h index 3a12df77e810..086d812f8090 100644 --- 
a/dom/html/HTMLInputElement.h +++ b/dom/html/HTMLInputElement.h @@ -750,7 +750,8 @@ class HTMLInputElement final : public nsGenericHTMLFormElementWithState, /* * This locates the inner datetimebox UA Widget element and only the * UA Widget - * element. This should fold into GetDateTimeBoxElement() when the XBL binding is removed. + * element. This should fold into GetDateTimeBoxElement() when the XBL binding + * is removed. */ Element* GetDateTimeBoxElementInUAWidget(); diff --git a/dom/indexedDB/ActorsParent.cpp b/dom/indexedDB/ActorsParent.cpp index d70f40afada8..4a29e8d85792 100644 --- a/dom/indexedDB/ActorsParent.cpp +++ b/dom/indexedDB/ActorsParent.cpp @@ -33,6 +33,7 @@ #include "mozilla/SnappyUncompressInputStream.h" #include "mozilla/StaticPtr.h" #include "mozilla/storage.h" +#include "mozilla/Telemetry.h" #include "mozilla/Unused.h" #include "mozilla/UniquePtrExtensions.h" #include "mozilla/dom/ContentParent.h" @@ -8329,6 +8330,17 @@ nsresult DeserializeStructuredCloneFile(FileManager* aFileManager, RefPtr fileInfo = aFileManager->GetFileInfo(id); MOZ_ASSERT(fileInfo); + // XXX In bug 1432133, for some reasons FileInfo object cannot be got. This + // is just a short-term fix, and we are working on finding the real cause + // in bug 1519859. + if (!fileInfo) { + IDB_WARNING( + "Corrupt structured clone data detected in IndexedDB. Failing the " + "database request. 
Bug 1519859 will address this problem."); + Telemetry::ScalarAdd(Telemetry::ScalarID::IDB_FAILURE_FILEINFO_ERROR, 1); + + return NS_ERROR_DOM_INDEXEDDB_UNKNOWN_ERR; + } aFile->mFileInfo.swap(fileInfo); aFile->mType = type; diff --git a/dom/ipc/MemoryReportRequest.cpp b/dom/ipc/MemoryReportRequest.cpp index 2297a0491e95..c931550096c5 100644 --- a/dom/ipc/MemoryReportRequest.cpp +++ b/dom/ipc/MemoryReportRequest.cpp @@ -54,8 +54,8 @@ NS_IMPL_ISUPPORTS(MemoryReportRequestClient, nsIRunnable) const ReportCallback& aReportCallback, const FinishCallback& aFinishCallback) { RefPtr request = new MemoryReportRequestClient( - aGeneration, aAnonymize, aDMDFile, aProcessString, - aReportCallback, aFinishCallback); + aGeneration, aAnonymize, aDMDFile, aProcessString, aReportCallback, + aFinishCallback); DebugOnly rv; if (aMinimizeMemoryUsage) { @@ -127,8 +127,7 @@ class FinishReportingCallback final : public nsIFinishReportingCallback { explicit FinishReportingCallback(uint32_t aGeneration, const FinishCallback& aFinishCallback) - : mGeneration(aGeneration), - mFinishCallback(aFinishCallback) {} + : mGeneration(aGeneration), mFinishCallback(aFinishCallback) {} NS_IMETHOD Callback(nsISupports* aUnused) override { return mFinishCallback(mGeneration) ? 
NS_OK : NS_ERROR_FAILURE; diff --git a/dom/media/MediaManager.cpp b/dom/media/MediaManager.cpp index fa98abbcf159..19880df5dc48 100644 --- a/dom/media/MediaManager.cpp +++ b/dom/media/MediaManager.cpp @@ -4180,54 +4180,54 @@ SourceListener::InitializeAsync() { LOG("started all sources"); aHolder.Resolve(true, __func__); }) - ->Then(GetMainThreadSerialEventTarget(), __func__, - [self = RefPtr(this), this]() { - if (mStopped) { - // We were shut down during the async init - return SourceListenerPromise::CreateAndResolve(true, __func__); - } + ->Then( + GetMainThreadSerialEventTarget(), __func__, + [self = RefPtr(this), this]() { + if (mStopped) { + // We were shut down during the async init + return SourceListenerPromise::CreateAndResolve(true, __func__); + } - for (DeviceState* state : - {mAudioDeviceState.get(), mVideoDeviceState.get()}) { - if (!state) { - continue; - } - MOZ_DIAGNOSTIC_ASSERT(!state->mTrackEnabled); - MOZ_DIAGNOSTIC_ASSERT(!state->mDeviceEnabled); - MOZ_DIAGNOSTIC_ASSERT(!state->mStopped); + for (DeviceState* state : + {mAudioDeviceState.get(), mVideoDeviceState.get()}) { + if (!state) { + continue; + } + MOZ_DIAGNOSTIC_ASSERT(!state->mTrackEnabled); + MOZ_DIAGNOSTIC_ASSERT(!state->mDeviceEnabled); + MOZ_DIAGNOSTIC_ASSERT(!state->mStopped); - state->mDeviceEnabled = true; - state->mTrackEnabled = true; - state->mTrackEnabledTime = TimeStamp::Now(); + state->mDeviceEnabled = true; + state->mTrackEnabled = true; + state->mTrackEnabledTime = TimeStamp::Now(); - if (state == mVideoDeviceState.get() && - !mStream->IsDestroyed()) { - mStream->SetPullingEnabled(kVideoTrack, true); - } - } - return SourceListenerPromise::CreateAndResolve(true, __func__); - }, - [self = RefPtr(this), - this](RefPtr&& aResult) { - if (mStopped) { - return SourceListenerPromise::CreateAndReject( - std::move(aResult), __func__); - } + if (state == mVideoDeviceState.get() && !mStream->IsDestroyed()) { + mStream->SetPullingEnabled(kVideoTrack, true); + } + } + return 
SourceListenerPromise::CreateAndResolve(true, __func__); + }, + [self = RefPtr(this), + this](RefPtr&& aResult) { + if (mStopped) { + return SourceListenerPromise::CreateAndReject(std::move(aResult), + __func__); + } - for (DeviceState* state : - {mAudioDeviceState.get(), mVideoDeviceState.get()}) { - if (!state) { - continue; - } - MOZ_DIAGNOSTIC_ASSERT(!state->mTrackEnabled); - MOZ_DIAGNOSTIC_ASSERT(!state->mDeviceEnabled); - MOZ_DIAGNOSTIC_ASSERT(!state->mStopped); + for (DeviceState* state : + {mAudioDeviceState.get(), mVideoDeviceState.get()}) { + if (!state) { + continue; + } + MOZ_DIAGNOSTIC_ASSERT(!state->mTrackEnabled); + MOZ_DIAGNOSTIC_ASSERT(!state->mDeviceEnabled); + MOZ_DIAGNOSTIC_ASSERT(!state->mStopped); - state->mStopped = true; - } - return SourceListenerPromise::CreateAndReject(std::move(aResult), - __func__); - }); + state->mStopped = true; + } + return SourceListenerPromise::CreateAndReject(std::move(aResult), + __func__); + }); } void SourceListener::Stop() { diff --git a/dom/media/gtest/mp4_demuxer/TestParser.cpp b/dom/media/gtest/mp4_demuxer/TestParser.cpp index 7b34d8ddd948..75cb12873f5d 100644 --- a/dom/media/gtest/mp4_demuxer/TestParser.cpp +++ b/dom/media/gtest/mp4_demuxer/TestParser.cpp @@ -94,7 +94,7 @@ TEST(MP4Metadata, EmptyStream) { TEST(MoofParser, EmptyStream) { RefPtr stream = new TestStream(nullptr, 0); - MoofParser parser(stream, 0, false); + MoofParser parser(stream, 0, false, true); EXPECT_EQ(0u, parser.mOffset); EXPECT_TRUE(parser.ReachedEnd()); @@ -404,7 +404,7 @@ TEST(MoofParser, test_case_mp4) { RefPtr stream = new TestStream(buffer.Elements(), buffer.Length()); - MoofParser parser(stream, 0, false); + MoofParser parser(stream, 0, false, true); EXPECT_EQ(0u, parser.mOffset) << tests[test].mFilename; EXPECT_FALSE(parser.ReachedEnd()) << tests[test].mFilename; EXPECT_TRUE(parser.mInitRange.IsEmpty()) << tests[test].mFilename; diff --git a/dom/media/mediasource/ContainerParser.cpp b/dom/media/mediasource/ContainerParser.cpp 
index a48f784b4ad2..233f1acb8d74 100644 --- a/dom/media/mediasource/ContainerParser.cpp +++ b/dom/media/mediasource/ContainerParser.cpp @@ -526,7 +526,8 @@ class MP4ContainerParser : public ContainerParser, // consumers of ParseStartAndEndTimestamps to add their timestamp offset // manually. This allows the ContainerParser to be shared across different // timestampOffsets. - mParser = new MoofParser(mStream, 0, /* aIsAudio = */ false); + mParser = new MoofParser(mStream, 0, /* aIsAudio = */ false, + /* aIsMultitrackParser */ true); DDLINKCHILD("parser", mParser.get()); mInitData = new MediaByteBuffer(); mCompleteInitSegmentRange = MediaByteRange(); diff --git a/dom/media/mp4/MP4Metadata.cpp b/dom/media/mp4/MP4Metadata.cpp index 6e4422ef4771..953869d3e22c 100644 --- a/dom/media/mp4/MP4Metadata.cpp +++ b/dom/media/mp4/MP4Metadata.cpp @@ -460,7 +460,7 @@ MP4Metadata::ResultAndIndice MP4Metadata::GetTrackIndice( /*static*/ MP4Metadata::ResultAndByteBuffer MP4Metadata::Metadata( ByteStream* aSource) { - auto parser = mozilla::MakeUnique(aSource, 0, false); + auto parser = mozilla::MakeUnique(aSource, 0, false, true); RefPtr buffer = parser->Metadata(); if (!buffer) { return {MediaResult(NS_ERROR_DOM_MEDIA_METADATA_ERR, diff --git a/dom/media/mp4/MoofParser.cpp b/dom/media/mp4/MoofParser.cpp index 357040b0d4f1..082028baa5c2 100644 --- a/dom/media/mp4/MoofParser.cpp +++ b/dom/media/mp4/MoofParser.cpp @@ -55,7 +55,7 @@ bool MoofParser::RebuildFragmentedIndex(BoxContext& aContext) { ParseMoov(box); } else if (box.IsType("moof")) { Moof moof(box, mTrex, mMvhd, mMdhd, mEdts, mSinf, &mLastDecodeTime, - mIsAudio); + mIsAudio, mIsMultitrackParser); if (!moof.IsValid() && !box.Next().IsAvailable()) { // Moof isn't valid abort search for now. 
@@ -227,11 +227,11 @@ void MoofParser::ParseTrak(Box& aBox) { if (box.IsType("tkhd")) { tkhd = Tkhd(box); } else if (box.IsType("mdia")) { - if (!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId) { + if (mIsMultitrackParser || tkhd.mTrackId == mTrex.mTrackId) { ParseMdia(box, tkhd); } } else if (box.IsType("edts") && - (!mTrex.mTrackId || tkhd.mTrackId == mTrex.mTrackId)) { + (mIsMultitrackParser || tkhd.mTrackId == mTrex.mTrackId)) { mEdts = Edts(box); } } @@ -251,12 +251,8 @@ void MoofParser::ParseMvex(Box& aBox) { for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { if (box.IsType("trex")) { Trex trex = Trex(box); - if (!mTrex.mTrackId || trex.mTrackId == mTrex.mTrackId) { - auto trackId = mTrex.mTrackId; + if (mIsMultitrackParser || trex.mTrackId == mTrex.mTrackId) { mTrex = trex; - // Keep the original trackId, as should it be 0 we want to continue - // parsing all tracks. - mTrex.mTrackId = trackId; } } } @@ -299,8 +295,8 @@ void MoofParser::ParseStbl(Box& aBox) { } void MoofParser::ParseStsd(Box& aBox) { - if (mTrex.mTrackId == 0) { - // If mTrex.mTrackId is 0, then the parser is being used to read multiple + if (mIsMultitrackParser) { + // If mIsMultitrackParser, then the parser is being used to read multiple // tracks metadata, and it is not a sane operation to try and map multiple // sample description boxes, from different tracks, onto the parser, which // is modeled around storing metadata for a single track. 
@@ -353,12 +349,14 @@ class CtsComparator { }; Moof::Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, - Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio) + Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio, + bool aIsMultitrackParser) : mRange(aBox.Range()), mTfhd(aTrex), mMaxRoundingError(35000) { nsTArray psshBoxes; for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { if (box.IsType("traf")) { - ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aDecodeTime, aIsAudio); + ParseTraf(box, aTrex, aMvhd, aMdhd, aEdts, aSinf, aDecodeTime, aIsAudio, + aIsMultitrackParser); } if (box.IsType("pssh")) { psshBoxes.AppendElement(box); @@ -506,14 +504,14 @@ bool Moof::ProcessCencAuxInfo(AtomType aScheme) { void Moof::ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, Sinf& aSinf, uint64_t* aDecodeTime, - bool aIsAudio) { + bool aIsAudio, bool aIsMultitrackParser) { MOZ_ASSERT(aDecodeTime); Tfdt tfdt; for (Box box = aBox.FirstChild(); box.IsAvailable(); box = box.Next()) { if (box.IsType("tfhd")) { mTfhd = Tfhd(box, aTrex); - } else if (!aTrex.mTrackId || mTfhd.mTrackId == aTrex.mTrackId) { + } else if (aIsMultitrackParser || mTfhd.mTrackId == aTrex.mTrackId) { if (box.IsType("tfdt")) { tfdt = Tfdt(box); } else if (box.IsType("sgpd")) { @@ -551,7 +549,7 @@ void Moof::ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, } } } - if (aTrex.mTrackId && mTfhd.mTrackId != aTrex.mTrackId) { + if (!aIsMultitrackParser && mTfhd.mTrackId != aTrex.mTrackId) { return; } // Now search for TRUN boxes. 
diff --git a/dom/media/mp4/MoofParser.h b/dom/media/mp4/MoofParser.h index bd0e323e8dc5..688679bc49ad 100644 --- a/dom/media/mp4/MoofParser.h +++ b/dom/media/mp4/MoofParser.h @@ -227,7 +227,8 @@ struct SampleDescriptionEntry { class Moof final : public Atom { public: Moof(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, - Sinf& aSinf, uint64_t* aDecoderTime, bool aIsAudio); + Sinf& aSinf, uint64_t* aDecoderTime, bool aIsAudio, + bool aIsMultitrackParser); bool GetAuxInfo(AtomType aType, FallibleTArray* aByteRanges); void FixRounding(const Moof& aMoof); @@ -248,7 +249,8 @@ class Moof final : public Atom { private: // aDecodeTime is updated to the end of the parsed TRAF on return. void ParseTraf(Box& aBox, Trex& aTrex, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, - Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio); + Sinf& aSinf, uint64_t* aDecodeTime, bool aIsAudio, + bool aIsMultitrackParser); // aDecodeTime is updated to the end of the parsed TRUN on return. Result ParseTrun(Box& aBox, Mvhd& aMvhd, Mdhd& aMdhd, Edts& aEdts, uint64_t* aDecodeTime, @@ -267,14 +269,18 @@ DDLoggedTypeDeclName(MoofParser); class MoofParser : public DecoderDoctorLifeLogger { public: - MoofParser(ByteStream* aSource, uint32_t aTrackId, bool aIsAudio) + MoofParser(ByteStream* aSource, uint32_t aTrackId, bool aIsAudio, + bool aIsMultitrackParser = false) : mSource(aSource), mOffset(0), mTrex(aTrackId), mIsAudio(aIsAudio), - mLastDecodeTime(0) { - // Setting the mTrex.mTrackId to 0 is a nasty work around for calculating - // the composition range for MSE. We need an array of tracks. + mLastDecodeTime(0), + mIsMultitrackParser(aIsMultitrackParser) { + // Setting mIsMultitrackParser is a nasty work around for calculating + // the composition range for MSE that causes the parser to parse multiple + // tracks. Ideally we'd store an array of tracks with different metadata + // for each. 
DDLINKCHILD("source", aSource); } bool RebuildFragmentedIndex(const mozilla::MediaByteRangeSet& aByteRanges); @@ -326,6 +332,7 @@ class MoofParser : public DecoderDoctorLifeLogger { nsTArray mMediaRanges; bool mIsAudio; uint64_t mLastDecodeTime; + bool mIsMultitrackParser; }; } // namespace mozilla diff --git a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp index d01df3012c07..0136a03af6ca 100644 --- a/dom/media/webrtc/MediaEngineWebRTCAudio.cpp +++ b/dom/media/webrtc/MediaEngineWebRTCAudio.cpp @@ -605,20 +605,19 @@ nsresult MediaEngineWebRTCMicrophoneSource::Stop( } RefPtr that = this; - NS_DispatchToMainThread( - media::NewRunnableFrom([that, stream = mStream]() { - if (stream->IsDestroyed()) { - return NS_OK; - } + NS_DispatchToMainThread(media::NewRunnableFrom([that, stream = mStream]() { + if (stream->IsDestroyed()) { + return NS_OK; + } - stream->GraphImpl()->AppendMessage(MakeUnique( - that->mInputProcessing, StartStopMessage::Stop)); - CubebUtils::AudioDeviceID deviceID = that->mDeviceInfo->DeviceID(); - Maybe id = Some(deviceID); - stream->CloseAudioInput(id, that->mInputProcessing); + stream->GraphImpl()->AppendMessage(MakeUnique( + that->mInputProcessing, StartStopMessage::Stop)); + CubebUtils::AudioDeviceID deviceID = that->mDeviceInfo->DeviceID(); + Maybe id = Some(deviceID); + stream->CloseAudioInput(id, that->mInputProcessing); - return NS_OK; - })); + return NS_OK; + })); MOZ_ASSERT(mState == kStarted, "Should be started when stopping"); mState = kStopped; diff --git a/dom/power/PowerManagerService.cpp b/dom/power/PowerManagerService.cpp index 8aeda3dce185..49110e15a81c 100644 --- a/dom/power/PowerManagerService.cpp +++ b/dom/power/PowerManagerService.cpp @@ -152,8 +152,9 @@ already_AddRefed PowerManagerService::NewWakeLockOnBehalfOfProcess( NS_DEFINE_NAMED_CID(NS_POWERMANAGERSERVICE_CID); -NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR(nsIPowerManagerService, - 
mozilla::dom::power::PowerManagerService::GetInstance) +NS_GENERIC_FACTORY_SINGLETON_CONSTRUCTOR( + nsIPowerManagerService, + mozilla::dom::power::PowerManagerService::GetInstance) static const mozilla::Module::CIDEntry kPowerManagerCIDs[] = { // clang-format off @@ -171,13 +172,14 @@ static const mozilla::Module::ContractIDEntry kPowerManagerContracts[] = { // We mark the power module as being available in the GPU process because the // appshell depends on the power manager service. -static const mozilla::Module kPowerManagerModule = {mozilla::Module::kVersion, - kPowerManagerCIDs, - kPowerManagerContracts, - nullptr, - nullptr, - nullptr, - nullptr, - mozilla::Module::ALLOW_IN_GPU_PROCESS}; +static const mozilla::Module kPowerManagerModule = { + mozilla::Module::kVersion, + kPowerManagerCIDs, + kPowerManagerContracts, + nullptr, + nullptr, + nullptr, + nullptr, + mozilla::Module::ALLOW_IN_GPU_PROCESS}; NSMODULE_DEFN(nsPowerManagerModule) = &kPowerManagerModule; diff --git a/dom/script/LoadedScript.cpp b/dom/script/LoadedScript.cpp index c1b2296770ad..816070746b2a 100644 --- a/dom/script/LoadedScript.cpp +++ b/dom/script/LoadedScript.cpp @@ -40,9 +40,7 @@ NS_IMPL_CYCLE_COLLECTING_RELEASE(LoadedScript) LoadedScript::LoadedScript(ScriptKind aKind, ScriptFetchOptions* aFetchOptions, nsIURI* aBaseURL) - : mKind(aKind), - mFetchOptions(aFetchOptions), - mBaseURL(aBaseURL) { + : mKind(aKind), mFetchOptions(aFetchOptions), mBaseURL(aBaseURL) { MOZ_ASSERT(mFetchOptions); MOZ_ASSERT(mBaseURL); } diff --git a/dom/script/ScriptLoader.cpp b/dom/script/ScriptLoader.cpp index 507652d24c82..7ef3b5a3c320 100644 --- a/dom/script/ScriptLoader.cpp +++ b/dom/script/ScriptLoader.cpp @@ -2301,9 +2301,8 @@ nsresult ScriptLoader::FillCompileOptionsForRequest( bool isScriptElement = !aRequest->IsModuleRequest() || aRequest->AsModuleRequest()->IsTopLevel(); - aOptions->setIntroductionInfoToCaller(jsapi.cx(), - isScriptElement ? 
"scriptElement" - : "importedModule"); + aOptions->setIntroductionInfoToCaller( + jsapi.cx(), isScriptElement ? "scriptElement" : "importedModule"); aOptions->setFileAndLine(aRequest->mURL.get(), aRequest->mLineNo); aOptions->setIsRunOnce(true); aOptions->setNoScriptRval(true); diff --git a/dom/smil/SMILKeySpline.h b/dom/smil/SMILKeySpline.h index 6636927fbb17..2c27f6e9c118 100644 --- a/dom/smil/SMILKeySpline.h +++ b/dom/smil/SMILKeySpline.h @@ -27,10 +27,10 @@ class SMILKeySpline { * aX1, etc. are the x1, y1, x2, y2 cubic Bezier control points as defined * by SMILANIM 3.2.3. They must each be in the range 0.0 <= x <= 1.0 */ - SMILKeySpline(double aX1, double aY1, double aX2, double aY2) - : mX1(0), mY1(0), mX2(0), mY2(0) { - Init(aX1, aY1, aX2, aY2); - } + SMILKeySpline(double aX1, double aY1, double aX2, double aY2) + : mX1(0), mY1(0), mX2(0), mY2(0) { + Init(aX1, aY1, aX2, aY2); + } double X1() const { return mX1; } double Y1() const { return mY1; } diff --git a/dom/svg/SVGContentUtils.h b/dom/svg/SVGContentUtils.h index 451368aa2e8c..1f631e4c32cd 100644 --- a/dom/svg/SVGContentUtils.h +++ b/dom/svg/SVGContentUtils.h @@ -191,8 +191,7 @@ class SVGContentUtils { /* * Report a localized error message to the error console. 
*/ - static nsresult ReportToConsole(dom::Document* doc, - const char* aWarning, + static nsresult ReportToConsole(dom::Document* doc, const char* aWarning, const char16_t** aParams, uint32_t aParamsLength); diff --git a/dom/svg/SVGIntegerPair.h b/dom/svg/SVGIntegerPair.h index 5dbb4243b649..c02791af5f31 100644 --- a/dom/svg/SVGIntegerPair.h +++ b/dom/svg/SVGIntegerPair.h @@ -77,8 +77,8 @@ class SVGIntegerPair { mIndex(aIndex) {} virtual ~DOMAnimatedInteger(); - SVGIntegerPair* mVal; // kept alive because it belongs to content - PairIndex mIndex; // are we the first or second integer + SVGIntegerPair* mVal; // kept alive because it belongs to content + PairIndex mIndex; // are we the first or second integer virtual int32_t BaseVal() override { return mVal->GetBaseValue(mIndex); } virtual void SetBaseVal(int32_t aValue) override { diff --git a/dom/svg/SVGNumberPair.h b/dom/svg/SVGNumberPair.h index 70abc15f4069..465151a59acd 100644 --- a/dom/svg/SVGNumberPair.h +++ b/dom/svg/SVGNumberPair.h @@ -78,8 +78,8 @@ class SVGNumberPair { mIndex(aIndex) {} virtual ~DOMAnimatedNumber(); - SVGNumberPair* mVal; // kept alive because it belongs to content - PairIndex mIndex; // are we the first or second number + SVGNumberPair* mVal; // kept alive because it belongs to content + PairIndex mIndex; // are we the first or second number virtual float BaseVal() override { return mVal->GetBaseValue(mIndex); } virtual void SetBaseVal(float aValue) override { diff --git a/dom/svg/SVGTests.cpp b/dom/svg/SVGTests.cpp index 39dd750a5137..cfd8b1c9bdf1 100644 --- a/dom/svg/SVGTests.cpp +++ b/dom/svg/SVGTests.cpp @@ -46,7 +46,8 @@ bool SVGTests::HasExtension(const nsAString& aExtension) const { if (aExtension.EqualsLiteral(str)) return true; SVG_SUPPORTED_EXTENSION("http://www.w3.org/1999/xhtml") nsNameSpaceManager* nameSpaceManager = nsNameSpaceManager::GetInstance(); - if (AsSVGElement()->IsInChromeDocument() || !nameSpaceManager->mMathMLDisabled) { + if 
(AsSVGElement()->IsInChromeDocument() || + !nameSpaceManager->mMathMLDisabled) { SVG_SUPPORTED_EXTENSION("http://www.w3.org/1998/Math/MathML") } #undef SVG_SUPPORTED_EXTENSION diff --git a/dom/workers/WorkerDebugger.cpp b/dom/workers/WorkerDebugger.cpp index b9925184b949..0a074d3f8f29 100644 --- a/dom/workers/WorkerDebugger.cpp +++ b/dom/workers/WorkerDebugger.cpp @@ -460,8 +460,7 @@ RefPtr WorkerDebugger::ReportPerformanceInfo() { RefPtr scriptURI = mWorkerPrivate->GetResolvedScriptURI(); if (NS_WARN_IF(!scriptURI)) { // This can happen at shutdown, let's stop here. - return PerformanceInfoPromise::CreateAndReject(NS_ERROR_FAILURE, - __func__); + return PerformanceInfoPromise::CreateAndReject(NS_ERROR_FAILURE, __func__); } nsCString url = scriptURI->GetSpecOrDefault(); diff --git a/dom/xbl/nsXBLService.cpp b/dom/xbl/nsXBLService.cpp index e7054753e325..f03082648b90 100644 --- a/dom/xbl/nsXBLService.cpp +++ b/dom/xbl/nsXBLService.cpp @@ -152,8 +152,8 @@ class nsXBLStreamListener final : public nsIStreamListener, AutoTArray mBindingRequests; nsWeakPtr mBoundDocument; - nsCOMPtr mSink; // Only set until OnStartRequest - nsCOMPtr mBindingDocument; // Only set until OnStartRequest + nsCOMPtr mSink; // Only set until OnStartRequest + nsCOMPtr mBindingDocument; // Only set until OnStartRequest }; /* Implementation file */ diff --git a/editor/libeditor/EditorUtils.cpp b/editor/libeditor/EditorUtils.cpp index 96f0598cf66f..720a23c1bcb4 100644 --- a/editor/libeditor/EditorUtils.cpp +++ b/editor/libeditor/EditorUtils.cpp @@ -35,9 +35,7 @@ DOMIterator::DOMIterator(nsINode& aNode MOZ_GUARD_OBJECT_NOTIFIER_PARAM_IN_IMPL) MOZ_ASSERT(NS_SUCCEEDED(rv)); } -nsresult DOMIterator::Init(nsRange& aRange) { - return mIter->Init(&aRange); -} +nsresult DOMIterator::Init(nsRange& aRange) { return mIter->Init(&aRange); } DOMIterator::DOMIterator(MOZ_GUARD_OBJECT_NOTIFIER_ONLY_PARAM_IN_IMPL) : mIter(&mPostOrderIter) { diff --git a/editor/spellchecker/TextServicesDocument.cpp 
b/editor/spellchecker/TextServicesDocument.cpp index ce4017dd1fdf..fb8772606957 100644 --- a/editor/spellchecker/TextServicesDocument.cpp +++ b/editor/spellchecker/TextServicesDocument.cpp @@ -6,8 +6,8 @@ #include "TextServicesDocument.h" #include "FilteredContentIterator.h" // for FilteredContentIterator -#include "mozilla/Assertions.h" // for MOZ_ASSERT, etc -#include "mozilla/EditorUtils.h" // for AutoTransactionBatchExternal +#include "mozilla/Assertions.h" // for MOZ_ASSERT, etc +#include "mozilla/EditorUtils.h" // for AutoTransactionBatchExternal #include "mozilla/dom/Element.h" #include "mozilla/dom/Selection.h" #include "mozilla/mozalloc.h" // for operator new, etc @@ -15,22 +15,22 @@ #include "nsAString.h" // for nsAString::Length, etc #include "nsContentUtils.h" // for nsContentUtils #include "nsComposeTxtSrvFilter.h" -#include "nsDebug.h" // for NS_ENSURE_TRUE, etc -#include "nsDependentSubstring.h" // for Substring -#include "nsError.h" // for NS_OK, NS_ERROR_FAILURE, etc -#include "nsGenericHTMLElement.h" // for nsGenericHTMLElement -#include "nsIContent.h" // for nsIContent, etc -#include "nsID.h" // for NS_GET_IID -#include "nsIEditor.h" // for nsIEditor, etc -#include "nsINode.h" // for nsINode -#include "nsISelectionController.h" // for nsISelectionController, etc -#include "nsISupportsBase.h" // for nsISupports -#include "nsISupportsUtils.h" // for NS_IF_ADDREF, NS_ADDREF, etc -#include "mozilla/intl/WordBreaker.h" // for WordRange, WordBreaker -#include "nsRange.h" // for nsRange -#include "nsString.h" // for nsString, nsAutoString -#include "nscore.h" // for nsresult, NS_IMETHODIMP, etc -#include "mozilla/UniquePtr.h" // for UniquePtr +#include "nsDebug.h" // for NS_ENSURE_TRUE, etc +#include "nsDependentSubstring.h" // for Substring +#include "nsError.h" // for NS_OK, NS_ERROR_FAILURE, etc +#include "nsGenericHTMLElement.h" // for nsGenericHTMLElement +#include "nsIContent.h" // for nsIContent, etc +#include "nsID.h" // for NS_GET_IID +#include 
"nsIEditor.h" // for nsIEditor, etc +#include "nsINode.h" // for nsINode +#include "nsISelectionController.h" // for nsISelectionController, etc +#include "nsISupportsBase.h" // for nsISupports +#include "nsISupportsUtils.h" // for NS_IF_ADDREF, NS_ADDREF, etc +#include "mozilla/intl/WordBreaker.h" // for WordRange, WordBreaker +#include "nsRange.h" // for nsRange +#include "nsString.h" // for nsString, nsAutoString +#include "nscore.h" // for nsresult, NS_IMETHODIMP, etc +#include "mozilla/UniquePtr.h" // for UniquePtr namespace mozilla { diff --git a/editor/spellchecker/TextServicesDocument.h b/editor/spellchecker/TextServicesDocument.h index b9ca4ac9e31c..94f249118e82 100644 --- a/editor/spellchecker/TextServicesDocument.h +++ b/editor/spellchecker/TextServicesDocument.h @@ -29,7 +29,7 @@ class TextEditor; namespace dom { class Document; class Element; -}; +}; // namespace dom /** * The TextServicesDocument presents the document in as a bunch of flattened diff --git a/gfx/gl/GLContextProviderEGL.cpp b/gfx/gl/GLContextProviderEGL.cpp index 774eb34a8e87..8db54f68e596 100644 --- a/gfx/gl/GLContextProviderEGL.cpp +++ b/gfx/gl/GLContextProviderEGL.cpp @@ -310,8 +310,7 @@ GLContextEGL::GLContextEGL(CreateContextFlags flags, const SurfaceCaps& caps, #endif } -void -GLContextEGL::OnMarkDestroyed() { +void GLContextEGL::OnMarkDestroyed() { if (mSurfaceOverride != EGL_NO_SURFACE) { SetEGLSurfaceOverride(EGL_NO_SURFACE); } diff --git a/gfx/layers/FrameMetrics.cpp b/gfx/layers/FrameMetrics.cpp index 5fea17ecacca..d6a7b9654c0a 100644 --- a/gfx/layers/FrameMetrics.cpp +++ b/gfx/layers/FrameMetrics.cpp @@ -17,7 +17,8 @@ void FrameMetrics::RecalculateLayoutViewportOffset() { if (!mIsRootContent) { return; } - KeepLayoutViewportEnclosingVisualViewport(GetVisualViewport(), mLayoutViewport); + KeepLayoutViewportEnclosingVisualViewport(GetVisualViewport(), + mLayoutViewport); } /* static */ void FrameMetrics::KeepLayoutViewportEnclosingVisualViewport( diff --git 
a/gfx/layers/LayersTypes.h b/gfx/layers/LayersTypes.h index 3e24bfe6ff1f..04eb5e472c21 100644 --- a/gfx/layers/LayersTypes.h +++ b/gfx/layers/LayersTypes.h @@ -13,7 +13,7 @@ #include "mozilla/DefineEnum.h" // for MOZ_DEFINE_ENUM #include "mozilla/gfx/Point.h" // for IntPoint #include "mozilla/Maybe.h" -#include "mozilla/TimeStamp.h" // for TimeStamp +#include "mozilla/TimeStamp.h" // for TimeStamp #include "mozilla/TypedEnumBits.h" #include "nsRegion.h" #include "nsStyleConsts.h" @@ -420,7 +420,7 @@ MOZ_DEFINE_ENUM_CLASS_WITH_BASE(CompositionPayloadType, uint8_t, ( // clang-format on struct CompositionPayload { - bool operator ==(const CompositionPayload& aOther) const { + bool operator==(const CompositionPayload& aOther) const { return mType == aOther.mType && mTimeStamp == aOther.mTimeStamp; } /* The type of payload that is in this composition */ diff --git a/gfx/layers/apz/util/APZCCallbackHelper.cpp b/gfx/layers/apz/util/APZCCallbackHelper.cpp index 3cd86ab34770..6463bbd31eb1 100644 --- a/gfx/layers/apz/util/APZCCallbackHelper.cpp +++ b/gfx/layers/apz/util/APZCCallbackHelper.cpp @@ -205,7 +205,8 @@ static ScreenMargin ScrollFrame(nsIContent* aContent, aRequest, actualScrollOffset); } } else if (aRequest.IsRootContent() && - aRequest.GetScrollOffset() != aRequest.GetLayoutViewport().TopLeft()) { + aRequest.GetScrollOffset() != + aRequest.GetLayoutViewport().TopLeft()) { // APZ uses the visual viewport's offset to calculate where to place the // display port, so the display port is misplaced when a pinch zoom occurs. 
// diff --git a/gfx/layers/ipc/CrossProcessCompositorBridgeParent.h b/gfx/layers/ipc/CrossProcessCompositorBridgeParent.h index 78de49757359..f9b864b6de19 100644 --- a/gfx/layers/ipc/CrossProcessCompositorBridgeParent.h +++ b/gfx/layers/ipc/CrossProcessCompositorBridgeParent.h @@ -170,8 +170,9 @@ class CrossProcessCompositorBridgeParent final void UpdatePaintTime(LayerTransactionParent* aLayerTree, const TimeDuration& aPaintTime) override; - void RegisterPayload(LayerTransactionParent* aLayerTree, - const InfallibleTArray& aPayload) override; + void RegisterPayload( + LayerTransactionParent* aLayerTree, + const InfallibleTArray& aPayload) override; PWebRenderBridgeParent* AllocPWebRenderBridgeParent( const wr::PipelineId& aPipelineId, diff --git a/gfx/layers/ipc/LayersMessageUtils.h b/gfx/layers/ipc/LayersMessageUtils.h index 5d3c2c6300a7..6241be8307a5 100644 --- a/gfx/layers/ipc/LayersMessageUtils.h +++ b/gfx/layers/ipc/LayersMessageUtils.h @@ -710,7 +710,7 @@ struct ParamTraits template <> struct ParamTraits : public PlainOldDataSerializer {}; - + template <> struct ParamTraits : public ContiguousEnumSerializerInclusive< @@ -719,8 +719,7 @@ struct ParamTraits mozilla::layers::kHighestCompositionPayloadType> {}; template <> -struct ParamTraits -{ +struct ParamTraits { typedef mozilla::layers::CompositionPayload paramType; static void Write(Message* aMsg, const paramType& aParam) { @@ -728,7 +727,8 @@ struct ParamTraits WriteParam(aMsg, aParam.mTimeStamp); } - static bool Read(const Message* aMsg, PickleIterator* aIter, paramType* aResult) { + static bool Read(const Message* aMsg, PickleIterator* aIter, + paramType* aResult) { return ReadParam(aMsg, aIter, &aResult->mType) && ReadParam(aMsg, aIter, &aResult->mTimeStamp); } diff --git a/gfx/layers/wr/ClipManager.cpp b/gfx/layers/wr/ClipManager.cpp index db715f11473b..88567990e4fc 100644 --- a/gfx/layers/wr/ClipManager.cpp +++ b/gfx/layers/wr/ClipManager.cpp @@ -20,7 +20,9 @@ //#define CLIP_LOG(...) 
printf_stderr("CLIP: " __VA_ARGS__) +// clang-format off //#define CLIP_LOG(...) if (XRE_IsContentProcess()) printf_stderr("CLIP: " __VA_ARGS__) +// clang-format on namespace mozilla { namespace layers { diff --git a/gfx/layers/wr/RenderRootStateManager.cpp b/gfx/layers/wr/RenderRootStateManager.cpp index 07f6db5c0696..bf4f4cba8753 100644 --- a/gfx/layers/wr/RenderRootStateManager.cpp +++ b/gfx/layers/wr/RenderRootStateManager.cpp @@ -11,71 +11,50 @@ namespace mozilla { namespace layers { -RenderRootStateManager::RenderRootStateManager(WebRenderLayerManager* aLayerManager) - : mLayerManager(aLayerManager) - , mDestroyed(false) -{ -} +RenderRootStateManager::RenderRootStateManager( + WebRenderLayerManager* aLayerManager) + : mLayerManager(aLayerManager), mDestroyed(false) {} -RenderRootStateManager::~RenderRootStateManager() -{} +RenderRootStateManager::~RenderRootStateManager() {} -// RenderRootStateManager shares its ref count with the WebRenderLayerManager that -// created it. You can think of the two classes as being one unit, except there -// are multiple RenderRootStateManagers per WebRenderLayerManager. Since we need -// to reference the WebRenderLayerManager and it needs to reference us, this -// avoids us needing to involve the cycle collector. -void -RenderRootStateManager::AddRef() -{ - mLayerManager->AddRef(); -} +// RenderRootStateManager shares its ref count with the WebRenderLayerManager +// that created it. You can think of the two classes as being one unit, except +// there are multiple RenderRootStateManagers per WebRenderLayerManager. Since +// we need to reference the WebRenderLayerManager and it needs to reference us, +// this avoids us needing to involve the cycle collector. 
+void RenderRootStateManager::AddRef() { mLayerManager->AddRef(); } -void -RenderRootStateManager::Release() -{ - mLayerManager->Release(); -} +void RenderRootStateManager::Release() { mLayerManager->Release(); } - -WebRenderBridgeChild* -RenderRootStateManager::WrBridge() const -{ +WebRenderBridgeChild* RenderRootStateManager::WrBridge() const { return mLayerManager->WrBridge(); } -WebRenderCommandBuilder& -RenderRootStateManager::CommandBuilder() -{ +WebRenderCommandBuilder& RenderRootStateManager::CommandBuilder() { return mLayerManager->CommandBuilder(); } RenderRootStateManager::WebRenderUserDataRefTable* -RenderRootStateManager::GetWebRenderUserDataTable() -{ +RenderRootStateManager::GetWebRenderUserDataTable() { return mLayerManager->GetWebRenderUserDataTable(); } -wr::IpcResourceUpdateQueue& -RenderRootStateManager::AsyncResourceUpdates() -{ +wr::IpcResourceUpdateQueue& RenderRootStateManager::AsyncResourceUpdates() { MOZ_ASSERT(NS_IsMainThread()); if (!mAsyncResourceUpdates) { mAsyncResourceUpdates.emplace(WrBridge()); RefPtr task = NewRunnableMethod( - "RenderRootStateManager::FlushAsyncResourceUpdates", - this, &RenderRootStateManager::FlushAsyncResourceUpdates); + "RenderRootStateManager::FlushAsyncResourceUpdates", this, + &RenderRootStateManager::FlushAsyncResourceUpdates); NS_DispatchToMainThread(task.forget()); } return mAsyncResourceUpdates.ref(); } -void -RenderRootStateManager::Destroy() -{ +void RenderRootStateManager::Destroy() { ClearAsyncAnimations(); if (WrBridge()) { @@ -90,9 +69,7 @@ RenderRootStateManager::Destroy() mDestroyed = true; } -void -RenderRootStateManager::FlushAsyncResourceUpdates() -{ +void RenderRootStateManager::FlushAsyncResourceUpdates() { MOZ_ASSERT(NS_IsMainThread()); if (!mAsyncResourceUpdates) { @@ -106,21 +83,16 @@ RenderRootStateManager::FlushAsyncResourceUpdates() mAsyncResourceUpdates.reset(); } -void -RenderRootStateManager::AddImageKeyForDiscard(wr::ImageKey key) -{ +void 
RenderRootStateManager::AddImageKeyForDiscard(wr::ImageKey key) { mImageKeysToDelete.AppendElement(key); } -void -RenderRootStateManager::AddBlobImageKeyForDiscard(wr::BlobImageKey key) -{ +void RenderRootStateManager::AddBlobImageKeyForDiscard(wr::BlobImageKey key) { mBlobImageKeysToDelete.AppendElement(key); } -void -RenderRootStateManager::DiscardImagesInTransaction(wr::IpcResourceUpdateQueue& aResources) -{ +void RenderRootStateManager::DiscardImagesInTransaction( + wr::IpcResourceUpdateQueue& aResources) { for (const auto& key : mImageKeysToDelete) { aResources.DeleteImage(key); } @@ -131,9 +103,7 @@ RenderRootStateManager::DiscardImagesInTransaction(wr::IpcResourceUpdateQueue& a mBlobImageKeysToDelete.Clear(); } -void -RenderRootStateManager::DiscardLocalImages() -{ +void RenderRootStateManager::DiscardLocalImages() { // Removes images but doesn't tell the parent side about them // This is useful in empty / failed transactions where we created // image keys but didn't tell the parent about them yet. @@ -141,16 +111,12 @@ RenderRootStateManager::DiscardLocalImages() mBlobImageKeysToDelete.Clear(); } -void -RenderRootStateManager::ClearCachedResources() -{ +void RenderRootStateManager::ClearCachedResources() { mActiveCompositorAnimationIds.clear(); mDiscardedCompositorAnimationsIds.Clear(); } -void -RenderRootStateManager::AddActiveCompositorAnimationId(uint64_t aId) -{ +void RenderRootStateManager::AddActiveCompositorAnimationId(uint64_t aId) { // In layers-free mode we track the active compositor animation ids on the // client side so that we don't try to discard the same animation id multiple // times. 
We could just ignore the multiple-discard on the parent side, but @@ -158,53 +124,42 @@ RenderRootStateManager::AddActiveCompositorAnimationId(uint64_t aId) mActiveCompositorAnimationIds.insert(aId); } -void -RenderRootStateManager::AddCompositorAnimationsIdForDiscard(uint64_t aId) -{ +void RenderRootStateManager::AddCompositorAnimationsIdForDiscard(uint64_t aId) { if (mActiveCompositorAnimationIds.erase(aId)) { - // For layers-free ensure we don't try to discard an animation id that wasn't - // active. We also remove it from mActiveCompositorAnimationIds so we don't - // discard it again unless it gets re-activated. + // For layers-free ensure we don't try to discard an animation id that + // wasn't active. We also remove it from mActiveCompositorAnimationIds so we + // don't discard it again unless it gets re-activated. mDiscardedCompositorAnimationsIds.AppendElement(aId); } } -void -RenderRootStateManager::DiscardCompositorAnimations() -{ - if (WrBridge()->IPCOpen() && - !mDiscardedCompositorAnimationsIds.IsEmpty()) { - WrBridge()-> - SendDeleteCompositorAnimations(mDiscardedCompositorAnimationsIds); +void RenderRootStateManager::DiscardCompositorAnimations() { + if (WrBridge()->IPCOpen() && !mDiscardedCompositorAnimationsIds.IsEmpty()) { + WrBridge()->SendDeleteCompositorAnimations( + mDiscardedCompositorAnimationsIds); } mDiscardedCompositorAnimationsIds.Clear(); } -void -RenderRootStateManager::RegisterAsyncAnimation(const wr::ImageKey& aKey, - SharedSurfacesAnimation* aAnimation) -{ +void RenderRootStateManager::RegisterAsyncAnimation( + const wr::ImageKey& aKey, SharedSurfacesAnimation* aAnimation) { mAsyncAnimations.insert(std::make_pair(wr::AsUint64(aKey), aAnimation)); } -void -RenderRootStateManager::DeregisterAsyncAnimation(const wr::ImageKey& aKey) -{ +void RenderRootStateManager::DeregisterAsyncAnimation( + const wr::ImageKey& aKey) { mAsyncAnimations.erase(wr::AsUint64(aKey)); } -void -RenderRootStateManager::ClearAsyncAnimations() -{ +void 
RenderRootStateManager::ClearAsyncAnimations() { for (const auto& i : mAsyncAnimations) { i.second->Invalidate(this); } mAsyncAnimations.clear(); } -void -RenderRootStateManager::WrReleasedImages(const nsTArray& aPairs) -{ +void RenderRootStateManager::WrReleasedImages( + const nsTArray& aPairs) { // A SharedSurfaceAnimation object's lifetime is tied to its owning // ImageContainer. When the ImageContainer is released, // SharedSurfaceAnimation::Destroy is called which should ensure it is removed @@ -223,54 +178,41 @@ RenderRootStateManager::WrReleasedImages(const nsTArrayAddWebRenderParentCommand(aCmd); } -void -RenderRootStateManager::UpdateResources(wr::IpcResourceUpdateQueue& aResources) -{ +void RenderRootStateManager::UpdateResources( + wr::IpcResourceUpdateQueue& aResources) { WrBridge()->UpdateResources(aResources); } -void -RenderRootStateManager::AddPipelineIdForAsyncCompositable(const wr::PipelineId& aPipelineId, - const CompositableHandle& aHandle) -{ +void RenderRootStateManager::AddPipelineIdForAsyncCompositable( + const wr::PipelineId& aPipelineId, const CompositableHandle& aHandle) { WrBridge()->AddPipelineIdForAsyncCompositable(aPipelineId, aHandle); } -void -RenderRootStateManager::AddPipelineIdForCompositable(const wr::PipelineId& aPipelineId, - const CompositableHandle& aHandle) -{ +void RenderRootStateManager::AddPipelineIdForCompositable( + const wr::PipelineId& aPipelineId, const CompositableHandle& aHandle) { WrBridge()->AddPipelineIdForCompositable(aPipelineId, aHandle); } -void -RenderRootStateManager::RemovePipelineIdForCompositable(const wr::PipelineId& aPipelineId) -{ +void RenderRootStateManager::RemovePipelineIdForCompositable( + const wr::PipelineId& aPipelineId) { WrBridge()->RemovePipelineIdForCompositable(aPipelineId); } - /// Release TextureClient that is bounded to ImageKey. - /// It is used for recycling TextureClient. 
-void -RenderRootStateManager::ReleaseTextureOfImage(const wr::ImageKey& aKey) -{ +/// Release TextureClient that is bounded to ImageKey. +/// It is used for recycling TextureClient. +void RenderRootStateManager::ReleaseTextureOfImage(const wr::ImageKey& aKey) { WrBridge()->ReleaseTextureOfImage(aKey); } -wr::FontInstanceKey -RenderRootStateManager::GetFontKeyForScaledFont(gfx::ScaledFont* aScaledFont, - wr::IpcResourceUpdateQueue* aResources) -{ +wr::FontInstanceKey RenderRootStateManager::GetFontKeyForScaledFont( + gfx::ScaledFont* aScaledFont, wr::IpcResourceUpdateQueue* aResources) { return WrBridge()->GetFontKeyForScaledFont(aScaledFont, aResources); } -wr::FontKey -RenderRootStateManager::GetFontKeyForUnscaledFont(gfx::UnscaledFont* aUnscaledFont, - wr::IpcResourceUpdateQueue* aResources) -{ +wr::FontKey RenderRootStateManager::GetFontKeyForUnscaledFont( + gfx::UnscaledFont* aUnscaledFont, wr::IpcResourceUpdateQueue* aResources) { return WrBridge()->GetFontKeyForUnscaledFont(aUnscaledFont, aResources); } -} // namespace layers -} // namespace mozilla \ No newline at end of file +} // namespace layers +} // namespace mozilla \ No newline at end of file diff --git a/gfx/layers/wr/RenderRootStateManager.h b/gfx/layers/wr/RenderRootStateManager.h index 44ec23f92738..c3e7afea233b 100644 --- a/gfx/layers/wr/RenderRootStateManager.h +++ b/gfx/layers/wr/RenderRootStateManager.h @@ -16,11 +16,11 @@ namespace mozilla { namespace layers { -class RenderRootStateManager -{ - typedef nsTHashtable> WebRenderUserDataRefTable; +class RenderRootStateManager { + typedef nsTHashtable> + WebRenderUserDataRefTable; -public: + public: void AddRef(); void Release(); @@ -32,10 +32,7 @@ public: WebRenderBridgeChild* WrBridge() const; WebRenderCommandBuilder& CommandBuilder(); WebRenderUserDataRefTable* GetWebRenderUserDataTable(); - WebRenderLayerManager* LayerManager() - { - return mLayerManager; - } + WebRenderLayerManager* LayerManager() { return mLayerManager; } void 
AddImageKeyForDiscard(wr::ImageKey key); void AddBlobImageKeyForDiscard(wr::BlobImageKey key); @@ -51,7 +48,8 @@ public: void AddCompositorAnimationsIdForDiscard(uint64_t aId); void DiscardCompositorAnimations(); - void RegisterAsyncAnimation(const wr::ImageKey& aKey, SharedSurfacesAnimation* aAnimation); + void RegisterAsyncAnimation(const wr::ImageKey& aKey, + SharedSurfacesAnimation* aAnimation); void DeregisterAsyncAnimation(const wr::ImageKey& aKey); void ClearAsyncAnimations(); void WrReleasedImages(const nsTArray& aPairs); @@ -66,19 +64,23 @@ public: /// Release TextureClient that is bounded to ImageKey. /// It is used for recycling TextureClient. void ReleaseTextureOfImage(const wr::ImageKey& aKey); - wr::FontInstanceKey GetFontKeyForScaledFont(gfx::ScaledFont* aScaledFont, - wr::IpcResourceUpdateQueue* aResources = nullptr); - wr::FontKey GetFontKeyForUnscaledFont(gfx::UnscaledFont* aUnscaledFont, - wr::IpcResourceUpdateQueue* aResources = nullptr); + wr::FontInstanceKey GetFontKeyForScaledFont( + gfx::ScaledFont* aScaledFont, + wr::IpcResourceUpdateQueue* aResources = nullptr); + wr::FontKey GetFontKeyForUnscaledFont( + gfx::UnscaledFont* aUnscaledFont, + wr::IpcResourceUpdateQueue* aResources = nullptr); void FlushAsyncResourceUpdates(); -private: + + private: ~RenderRootStateManager(); WebRenderLayerManager* mLayerManager; Maybe mAsyncResourceUpdates; nsTArray mImageKeysToDelete; nsTArray mBlobImageKeysToDelete; - std::unordered_map> mAsyncAnimations; + std::unordered_map> + mAsyncAnimations; // Set of compositor animation ids for which there are active animations (as // of the last transaction) on the compositor side. 
@@ -92,7 +94,7 @@ private: friend class WebRenderLayerManager; }; -} // namespace layers -} // namespace mozilla +} // namespace layers +} // namespace mozilla #endif /* GFX_RENDERROOTSTATEMANAGER_H */ diff --git a/gfx/layers/wr/WebRenderCommandBuilder.cpp b/gfx/layers/wr/WebRenderCommandBuilder.cpp index e477fca57e3b..fd8c26f02adb 100644 --- a/gfx/layers/wr/WebRenderCommandBuilder.cpp +++ b/gfx/layers/wr/WebRenderCommandBuilder.cpp @@ -1067,8 +1067,9 @@ static bool IsItemProbablyActive(nsDisplayItem* aItem, bool is2D = t.Is2D(&t2d); GP("active: %d\n", transformItem->MayBeAnimated(aDisplayListBuilder)); return transformItem->MayBeAnimated(aDisplayListBuilder, false) || - !is2D || HasActiveChildren(*transformItem->GetChildren(), - aDisplayListBuilder); + !is2D || + HasActiveChildren(*transformItem->GetChildren(), + aDisplayListBuilder); } case DisplayItemType::TYPE_OPACITY: { nsDisplayOpacity* opacityItem = static_cast(aItem); @@ -2303,8 +2304,7 @@ Maybe WebRenderCommandBuilder::BuildWrMaskImage( recorder->FlushItem(IntRect(0, 0, size.width, size.height)); TakeExternalSurfaces(recorder, maskData->mExternalSurfaces, - mManager->GetRenderRootStateManager(), - aResources); + mManager->GetRenderRootStateManager(), aResources); recorder->Finish(); Range bytes((uint8_t*)recorder->mOutputStream.mData, diff --git a/gfx/thebes/gfxFont.h b/gfx/thebes/gfxFont.h index d4a0cf8007e3..e779e822ae64 100644 --- a/gfx/thebes/gfxFont.h +++ b/gfx/thebes/gfxFont.h @@ -1940,9 +1940,7 @@ class gfxFont { // The return value is interpreted as a horizontal advance in 16.16 fixed // point format. 
- virtual int32_t GetGlyphWidth(uint16_t aGID) { - return -1; - } + virtual int32_t GetGlyphWidth(uint16_t aGID) { return -1; } bool IsSpaceGlyphInvisible(DrawTarget* aRefDrawTarget, const gfxTextRun* aTextRun); diff --git a/image/decoders/nsWebPDecoder.cpp b/image/decoders/nsWebPDecoder.cpp index 9589ec4703fe..88bdd7e081c4 100644 --- a/image/decoders/nsWebPDecoder.cpp +++ b/image/decoders/nsWebPDecoder.cpp @@ -308,10 +308,10 @@ void nsWebPDecoder::ApplyColorProfile(const char* aProfile, size_t aLength) { uint32_t profileSpace = qcms_profile_get_color_space(mInProfile); if (profileSpace == icSigGrayData) { // WebP doesn't produce grayscale data, this must be corrupt. - MOZ_LOG( - sWebPLog, LogLevel::Error, - ("[this=%p] nsWebPDecoder::ApplyColorProfile -- ignoring grayscale color profile\n", - this)); + MOZ_LOG(sWebPLog, LogLevel::Error, + ("[this=%p] nsWebPDecoder::ApplyColorProfile -- ignoring grayscale " + "color profile\n", + this)); return; } diff --git a/ipc/chromium/src/base/at_exit.cc b/ipc/chromium/src/base/at_exit.cc index 340d7b14d8ba..057d8b2678de 100644 --- a/ipc/chromium/src/base/at_exit.cc +++ b/ipc/chromium/src/base/at_exit.cc @@ -15,15 +15,13 @@ namespace base { // thread-safe access, since it will only be modified in testing. 
static AtExitManager* g_top_manager = NULL; -AtExitManager::AtExitManager() : lock_("AtExitManager"), - next_manager_(NULL) { +AtExitManager::AtExitManager() : lock_("AtExitManager"), next_manager_(NULL) { DCHECK(!g_top_manager); g_top_manager = this; } -AtExitManager::AtExitManager(bool shadow) : lock_("AtExitManager"), - next_manager_(g_top_manager) - { +AtExitManager::AtExitManager(bool shadow) + : lock_("AtExitManager"), next_manager_(g_top_manager) { DCHECK(shadow || !g_top_manager); g_top_manager = this; } diff --git a/ipc/chromium/src/base/time_win.cc b/ipc/chromium/src/base/time_win.cc index 302a9cb5e181..b7f2a267f92f 100644 --- a/ipc/chromium/src/base/time_win.cc +++ b/ipc/chromium/src/base/time_win.cc @@ -243,15 +243,13 @@ class NowSingleton { private: explicit NowSingleton(mozilla::StaticMutex& aMutex) - : lock_(aMutex) - , rollover_(TimeDelta::FromMilliseconds(0)) - , last_seen_(0) - { - } + : lock_(aMutex), + rollover_(TimeDelta::FromMilliseconds(0)), + last_seen_(0) {} ~NowSingleton() = default; mozilla::StaticMutex& lock_; // To protected last_seen_ and rollover_. - TimeDelta rollover_; // Accumulation of time lost due to rollover. + TimeDelta rollover_; // Accumulation of time lost due to rollover. DWORD last_seen_; // The last timeGetTime value we saw, to detect rollover. 
DISALLOW_COPY_AND_ASSIGN(NowSingleton); diff --git a/ipc/chromium/src/chrome/common/ipc_channel_posix.cc b/ipc/chromium/src/chrome/common/ipc_channel_posix.cc index c982acd44d51..4bfb58e1c8ce 100644 --- a/ipc/chromium/src/chrome/common/ipc_channel_posix.cc +++ b/ipc/chromium/src/chrome/common/ipc_channel_posix.cc @@ -150,9 +150,7 @@ class PipeMap { } private: - explicit PipeMap(mozilla::StaticMutex& aMutex) - : lock_(aMutex) - {} + explicit PipeMap(mozilla::StaticMutex& aMutex) : lock_(aMutex) {} ~PipeMap() = default; mozilla::StaticMutex& lock_; diff --git a/ipc/glue/CrashReporterHost.cpp b/ipc/glue/CrashReporterHost.cpp index d397a0a51381..3910c8578e3b 100644 --- a/ipc/glue/CrashReporterHost.cpp +++ b/ipc/glue/CrashReporterHost.cpp @@ -20,15 +20,20 @@ // in the code below. Making them equal also ensures that if new process // types are added, people will know they may need to add crash reporting // support in various places because compilation errors will be triggered here. -static_assert(nsICrashService::PROCESS_TYPE_MAIN == (int)GeckoProcessType_Default, +static_assert(nsICrashService::PROCESS_TYPE_MAIN == + (int)GeckoProcessType_Default, "GeckoProcessType enum is out of sync with nsICrashService!"); -static_assert(nsICrashService::PROCESS_TYPE_PLUGIN == (int)GeckoProcessType_Plugin, +static_assert(nsICrashService::PROCESS_TYPE_PLUGIN == + (int)GeckoProcessType_Plugin, "GeckoProcessType enum is out of sync with nsICrashService!"); -static_assert(nsICrashService::PROCESS_TYPE_CONTENT == (int)GeckoProcessType_Content, +static_assert(nsICrashService::PROCESS_TYPE_CONTENT == + (int)GeckoProcessType_Content, "GeckoProcessType enum is out of sync with nsICrashService!"); -static_assert(nsICrashService::PROCESS_TYPE_IPDLUNITTEST == (int)GeckoProcessType_IPDLUnitTest, +static_assert(nsICrashService::PROCESS_TYPE_IPDLUNITTEST == + (int)GeckoProcessType_IPDLUnitTest, "GeckoProcessType enum is out of sync with nsICrashService!"); 
-static_assert(nsICrashService::PROCESS_TYPE_GMPLUGIN == (int)GeckoProcessType_GMPlugin, +static_assert(nsICrashService::PROCESS_TYPE_GMPLUGIN == + (int)GeckoProcessType_GMPlugin, "GeckoProcessType enum is out of sync with nsICrashService!"); static_assert(nsICrashService::PROCESS_TYPE_GPU == (int)GeckoProcessType_GPU, "GeckoProcessType enum is out of sync with nsICrashService!"); @@ -36,14 +41,15 @@ static_assert(nsICrashService::PROCESS_TYPE_VR == (int)GeckoProcessType_VR, "GeckoProcessType enum is out of sync with nsICrashService!"); static_assert(nsICrashService::PROCESS_TYPE_RDD == (int)GeckoProcessType_RDD, "GeckoProcessType enum is out of sync with nsICrashService!"); -static_assert(nsICrashService::PROCESS_TYPE_SOCKET == (int)GeckoProcessType_Socket, +static_assert(nsICrashService::PROCESS_TYPE_SOCKET == + (int)GeckoProcessType_Socket, "GeckoProcessType enum is out of sync with nsICrashService!"); // Add new static asserts here if you add more process types. // Update this static assert as well. -static_assert(nsICrashService::PROCESS_TYPE_SOCKET + 1 == (int)GeckoProcessType_End, +static_assert(nsICrashService::PROCESS_TYPE_SOCKET + 1 == + (int)GeckoProcessType_End, "GeckoProcessType enum is out of sync with nsICrashService!"); - namespace mozilla { namespace ipc { @@ -116,14 +122,14 @@ bool CrashReporterHost::FinalizeCrashReport() { // unit tests), but that's OK. 
switch (mProcessType) { #define GECKO_PROCESS_TYPE(enum_name, string_name, xre_name) \ - case GeckoProcessType_##enum_name: \ - type.AssignLiteral(string_name); \ - break; + case GeckoProcessType_##enum_name: \ + type.AssignLiteral(string_name); \ + break; #include "mozilla/GeckoProcessTypes.h" #undef GECKO_PROCESS_TYPE - default: - NS_ERROR("unknown process type"); - break; + default: + NS_ERROR("unknown process type"); + break; } } annotations[CrashReporter::Annotation::ProcessType] = type; @@ -188,13 +194,13 @@ bool CrashReporterHost::FinalizeCrashReport() { } else { switch (aProcessType) { #define GECKO_PROCESS_TYPE(enum_name, string_name, xre_name) \ - case GeckoProcessType_##enum_name: \ - telemetryKey.AssignLiteral(string_name); \ - break; + case GeckoProcessType_##enum_name: \ + telemetryKey.AssignLiteral(string_name); \ + break; #include "mozilla/GeckoProcessTypes.h" #undef GECKO_PROCESS_TYPE - // We can't really hit this, thanks to the above switch, but having it here - // will placate the compiler. + // We can't really hit this, thanks to the above switch, but having it + // here will placate the compiler. 
default: NS_ERROR("unknown process type"); return; diff --git a/js/src/builtin/ModuleObject.cpp b/js/src/builtin/ModuleObject.cpp index 1a9b158db206..f239e064fc25 100644 --- a/js/src/builtin/ModuleObject.cpp +++ b/js/src/builtin/ModuleObject.cpp @@ -381,7 +381,8 @@ bool IndirectBindingMap::lookup(jsid name, ModuleEnvironmentObject** envOut, } SetProxyReservedSlot(object, ExportsSlot, ObjectValue(*exports)); - SetProxyReservedSlot(object, BindingsSlot, PrivateValue(rootedBindings.release())); + SetProxyReservedSlot(object, BindingsSlot, + PrivateValue(rootedBindings.release())); return &object->as(); } diff --git a/js/src/frontend/NameFunctions.cpp b/js/src/frontend/NameFunctions.cpp index b076942f05a9..ed1195e32918 100644 --- a/js/src/frontend/NameFunctions.cpp +++ b/js/src/frontend/NameFunctions.cpp @@ -985,8 +985,8 @@ class NameResolver { MOZ_ASSERT(parents[initialParents] == cur, "pushed child shouldn't change underneath us"); - AlwaysPoison(&parents[initialParents], 0xFF, sizeof(parents[initialParents]), - MemCheckKind::MakeUndefined); + AlwaysPoison(&parents[initialParents], 0xFF, + sizeof(parents[initialParents]), MemCheckKind::MakeUndefined); return true; } diff --git a/js/src/gc/GC.cpp b/js/src/gc/GC.cpp index 5860fc17cbab..442478a82bfc 100644 --- a/js/src/gc/GC.cpp +++ b/js/src/gc/GC.cpp @@ -592,7 +592,7 @@ inline size_t Arena::finalize(FreeOp* fop, AllocKind thingKind, } else { t->finalize(fop); AlwaysPoison(t, JS_SWEPT_TENURED_PATTERN, thingSize, - MemCheckKind::MakeUndefined); + MemCheckKind::MakeUndefined); gcTracer.traceTenuredFinalize(t); } } @@ -3587,8 +3587,8 @@ void GCRuntime::sweepFromBackgroundThread(AutoLockHelperThreadState& lock) { AutoUnlockHelperThreadState unlock(lock); sweepBackgroundThings(zones, freeLifoAlloc); - // The main thread may call queueZonesAndStartBackgroundSweep() while this is - // running so we must check there is no more work after releasing the + // The main thread may call queueZonesAndStartBackgroundSweep() while this 
+ // is running so we must check there is no more work after releasing the // lock. } while (!backgroundSweepZones.ref().isEmpty()); } @@ -3642,8 +3642,7 @@ void GCRuntime::startBackgroundFree() { } void BackgroundFreeTask::run() { - AutoTraceLog logFreeing(TraceLoggerForCurrentThread(), - TraceLogger_GCFree); + AutoTraceLog logFreeing(TraceLoggerForCurrentThread(), TraceLogger_GCFree); AutoLockHelperThreadState lock; @@ -3666,16 +3665,15 @@ void GCRuntime::freeFromBackgroundThread(AutoLockHelperThreadState& lock) { lifoBlocks.freeAll(); - for (Nursery::BufferSet::Range r = buffers.all(); !r.empty(); r.popFront()) { + for (Nursery::BufferSet::Range r = buffers.all(); !r.empty(); + r.popFront()) { rt->defaultFreeOp()->free_(r.front()); } } while (!lifoBlocksToFree.ref().isEmpty() || !buffersToFreeAfterMinorGC.ref().empty()); } -void GCRuntime::waitBackgroundFreeEnd() { - freeTask.join(); -} +void GCRuntime::waitBackgroundFreeEnd() { freeTask.join(); } struct IsAboutToBeFinalizedFunctor { template @@ -6935,8 +6933,8 @@ static bool ShouldCleanUpEverything(JS::gcreason::Reason reason, } static bool ShouldSweepOnBackgroundThread(JS::gcreason::Reason reason) { - return reason != JS::gcreason::DESTROY_RUNTIME && - !gcTracer.traceEnabled() && CanUseExtraThreads(); + return reason != JS::gcreason::DESTROY_RUNTIME && !gcTracer.traceEnabled() && + CanUseExtraThreads(); } void GCRuntime::incrementalSlice(SliceBudget& budget, diff --git a/js/src/gc/Heap.h b/js/src/gc/Heap.h index a8f8703b5ac4..43ce7748da23 100644 --- a/js/src/gc/Heap.h +++ b/js/src/gc/Heap.h @@ -166,8 +166,9 @@ class FreeSpan { return nullptr; // The span is empty. 
} checkSpan(arena); - DebugOnlyPoison(reinterpret_cast(thing), JS_ALLOCATED_TENURED_PATTERN, - thingSize, MemCheckKind::MakeUndefined); + DebugOnlyPoison(reinterpret_cast(thing), + JS_ALLOCATED_TENURED_PATTERN, thingSize, + MemCheckKind::MakeUndefined); return reinterpret_cast(thing); } @@ -237,7 +238,7 @@ class Arena { */ static const size_t DELAYED_MARKING_FLAG_BITS = 3; static const size_t DELAYED_MARKING_ARENA_BITS = - JS_BITS_PER_WORD - 8 - DELAYED_MARKING_FLAG_BITS; + JS_BITS_PER_WORD - 8 - DELAYED_MARKING_FLAG_BITS; size_t onDelayedMarkingList_ : 1; size_t hasDelayedBlackMarking_ : 1; size_t hasDelayedGrayMarking_ : 1; diff --git a/js/src/gc/Marking.cpp b/js/src/gc/Marking.cpp index 9e4626f8982f..fd69bb5dceb1 100644 --- a/js/src/gc/Marking.cpp +++ b/js/src/gc/Marking.cpp @@ -974,7 +974,7 @@ static bool TraceKindParticipatesInCC(JS::TraceKind kind) { return DispatchTraceKindTyped(ParticipatesInCCFunctor(), kind); } -#endif // DEBUG +#endif // DEBUG template bool js::GCMarker::mark(T* thing) { diff --git a/js/src/gc/Nursery.cpp b/js/src/gc/Nursery.cpp index 88c413a88bf6..adb217fc9b61 100644 --- a/js/src/gc/Nursery.cpp +++ b/js/src/gc/Nursery.cpp @@ -72,8 +72,7 @@ inline void js::NurseryChunk::poisonAndInit(JSRuntime* rt, size_t extent) { MOZ_ASSERT(extent <= ChunkSize); MOZ_MAKE_MEM_UNDEFINED(this, extent); - Poison(this, JS_FRESH_NURSERY_PATTERN, extent, - MemCheckKind::MakeUndefined); + Poison(this, JS_FRESH_NURSERY_PATTERN, extent, MemCheckKind::MakeUndefined); new (&trailer) gc::ChunkTrailer(rt, &rt->gc.storeBuffer()); } @@ -182,9 +181,7 @@ bool js::Nursery::init(uint32_t maxNurseryBytes, AutoLockGCBgAlloc& lock) { return true; } -js::Nursery::~Nursery() { - disable(); -} +js::Nursery::~Nursery() { disable(); } void js::Nursery::enable() { MOZ_ASSERT(isEmpty()); diff --git a/js/src/gc/Nursery.h b/js/src/gc/Nursery.h index 0c95a7583557..cae18fdebfd3 100644 --- a/js/src/gc/Nursery.h +++ b/js/src/gc/Nursery.h @@ -312,8 +312,7 @@ class Nursery { } size_t 
sizeOfMallocedBuffers(mozilla::MallocSizeOf mallocSizeOf) const { size_t total = 0; - for (BufferSet::Range r = mallocedBuffers.all(); !r.empty(); - r.popFront()) { + for (BufferSet::Range r = mallocedBuffers.all(); !r.empty(); r.popFront()) { total += mallocSizeOf(r.front()); } total += mallocedBuffers.shallowSizeOfExcludingThis(mallocSizeOf); diff --git a/js/src/gc/Statistics.cpp b/js/src/gc/Statistics.cpp index 0eed2468e725..7be1f1480010 100644 --- a/js/src/gc/Statistics.cpp +++ b/js/src/gc/Statistics.cpp @@ -181,8 +181,9 @@ Phase Statistics::lookupChildPhase(PhaseKind phaseKind) const { } if (phase == Phase::NONE) { - MOZ_CRASH_UNSAFE_PRINTF("Child phase kind %u not found under current phase kind %u", - unsigned(phaseKind), unsigned(currentPhaseKind())); + MOZ_CRASH_UNSAFE_PRINTF( + "Child phase kind %u not found under current phase kind %u", + unsigned(phaseKind), unsigned(currentPhaseKind())); } return phase; diff --git a/js/src/jit-test/tests/debug/bug-1192401.js b/js/src/jit-test/tests/debug/bug-1192401.js index 82bc233d7962..d55d22b8e5bd 100644 --- a/js/src/jit-test/tests/debug/bug-1192401.js +++ b/js/src/jit-test/tests/debug/bug-1192401.js @@ -1,3 +1,4 @@ +// |jit-test| --more-compartments const dbg = new Debugger(); const g = evalcx("lazy"); dbg.addDebuggee(g); diff --git a/js/src/jit-test/tests/realms/basic.js b/js/src/jit-test/tests/realms/basic.js index d5a8560ccbf7..215b983da88c 100644 --- a/js/src/jit-test/tests/realms/basic.js +++ b/js/src/jit-test/tests/realms/basic.js @@ -78,3 +78,10 @@ function testTypedArrayLazyBuffer(global) { } testTypedArrayLazyBuffer(newGlobal()); testTypedArrayLazyBuffer(newGlobal({sameCompartmentAs: this})); + +function testEvalcx() { + var g = newGlobal(); + evalcx("this.x = 7", g); + assertEq(g.x, 7); +} +testEvalcx(); diff --git a/js/src/jit/BaselineCompiler.cpp b/js/src/jit/BaselineCompiler.cpp index 7ded43555916..1760bf6cd72e 100644 --- a/js/src/jit/BaselineCompiler.cpp +++ b/js/src/jit/BaselineCompiler.cpp @@ 
-1740,8 +1740,8 @@ bool BaselineCodeGen::emit_JSOP_FUNCTIONTHIS() { frame.pushThis(); - // In strict mode code or self-hosted functions, |this| is left alone. - if (script->strict() || (function() && function()->isSelfHostedBuiltin())) { + // In strict mode code, |this| is left alone. + if (script->strict()) { return true; } diff --git a/js/src/jit/CodeGenerator.cpp b/js/src/jit/CodeGenerator.cpp index 3ef02b960be0..e893cca3252e 100644 --- a/js/src/jit/CodeGenerator.cpp +++ b/js/src/jit/CodeGenerator.cpp @@ -5413,9 +5413,9 @@ void CodeGenerator::visitDefVar(LDefVar* lir) { JSScript* script = current->mir()->info().script(); jsbytecode* pc = lir->mir()->resumePoint()->pc(); - pushArg(ImmPtr(pc)); // jsbytecode* - pushArg(ImmGCPtr(script)); // JSScript* - pushArg(envChain); // JSObject* + pushArg(ImmPtr(pc)); // jsbytecode* + pushArg(ImmGCPtr(script)); // JSScript* + pushArg(envChain); // JSObject* callVM(DefVarInfo, lir); } diff --git a/js/src/jit/IonBuilder.cpp b/js/src/jit/IonBuilder.cpp index 5291ed2c551e..768fee7d92c9 100644 --- a/js/src/jit/IonBuilder.cpp +++ b/js/src/jit/IonBuilder.cpp @@ -12852,8 +12852,8 @@ AbortReasonOr IonBuilder::jsop_functionthis() { MOZ_ASSERT(info().funMaybeLazy()); MOZ_ASSERT(!info().funMaybeLazy()->isArrow()); - if (script()->strict() || info().funMaybeLazy()->isSelfHostedBuiltin()) { - // No need to wrap primitive |this| in strict mode or self-hosted code. + if (script()->strict()) { + // No need to wrap primitive |this| in strict mode. 
current->pushSlot(info().thisSlot()); return Ok(); } diff --git a/js/src/jit/JitFrames.cpp b/js/src/jit/JitFrames.cpp index 3a56f65b9542..5d0ecc9f9b75 100644 --- a/js/src/jit/JitFrames.cpp +++ b/js/src/jit/JitFrames.cpp @@ -150,8 +150,7 @@ class IonTryNoteFilter { class TryNoteIterIon : public TryNoteIter { public: TryNoteIterIon(JSContext* cx, const InlineFrameIterator& frame) - : TryNoteIter(cx, frame.script(), frame.pc(), - IonTryNoteFilter(frame)) {} + : TryNoteIter(cx, frame.script(), frame.pc(), IonTryNoteFilter(frame)) {} }; static void HandleExceptionIon(JSContext* cx, const InlineFrameIterator& frame, @@ -323,8 +322,7 @@ class BaselineTryNoteFilter { class TryNoteIterBaseline : public TryNoteIter { public: TryNoteIterBaseline(JSContext* cx, BaselineFrame* frame, jsbytecode* pc) - : TryNoteIter(cx, frame->script(), pc, BaselineTryNoteFilter(frame)) { - } + : TryNoteIter(cx, frame->script(), pc, BaselineTryNoteFilter(frame)) {} }; // Close all live iterators on a BaselineFrame due to exception unwinding. 
The diff --git a/js/src/jit/arm64/Assembler-arm64.h b/js/src/jit/arm64/Assembler-arm64.h index 0cc882292d96..0c1c39b89a4a 100644 --- a/js/src/jit/arm64/Assembler-arm64.h +++ b/js/src/jit/arm64/Assembler-arm64.h @@ -43,7 +43,7 @@ static constexpr FloatRegister ScratchDoubleReg = {FloatRegisters::d31, FloatRegisters::Double}; struct ScratchDoubleScope : public AutoFloatRegisterScope { explicit ScratchDoubleScope(MacroAssembler& masm) - : AutoFloatRegisterScope(masm, ScratchDoubleReg) {} + : AutoFloatRegisterScope(masm, ScratchDoubleReg) {} }; static constexpr FloatRegister ReturnFloat32Reg = {FloatRegisters::s0, @@ -52,7 +52,7 @@ static constexpr FloatRegister ScratchFloat32Reg = {FloatRegisters::s31, FloatRegisters::Single}; struct ScratchFloat32Scope : public AutoFloatRegisterScope { explicit ScratchFloat32Scope(MacroAssembler& masm) - : AutoFloatRegisterScope(masm, ScratchFloat32Reg) {} + : AutoFloatRegisterScope(masm, ScratchFloat32Reg) {} }; static constexpr Register InvalidReg{Registers::invalid_reg}; diff --git a/js/src/jit/arm64/MacroAssembler-arm64.cpp b/js/src/jit/arm64/MacroAssembler-arm64.cpp index e9c904e39d03..330405583e81 100644 --- a/js/src/jit/arm64/MacroAssembler-arm64.cpp +++ b/js/src/jit/arm64/MacroAssembler-arm64.cpp @@ -493,10 +493,10 @@ void MacroAssembler::storeRegsInMask(LiveRegisterSet set, Address dest, } else { MOZ_CRASH("Unknown register type."); } - } MOZ_ASSERT(numFpu == 0); - // Padding to keep the stack aligned, taken from the x64 and mips64 implementations. + // Padding to keep the stack aligned, taken from the x64 and mips64 + // implementations. 
diffF -= diffF % sizeof(uintptr_t); MOZ_ASSERT(diffF == 0); } diff --git a/js/src/jsapi.cpp b/js/src/jsapi.cpp index e888f8de21ae..e26cded0732f 100644 --- a/js/src/jsapi.cpp +++ b/js/src/jsapi.cpp @@ -3557,8 +3557,8 @@ CompileOptions& CompileOptions::setIntroductionInfoToCaller( DescribeScriptedCallerForCompilation(cx, &maybeScript, &filename, &lineno, &pcOffset, &mutedErrors); if (filename) { - return setIntroductionInfo(filename, introductionType, lineno, - maybeScript, pcOffset); + return setIntroductionInfo(filename, introductionType, lineno, maybeScript, + pcOffset); } else { return setIntroductionType(introductionType); } diff --git a/js/src/jsfriendapi.cpp b/js/src/jsfriendapi.cpp index 0ce63d9e98ba..8aac0f009ddc 100644 --- a/js/src/jsfriendapi.cpp +++ b/js/src/jsfriendapi.cpp @@ -1328,9 +1328,11 @@ JS_FRIEND_API void js::SetWindowProxy(JSContext* cx, HandleObject global, CHECK_THREAD(cx); cx->check(global, windowProxy); - MOZ_ASSERT(IsWindowProxy(windowProxy)); - global->as().setWindowProxy(windowProxy); + + GlobalObject& globalObj = global->as(); + globalObj.setWindowProxy(windowProxy); + globalObj.lexicalEnvironment().setWindowProxyThisValue(windowProxy); } JS_FRIEND_API JSObject* js::ToWindowIfWindowProxy(JSObject* obj) { diff --git a/js/src/shell/js.cpp b/js/src/shell/js.cpp index ac4563db8969..06ce50c10c13 100644 --- a/js/src/shell/js.cpp +++ b/js/src/shell/js.cpp @@ -3767,6 +3767,13 @@ static void SetStandardRealmOptions(JS::RealmOptions& options) { static JSObject* NewSandbox(JSContext* cx, bool lazy) { JS::RealmOptions options; SetStandardRealmOptions(options); + + if (defaultToSameCompartment) { + options.creationOptions().setExistingCompartment(cx->global()); + } else { + options.creationOptions().setNewCompartmentAndZone(); + } + RootedObject obj(cx, JS_NewGlobalObject(cx, &sandbox_class, nullptr, JS::DontFireOnNewGlobalHook, options)); @@ -3848,17 +3855,13 @@ static bool EvalInContext(JSContext* cx, unsigned argc, Value* vp) { 
DescribeScriptedCaller(cx, &filename, &lineno); { - Maybe ar; - unsigned flags; - JSObject* unwrapped = UncheckedUnwrap(sobj, true, &flags); - if (flags & Wrapper::CROSS_COMPARTMENT) { - sobj = unwrapped; - ar.emplace(cx, sobj); - } + sobj = UncheckedUnwrap(sobj, true); + + JSAutoRealm ar(cx, sobj); sobj = ToWindowIfWindowProxy(sobj); - if (!(sobj->getClass()->flags & JSCLASS_IS_GLOBAL)) { + if (!JS_IsGlobalObject(sobj)) { JS_ReportErrorASCII(cx, "Invalid scope argument to evalcx"); return false; } diff --git a/js/src/vm/Debugger.cpp b/js/src/vm/Debugger.cpp index 0b2b5efc8e4f..cb53ae7c01e8 100644 --- a/js/src/vm/Debugger.cpp +++ b/js/src/vm/Debugger.cpp @@ -4213,9 +4213,9 @@ static T* findDebuggerInVector(Debugger* dbg, // a ReadBarriered version for findDebuggerInVector // TODO: Bug 1515934 - findDebuggerInVector triggers read barriers. -static ReadBarriered* -findDebuggerInVector(Debugger* dbg, - Vector, 0, js::SystemAllocPolicy>* vec) { +static ReadBarriered* findDebuggerInVector( + Debugger* dbg, + Vector, 0, js::SystemAllocPolicy>* vec) { ReadBarriered* p; for (p = vec->begin(); p != vec->end(); p++) { if (p->unbarrieredGet() == dbg) { diff --git a/js/src/vm/EnvironmentObject.cpp b/js/src/vm/EnvironmentObject.cpp index 6cc412cbb064..e2077a25a62f 100644 --- a/js/src/vm/EnvironmentObject.cpp +++ b/js/src/vm/EnvironmentObject.cpp @@ -1133,17 +1133,20 @@ bool LexicalEnvironmentObject::isExtensible() const { Value LexicalEnvironmentObject::thisValue() const { MOZ_ASSERT(isExtensible()); Value v = getReservedSlot(THIS_VALUE_OR_SCOPE_SLOT); - if (v.isObject()) { - // A WindowProxy may have been attached after this environment was - // created so check ToWindowProxyIfWindow again. For example, - // GlobalObject::createInternal will construct its lexical environment - // before SetWindowProxy can be called. 
- // See also: js::GetThisValue / js::GetThisValueOfLexical - return ObjectValue(*ToWindowProxyIfWindow(&v.toObject())); - } + + // Windows must never be exposed to script. setWindowProxyThisValue should + // have set this to the WindowProxy. + MOZ_ASSERT_IF(v.isObject(), !IsWindow(&v.toObject())); + return v; } +void LexicalEnvironmentObject::setWindowProxyThisValue(JSObject* obj) { + MOZ_ASSERT(isGlobal()); + MOZ_ASSERT(IsWindowProxy(obj)); + setReservedSlot(THIS_VALUE_OR_SCOPE_SLOT, ObjectValue(*obj)); +} + const Class LexicalEnvironmentObject::class_ = { "LexicalEnvironment", JSCLASS_HAS_RESERVED_SLOTS(LexicalEnvironmentObject::RESERVED_SLOTS) | diff --git a/js/src/vm/EnvironmentObject.h b/js/src/vm/EnvironmentObject.h index 510d277c884e..a9ec2237c658 100644 --- a/js/src/vm/EnvironmentObject.h +++ b/js/src/vm/EnvironmentObject.h @@ -567,6 +567,8 @@ class LexicalEnvironmentObject : public EnvironmentObject { return enclosingEnvironment().as(); } + void setWindowProxyThisValue(JSObject* obj); + // Global and non-syntactic lexical scopes are extensible. All other // lexical scopes are not. 
bool isExtensible() const; diff --git a/js/src/vm/Interpreter.cpp b/js/src/vm/Interpreter.cpp index 6d81f9389299..cb60b56ad06a 100644 --- a/js/src/vm/Interpreter.cpp +++ b/js/src/vm/Interpreter.cpp @@ -119,12 +119,14 @@ bool js::GetFunctionThis(JSContext* cx, AbstractFramePtr frame, MOZ_ASSERT(frame.isFunctionFrame()); MOZ_ASSERT(!frame.callee()->isArrow()); - if (frame.thisArgument().isObject() || frame.callee()->strict() || - frame.callee()->isSelfHostedBuiltin()) { + if (frame.thisArgument().isObject() || frame.callee()->strict()) { res.set(frame.thisArgument()); return true; } + MOZ_ASSERT(!frame.callee()->isSelfHostedBuiltin(), + "Self-hosted builtins must be strict"); + RootedValue thisv(cx, frame.thisArgument()); // If there is a NSVO on environment chain, use it as basis for fallback @@ -1105,8 +1107,7 @@ class InterpreterTryNoteFilter { } }; -class TryNoteIterInterpreter - : public TryNoteIter { +class TryNoteIterInterpreter : public TryNoteIter { public: TryNoteIterInterpreter(JSContext* cx, const InterpreterRegs& regs) : TryNoteIter(cx, regs.fp()->script(), regs.pc, diff --git a/js/src/vm/Interpreter.h b/js/src/vm/Interpreter.h index 3c5f40eea019..c4e3d2bd4ca5 100644 --- a/js/src/vm/Interpreter.h +++ b/js/src/vm/Interpreter.h @@ -415,7 +415,6 @@ class MOZ_STACK_CLASS TryNoteIter { uint32_t start = tn_->start; uint32_t length = tn_->length; return offset - start < length; - } bool done() const { return tn_ == tnEnd_; } const JSTryNote* operator*() const { return tn_; } diff --git a/js/src/vm/JSScript-inl.h b/js/src/vm/JSScript-inl.h index 3f1b89e90114..052141a40496 100644 --- a/js/src/vm/JSScript-inl.h +++ b/js/src/vm/JSScript-inl.h @@ -119,7 +119,7 @@ inline js::GlobalObject& JSScript::global() const { return *realm()->maybeGlobal(); } -inline bool JSScript::hasGlobal(const js::GlobalObject *global) const { +inline bool JSScript::hasGlobal(const js::GlobalObject* global) const { return global == realm()->unsafeUnbarrieredMaybeGlobal(); } diff --git 
a/js/src/vm/JSScript.cpp b/js/src/vm/JSScript.cpp index 800b074fc235..173fc50d53c1 100644 --- a/js/src/vm/JSScript.cpp +++ b/js/src/vm/JSScript.cpp @@ -1382,7 +1382,8 @@ ScriptSourceObject* ScriptSourceObject::createInternal(JSContext* cx, obj->initReservedSlot(ELEMENT_SLOT, MagicValue(JS_GENERIC_MAGIC)); obj->initReservedSlot(ELEMENT_PROPERTY_SLOT, MagicValue(JS_GENERIC_MAGIC)); obj->initReservedSlot(INTRODUCTION_SCRIPT_SLOT, MagicValue(JS_GENERIC_MAGIC)); - obj->initReservedSlot(INTRODUCTION_SOURCE_OBJECT_SLOT, MagicValue(JS_GENERIC_MAGIC)); + obj->initReservedSlot(INTRODUCTION_SOURCE_OBJECT_SLOT, + MagicValue(JS_GENERIC_MAGIC)); return obj; } diff --git a/js/src/vm/SavedStacks.cpp b/js/src/vm/SavedStacks.cpp index 9cf2a8e7efe8..5f21b3c6b799 100644 --- a/js/src/vm/SavedStacks.cpp +++ b/js/src/vm/SavedStacks.cpp @@ -1677,8 +1677,8 @@ bool SavedStacks::getLocation(JSContext* cx, const FrameIter& iter, } void SavedStacks::chooseSamplingProbability(Realm* realm) { - // Use unbarriered version to prevent triggering read barrier while collecting, - // this is safe as long as global does not escape. + // Use unbarriered version to prevent triggering read barrier while + // collecting, this is safe as long as global does not escape. 
GlobalObject* global = realm->unsafeUnbarrieredMaybeGlobal(); if (!global) { return; @@ -1703,8 +1703,7 @@ void SavedStacks::chooseSamplingProbability(Realm* realm) { if (dbgp->trackingAllocationSites && dbgp->enabled) { foundAnyDebuggers = true; - probability = - std::max(dbgp->allocationSamplingProbability, probability); + probability = std::max(dbgp->allocationSamplingProbability, probability); } } MOZ_ASSERT(foundAnyDebuggers); diff --git a/js/src/vm/SelfHosting.cpp b/js/src/vm/SelfHosting.cpp index 796226606a21..647680ba9b36 100644 --- a/js/src/vm/SelfHosting.cpp +++ b/js/src/vm/SelfHosting.cpp @@ -3347,6 +3347,7 @@ bool JSRuntime::cloneSelfHostedFunctionScript(JSContext* cx, MOZ_ASSERT(sourceFun->nargs() == targetFun->nargs()); MOZ_ASSERT(sourceScript->hasRest() == targetFun->nonLazyScript()->hasRest()); + MOZ_ASSERT(targetFun->strict(), "Self-hosted builtins must be strict"); // The target function might have been relazified after its flags changed. targetFun->setFlags(targetFun->flags() | sourceFun->flags()); diff --git a/js/src/wasm/AsmJS.cpp b/js/src/wasm/AsmJS.cpp index dd913ad89eda..b786b26d5be8 100644 --- a/js/src/wasm/AsmJS.cpp +++ b/js/src/wasm/AsmJS.cpp @@ -1325,7 +1325,7 @@ class MOZ_STACK_CLASS JS_HAZ_ROOTED ModuleValidatorShared { using MathNameMap = HashMap; using ArrayViewVector = Vector; -protected: + protected: JSContext* cx_; CodeNode* moduleFunctionNode_; PropertyName* moduleFunctionName_; diff --git a/js/src/wasm/WasmCompile.cpp b/js/src/wasm/WasmCompile.cpp index 40cb0732a57a..f77f88dc4904 100644 --- a/js/src/wasm/WasmCompile.cpp +++ b/js/src/wasm/WasmCompile.cpp @@ -259,8 +259,10 @@ static const double arm64IonBytecodesPerMs = 750; // Estimate static const double x64DesktopTierCutoff = x64IonBytecodesPerMs * tierCutoffMs; static const double x86DesktopTierCutoff = x86IonBytecodesPerMs * tierCutoffMs; static const double x86MobileTierCutoff = x86DesktopTierCutoff / 2; // Guess -static const double arm32MobileTierCutoff = 
arm32IonBytecodesPerMs * tierCutoffMs; -static const double arm64MobileTierCutoff = arm64IonBytecodesPerMs * tierCutoffMs; +static const double arm32MobileTierCutoff = + arm32IonBytecodesPerMs * tierCutoffMs; +static const double arm64MobileTierCutoff = + arm64IonBytecodesPerMs * tierCutoffMs; static double CodesizeCutoff(SystemClass cls) { switch (cls) { diff --git a/layout/build/nsLayoutModule.cpp b/layout/build/nsLayoutModule.cpp index 29a297280029..c684d3f21464 100644 --- a/layout/build/nsLayoutModule.cpp +++ b/layout/build/nsLayoutModule.cpp @@ -701,12 +701,9 @@ static void LayoutModuleDtor() { xpcModuleDtor(); } -static const mozilla::Module kLayoutModule = {mozilla::Module::kVersion, - kLayoutCIDs, - kLayoutContracts, - kLayoutCategories, - nullptr, - Initialize, - LayoutModuleDtor}; +static const mozilla::Module kLayoutModule = { + mozilla::Module::kVersion, kLayoutCIDs, kLayoutContracts, + kLayoutCategories, nullptr, Initialize, + LayoutModuleDtor}; NSMODULE_DEFN(nsLayoutModule) = &kLayoutModule; diff --git a/layout/generic/nsIFrame.h b/layout/generic/nsIFrame.h index ca3da16def0e..8cfd76b64857 100644 --- a/layout/generic/nsIFrame.h +++ b/layout/generic/nsIFrame.h @@ -14,29 +14,30 @@ #endif #if (defined(XP_WIN) && !defined(HAVE_64BIT_BUILD)) || defined(ANDROID) -// Blink's magic depth limit from its HTML parser (513) plus as much as fits in the -// default run-time stack on armv7 Android on Dalvik when using display: block minus -// a bit just to be sure. The Dalvik default stack crashes at 588. ART can do a few -// frames more. Using the same number for 32-bit Windows for consistency. Over there, -// Blink's magic depth of 513 doesn't fit in the default stack of 1 MB, but this magic -// depth fits when the default is grown by mere 192 KB (tested in 64 KB increments). 
+// Blink's magic depth limit from its HTML parser (513) plus as much as fits in +// the default run-time stack on armv7 Android on Dalvik when using display: +// block minus a bit just to be sure. The Dalvik default stack crashes at 588. +// ART can do a few frames more. Using the same number for 32-bit Windows for +// consistency. Over there, Blink's magic depth of 513 doesn't fit in the +// default stack of 1 MB, but this magic depth fits when the default is grown by +// mere 192 KB (tested in 64 KB increments). // // 32-bit Windows has a different limit compared to 64-bit desktop, because the -// default stack size affects all threads and consumes address space. Fixing that -// is bug 1257522. +// default stack size affects all threads and consumes address space. Fixing +// that is bug 1257522. // -// 32-bit Android on ARM already happens to have defaults that are close enough to -// what makes sense as a temporary measure on Windows, so adjusting the Android -// stack can be a follow-up. The stack on 64-bit ARM needs adjusting in any case -// before 64-bit ARM can become tier-1. See bug 1400811. +// 32-bit Android on ARM already happens to have defaults that are close enough +// to what makes sense as a temporary measure on Windows, so adjusting the +// Android stack can be a follow-up. The stack on 64-bit ARM needs adjusting in +// any case before 64-bit ARM can become tier-1. See bug 1400811. // -// Ideally, we'd get rid of this smaller limit and make 32-bit Windows and Android -// capable of working with the Linux/Mac/Win64 number below. +// Ideally, we'd get rid of this smaller limit and make 32-bit Windows and +// Android capable of working with the Linux/Mac/Win64 number below. #define MAX_REFLOW_DEPTH 585 #else -// Blink's magic depth limit from its HTML parser times two. Also just about fits -// within the system default runtime stack limit of 8 MB on 64-bit Mac and Linux with -// display: table-cell. 
+// Blink's magic depth limit from its HTML parser times two. Also just about +// fits within the system default runtime stack limit of 8 MB on 64-bit Mac and +// Linux with display: table-cell. #define MAX_REFLOW_DEPTH 1026 #endif diff --git a/layout/generic/nsTextFrame.cpp b/layout/generic/nsTextFrame.cpp index deed951aa410..82e0ceef5c7f 100644 --- a/layout/generic/nsTextFrame.cpp +++ b/layout/generic/nsTextFrame.cpp @@ -1898,7 +1898,7 @@ bool BuildTextRunsScanner::ContinueTextRunAcrossFrames(nsTextFrame* aFrame1, }; const nsIFrame* ancestor = - nsLayoutUtils::FindNearestCommonAncestorFrame(aFrame1, aFrame2); + nsLayoutUtils::FindNearestCommonAncestorFrame(aFrame1, aFrame2); MOZ_ASSERT(ancestor); // Map inline-end and inline-start to physical sides for checking presence diff --git a/layout/style/nsDOMCSSDeclaration.h b/layout/style/nsDOMCSSDeclaration.h index 42d774d6f0d9..93f2aad5ee08 100644 --- a/layout/style/nsDOMCSSDeclaration.h +++ b/layout/style/nsDOMCSSDeclaration.h @@ -33,7 +33,7 @@ class Rule; namespace dom { class Document; class Element; -} +} // namespace dom struct MutationClosureData { MutationClosureData() : mClosure(nullptr), mElement(nullptr), mModType(0) {} diff --git a/media/libdav1d/asm/moz.build b/media/libdav1d/asm/moz.build index ab6706eeebe5..c1d708b6133e 100644 --- a/media/libdav1d/asm/moz.build +++ b/media/libdav1d/asm/moz.build @@ -21,10 +21,7 @@ CFLAGS += [ '-I%s/dist/include/dav1d/' % TOPOBJDIR, ] -if CONFIG['CC_TYPE'] == 'clang': - CFLAGS += ['-mstack-alignment=32'] -elif CONFIG['CC_TYPE'] == 'gcc': - CFLAGS += ['-mpreferred-stack-boundary=5'] +# This is Linux only for now # Attaching config.asm file if CONFIG['CPU_ARCH'] == 'x86': @@ -32,6 +29,12 @@ if CONFIG['CPU_ARCH'] == 'x86': SOURCES += ['x86_32/config.asm'] if CONFIG['CPU_ARCH'] == 'x86_64': + # Change the default stack aligment (16) to 32 + if CONFIG['CC_TYPE'] == 'clang': + CFLAGS += ['-mstack-alignment=32'] + elif CONFIG['CC_TYPE'] == 'gcc': + CFLAGS += 
['-mpreferred-stack-boundary=5'] + if CONFIG['OS_TARGET'] == 'Darwin': ASFLAGS += ['-I%s/media/libdav1d/asm/x86_64/osx/' % TOPSRCDIR] SOURCES += ['x86_64/osx/config.asm'] diff --git a/media/libdav1d/asm/x86_32/config.asm b/media/libdav1d/asm/x86_32/config.asm index b89f5d4fde7f..16624aa533dd 100644 --- a/media/libdav1d/asm/x86_32/config.asm +++ b/media/libdav1d/asm/x86_32/config.asm @@ -5,5 +5,7 @@ %define ARCH_X86_64 0 -%define STACK_ALIGNMENT 32 +%define PIC 1 + +%define STACK_ALIGNMENT 16 diff --git a/media/libdav1d/asm/x86_64/config.asm b/media/libdav1d/asm/x86_64/config.asm index c48ececbb73f..26c26f785c97 100644 --- a/media/libdav1d/asm/x86_64/config.asm +++ b/media/libdav1d/asm/x86_64/config.asm @@ -5,7 +5,5 @@ %define ARCH_X86_64 1 -%define PIC 1 - %define STACK_ALIGNMENT 32 diff --git a/media/libdav1d/config.h b/media/libdav1d/config.h index ebb4b99cc32a..9ad7fac94955 100644 --- a/media/libdav1d/config.h +++ b/media/libdav1d/config.h @@ -36,7 +36,8 @@ #define CONFIG_8BPC 1 // Enable asm -#if ARCH_X86_64 == 1 && defined(__linux__) && !defined(__ANDROID__) +#if (ARCH_x86_32 == 1 || ARCH_X86_64 == 1) && defined(__linux__) && \ + !defined(__ANDROID__) #define HAVE_ASM 1 #else #define HAVE_ASM 0 @@ -53,7 +54,7 @@ #endif // unistd.h is used by tools, which we do not -// built, so we do not really care. +// build, so we do not really care. #define HAVE_UNISTD_H 1 // Important when asm is enabled @@ -61,5 +62,8 @@ #define PREFIX 1 #endif -// aligment is 32 in evry case +#if ARCH_X86_32 == 1 && defined(__linux__) && !defined(__ANDROID__) +#define STACK_ALIGNMENT 16 +#else #define STACK_ALIGNMENT 32 +#endif diff --git a/media/libdav1d/moz.build b/media/libdav1d/moz.build index b1c0fadf2ed6..1984dec3cba2 100644 --- a/media/libdav1d/moz.build +++ b/media/libdav1d/moz.build @@ -25,16 +25,19 @@ SOURCES += [ ] # Enable ASM on Linux for now. 
-if CONFIG['OS_TARGET'] == 'Linux' and CONFIG['CPU_ARCH'] == 'x86_64': +if CONFIG['OS_TARGET'] == 'Linux' and (CONFIG['CPU_ARCH'] in ('x86', 'x86_64')): + # Default stack aligment is 16 bytes DIRS += ['asm'] - if CONFIG['CC_TYPE'] == 'clang': - CFLAGS += ['-mstack-alignment=32'] - SOURCES['../../third_party/dav1d/src/lib.c'].flags += ['-mstackrealign'] - SOURCES['../../third_party/dav1d/src/thread_task.c'].flags += ['-mstackrealign'] - elif CONFIG['CC_TYPE'] == 'gcc': - CFLAGS += ['-mpreferred-stack-boundary=5'] - SOURCES['../../third_party/dav1d/src/lib.c'].flags += ['-mincoming-stack-boundary=4'] - SOURCES['../../third_party/dav1d/src/thread_task.c'].flags += ['-mincoming-stack-boundary=4'] + if CONFIG['CPU_ARCH'] == 'x86_64': + # Update stack aligment to 32 bytes + if CONFIG['CC_TYPE'] == 'clang': + CFLAGS += ['-mstack-alignment=32'] + SOURCES['../../third_party/dav1d/src/lib.c'].flags += ['-mstackrealign'] + SOURCES['../../third_party/dav1d/src/thread_task.c'].flags += ['-mstackrealign'] + elif CONFIG['CC_TYPE'] == 'gcc': + CFLAGS += ['-mpreferred-stack-boundary=5'] + SOURCES['../../third_party/dav1d/src/lib.c'].flags += ['-mincoming-stack-boundary=4'] + SOURCES['../../third_party/dav1d/src/thread_task.c'].flags += ['-mincoming-stack-boundary=4'] # common sources SOURCES += [ diff --git a/media/libdav1d/moz.yaml b/media/libdav1d/moz.yaml index c360cddff213..3c6111989010 100644 --- a/media/libdav1d/moz.yaml +++ b/media/libdav1d/moz.yaml @@ -20,7 +20,7 @@ origin: # Human-readable identifier for this version/release # Generally "version NNN", "tag SSS", "bookmark SSS" - release: commit b53a99b97f93d0eb15d1f532739ca062fe44b4ca + release: commit f813285c1d1a5421e0180efbb7cbdd377cd31c69 (2019-01-13T22:08:25.000Z). 
# The package's license, where possible using the mnemonic from # https://spdx.org/licenses/ diff --git a/mobile/android/components/geckoview/GeckoViewHistory.h b/mobile/android/components/geckoview/GeckoViewHistory.h index d10001af3d1d..b12681633dfc 100644 --- a/mobile/android/components/geckoview/GeckoViewHistory.h +++ b/mobile/android/components/geckoview/GeckoViewHistory.h @@ -21,7 +21,7 @@ namespace mozilla { namespace dom { class Document; } -} +} // namespace mozilla struct VisitedURI { nsCOMPtr mURI; diff --git a/modules/libpref/init/all.js b/modules/libpref/init/all.js index fe2f474329e2..923df8f160e2 100644 --- a/modules/libpref/init/all.js +++ b/modules/libpref/init/all.js @@ -1905,11 +1905,10 @@ pref("network.http.rcwn.max_wait_before_racing_ms", 500); // all available active connections. pref("network.http.focused_window_transaction_ratio", "0.9"); -// XXX Disable for intranet downloading issue. // This is the size of the flow control window (KB) (i.e., the amount of data // that the parent can send to the child before getting an ack). 0 for disable // the flow control. -pref("network.http.send_window_size", 0); +pref("network.http.send_window_size", 1024); // Whether or not we give more priority to active tab. // Note that this requires restart for changes to take effect. 
diff --git a/netwerk/base/nsIOService.cpp b/netwerk/base/nsIOService.cpp index f5c3e44e78b3..3d4eb8c6ef77 100644 --- a/netwerk/base/nsIOService.cpp +++ b/netwerk/base/nsIOService.cpp @@ -449,7 +449,8 @@ void nsIOService::NotifySocketProcessPrefsChanged(const char *aName) { dom::Pref pref(nsCString(aName), /* isLocked */ false, null_t(), null_t()); Preferences::GetPreference(&pref); auto sendPrefUpdate = [pref]() { - Unused << gIOService->mSocketProcess->GetActor()->SendPreferenceUpdate(pref); + Unused << gIOService->mSocketProcess->GetActor()->SendPreferenceUpdate( + pref); }; CallOrWaitForSocketProcess(sendPrefUpdate); } @@ -469,13 +470,14 @@ void nsIOService::OnProcessLaunchComplete(SocketProcessHost *aHost, if (!mPendingEvents.IsEmpty()) { nsTArray> pendingEvents; mPendingEvents.SwapElements(pendingEvents); - for (auto& func : pendingEvents) { + for (auto &func : pendingEvents) { func(); } } } -void nsIOService::CallOrWaitForSocketProcess(const std::function& aFunc) { +void nsIOService::CallOrWaitForSocketProcess( + const std::function &aFunc) { MOZ_ASSERT(NS_IsMainThread()); if (IsSocketProcessLaunchComplete() && SocketProcessReady()) { aFunc(); @@ -499,9 +501,9 @@ void nsIOService::OnProcessUnexpectedShutdown(SocketProcessHost *aHost) { RefPtr nsIOService::GetSocketProcessMemoryReporter() { // Check the prefs here again, since we don't want to create // SocketProcessMemoryReporter for some tests. 
- if (!Preferences::GetBool("network.process.enabled") || !SocketProcessReady()) { + if (!Preferences::GetBool("network.process.enabled") || + !SocketProcessReady()) { return nullptr; - } return new SocketProcessMemoryReporter(); diff --git a/netwerk/ipc/SocketProcessHost.cpp b/netwerk/ipc/SocketProcessHost.cpp index 63490c025a05..fb547bca77a0 100644 --- a/netwerk/ipc/SocketProcessHost.cpp +++ b/netwerk/ipc/SocketProcessHost.cpp @@ -62,8 +62,8 @@ class OfflineObserver final : public nsIObserver { } else if (!strcmp(aTopic, NS_XPCOM_WILL_SHUTDOWN_OBSERVER_ID)) { nsCOMPtr obs = mozilla::services::GetObserverService(); - obs->RemoveObserver(this, NS_IPC_IOSERVICE_SET_OFFLINE_TOPIC); - obs->RemoveObserver(this, NS_XPCOM_WILL_SHUTDOWN_OBSERVER_ID); + obs->RemoveObserver(this, NS_IPC_IOSERVICE_SET_OFFLINE_TOPIC); + obs->RemoveObserver(this, NS_XPCOM_WILL_SHUTDOWN_OBSERVER_ID); } return NS_OK; diff --git a/netwerk/protocol/http/HttpChannelChild.cpp b/netwerk/protocol/http/HttpChannelChild.cpp index 9553ee1084ba..5fd128351070 100644 --- a/netwerk/protocol/http/HttpChannelChild.cpp +++ b/netwerk/protocol/http/HttpChannelChild.cpp @@ -880,7 +880,11 @@ void HttpChannelChild::OnTransportAndData(const nsresult& channelStatus, bool HttpChannelChild::NeedToReportBytesRead() { if (mCacheNeedToReportBytesReadInitialized) { - return mNeedToReportBytesRead; + // No need to send SendRecvBytes when diversion starts since the parent + // process will suspend for diversion triggered in during OnStrartRequest at + // child side, which is earlier. Parent will take over the flow control + // after the diverting starts. Sending |SendBytesRead| is redundant. 
+ return mNeedToReportBytesRead && !mDivertingToParent; } // Might notify parent for partial cache, and the IPC message is ignored by diff --git a/netwerk/protocol/http/HttpChannelParent.cpp b/netwerk/protocol/http/HttpChannelParent.cpp index 000bb854c1f5..d3eae115e90b 100644 --- a/netwerk/protocol/http/HttpChannelParent.cpp +++ b/netwerk/protocol/http/HttpChannelParent.cpp @@ -1609,6 +1609,7 @@ HttpChannelParent::OnDataAvailable(nsIRequest* aRequest, nsISupports* aContext, // We're going to run out of sending window size if (mSendWindowSize > 0 && mSendWindowSize <= count) { MOZ_ASSERT(!mSuspendedForFlowControl); + LOG((" suspend the channel due to e10s backpressure")); Unused << mChannel->Suspend(); mSuspendedForFlowControl = true; mHasSuspendedByBackPressure = true; @@ -1652,7 +1653,8 @@ bool HttpChannelParent::NeedFlowControl() { mozilla::ipc::IPCResult HttpChannelParent::RecvBytesRead( const int32_t& aCount) { - if (!NeedFlowControl()) { + // no more flow control after diviersion starts + if (!NeedFlowControl() || mDivertingFromChild) { return IPC_OK(); } @@ -1661,6 +1663,7 @@ mozilla::ipc::IPCResult HttpChannelParent::RecvBytesRead( if (mSendWindowSize <= 0 && mSendWindowSize + aCount > 0) { MOZ_ASSERT(mSuspendedForFlowControl); + LOG((" resume the channel due to e10s backpressure relief")); Unused << mChannel->Resume(); mSuspendedForFlowControl = false; @@ -2076,6 +2079,14 @@ nsresult HttpChannelParent::SuspendForDiversion() { rv = mParentListener->SuspendForDiversion(); MOZ_ASSERT(NS_SUCCEEDED(rv)); + // After we suspend for diversion, we don't need the flow control since the + // channel is suspended until all the data is consumed and no more e10s later. + // No point to have another redundant suspension. + if (mSuspendedForFlowControl) { + Unused << mChannel->Resume(); + mSuspendedForFlowControl = false; + } + // Once this is set, no more OnStart/OnData/OnStop callbacks should be sent // to the child. 
mDivertingFromChild = true; diff --git a/parser/htmlparser/nsParserModule.cpp b/parser/htmlparser/nsParserModule.cpp index a60d0e56233f..39e4c86841d5 100644 --- a/parser/htmlparser/nsParserModule.cpp +++ b/parser/htmlparser/nsParserModule.cpp @@ -15,8 +15,7 @@ NS_GENERIC_FACTORY_CONSTRUCTOR(nsParser) NS_DEFINE_NAMED_CID(NS_PARSER_CID); static const mozilla::Module::CIDEntry kParserCIDs[] = { - {&kNS_PARSER_CID, false, nullptr, nsParserConstructor}, - {nullptr}}; + {&kNS_PARSER_CID, false, nullptr, nsParserConstructor}, {nullptr}}; static nsresult Initialize() { nsresult rv = nsHTMLTags::AddRefTable(); diff --git a/services/common/docs/RemoteSettings.rst b/services/common/docs/RemoteSettings.rst index 12a546501ed4..81938df92f8c 100644 --- a/services/common/docs/RemoteSettings.rst +++ b/services/common/docs/RemoteSettings.rst @@ -150,6 +150,27 @@ And once done: #. Wait for Firefox to pick-up the changes for your settings key +Global Notifications +==================== + +The polling for changes process sends two notifications that observers can register to: + +* ``remote-settings:changes-poll-start``: Polling for changes is starting. triggered either by the scheduled timer or a push broadcast. +* ``remote-settings:changes-poll-end``: Polling for changes has ended + +.. code-block:: javascript + + const observer = { + observe(aSubject, aTopic, aData) { + Services.obs.removeObserver(this, "remote-settings:changes-poll-start"); + + const { expectedTimestamp } = JSON.parse(aData); + console.log("Polling started", expectedTimestamp ? 
"from push broadcast" : "by scheduled trigger"); + }, + }; + Services.obs.addObserver(observer, "remote-settings:changes-poll-start"); + + Advanced Options ================ diff --git a/services/settings/remote-settings.js b/services/settings/remote-settings.js index 770d0dd97d9d..d6ac8e88053d 100644 --- a/services/settings/remote-settings.js +++ b/services/settings/remote-settings.js @@ -168,6 +168,8 @@ function remoteSettingsFunction() { } } + Services.obs.notifyObservers(null, "remote-settings:changes-poll-start", JSON.stringify({ expectedTimestamp })); + const lastEtag = gPrefs.getCharPref(PREF_SETTINGS_LAST_ETAG, ""); let pollResult; @@ -226,6 +228,7 @@ function remoteSettingsFunction() { // the one in the local database. try { await client.maybeSync(last_modified, { loadDump }); + // Save last time this client was successfully synced. Services.prefs.setIntPref(client.lastCheckTimePref, checkedServerTimeInSeconds); } catch (e) { @@ -245,7 +248,7 @@ function remoteSettingsFunction() { gPrefs.setCharPref(PREF_SETTINGS_LAST_ETAG, currentEtag); } - Services.obs.notifyObservers(null, "remote-settings-changes-polled"); + Services.obs.notifyObservers(null, "remote-settings:changes-poll-end"); }; /** diff --git a/services/settings/test/unit/test_remote_settings_poll.js b/services/settings/test/unit/test_remote_settings_poll.js index 4ed434624ee8..8a422f179ab0 100644 --- a/services/settings/test/unit/test_remote_settings_poll.js +++ b/services/settings/test/unit/test_remote_settings_poll.js @@ -60,6 +60,30 @@ function run_test() { add_task(clear_state); + +add_task(async function test_an_event_is_sent_on_start() { + server.registerPathHandler(CHANGES_PATH, (request, response) => { + response.write(JSON.stringify({ data: [] })); + response.setHeader("ETag", '"42"'); + response.setHeader("Date", (new Date()).toUTCString()); + response.setStatusLine(null, 200, "OK"); + }); + let notificationObserved = null; + const observer = { + observe(aSubject, aTopic, aData) { + 
Services.obs.removeObserver(this, "remote-settings:changes-poll-start"); + notificationObserved = JSON.parse(aData); + }, + }; + Services.obs.addObserver(observer, "remote-settings:changes-poll-start"); + + await RemoteSettings.pollChanges({ expectedTimestamp: 13 }); + + Assert.equal(notificationObserved.expectedTimestamp, 13, "start notification should have been observed"); +}); +add_task(clear_state); + + add_task(async function test_check_success() { const startHistogram = getUptakeTelemetrySnapshot(TELEMETRY_HISTOGRAM_KEY); const serverTime = 8000; @@ -86,15 +110,15 @@ add_task(async function test_check_success() { let maybeSyncCalled = false; c.maybeSync = () => { maybeSyncCalled = true; }; - // Ensure that the remote-settings-changes-polled notification works + // Ensure that the remote-settings:changes-poll-end notification works let notificationObserved = false; const observer = { observe(aSubject, aTopic, aData) { - Services.obs.removeObserver(this, "remote-settings-changes-polled"); + Services.obs.removeObserver(this, "remote-settings:changes-poll-end"); notificationObserved = true; }, }; - Services.obs.addObserver(observer, "remote-settings-changes-polled"); + Services.obs.addObserver(observer, "remote-settings:changes-poll-end"); await RemoteSettings.pollChanges(); @@ -131,7 +155,7 @@ add_task(async function test_update_timer_interface() { }])); await new Promise((resolve) => { - const e = "remote-settings-changes-polled"; + const e = "remote-settings:changes-poll-end"; const changesPolledObserver = { observe(aSubject, aTopic, aData) { Services.obs.removeObserver(this, e); @@ -164,15 +188,15 @@ add_task(async function test_check_up_to_date() { Services.prefs.setCharPref(PREF_LAST_ETAG, '"1100"'); - // Ensure that the remote-settings-changes-polled notification is sent. + // Ensure that the remote-settings:changes-poll-end notification is sent. 
let notificationObserved = false; const observer = { observe(aSubject, aTopic, aData) { - Services.obs.removeObserver(this, "remote-settings-changes-polled"); + Services.obs.removeObserver(this, "remote-settings:changes-poll-end"); notificationObserved = true; }, }; - Services.obs.addObserver(observer, "remote-settings-changes-polled"); + Services.obs.addObserver(observer, "remote-settings:changes-poll-end"); // If server has no change, a 304 is received, maybeSync() is not called. let maybeSyncCalled = false; @@ -257,6 +281,7 @@ add_task(async function test_client_last_check_is_saved() { }); add_task(clear_state); + add_task(async function test_success_with_partial_list() { function partialList(request, response) { const entries = [{ @@ -353,11 +378,11 @@ add_task(async function test_server_error() { let notificationObserved = false; const observer = { observe(aSubject, aTopic, aData) { - Services.obs.removeObserver(this, "remote-settings-changes-polled"); + Services.obs.removeObserver(this, "remote-settings:changes-poll-end"); notificationObserved = true; }, }; - Services.obs.addObserver(observer, "remote-settings-changes-polled"); + Services.obs.addObserver(observer, "remote-settings:changes-poll-end"); Services.prefs.setIntPref(PREF_LAST_UPDATE, 42); // pollChanges() fails with adequate error and no notification. 
diff --git a/storage/mozStorageConnection.cpp b/storage/mozStorageConnection.cpp index c2dbf951f238..f35a5fc712ef 100644 --- a/storage/mozStorageConnection.cpp +++ b/storage/mozStorageConnection.cpp @@ -603,8 +603,10 @@ nsresult Connection::initialize() { } #ifdef MOZ_SQLITE_FTS3_TOKENIZER - srv = ::sqlite3_db_config(mDBConn, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0); - MOZ_ASSERT(srv == SQLITE_OK, "SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER should be enabled"); + srv = + ::sqlite3_db_config(mDBConn, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0); + MOZ_ASSERT(srv == SQLITE_OK, + "SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER should be enabled"); #endif // Do not set mDatabaseFile or mFileURL here since this is a "memory" @@ -642,8 +644,10 @@ nsresult Connection::initialize(nsIFile *aDatabaseFile) { } #ifdef MOZ_SQLITE_FTS3_TOKENIZER - srv = ::sqlite3_db_config(mDBConn, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0); - MOZ_ASSERT(srv == SQLITE_OK, "SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER should be enabled"); + srv = + ::sqlite3_db_config(mDBConn, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0); + MOZ_ASSERT(srv == SQLITE_OK, + "SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER should be enabled"); #endif // Do not set mFileURL here since this is database does not have an associated @@ -676,8 +680,10 @@ nsresult Connection::initialize(nsIFileURL *aFileURL) { } #ifdef MOZ_SQLITE_FTS3_TOKENIZER - srv = ::sqlite3_db_config(mDBConn, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0); - MOZ_ASSERT(srv == SQLITE_OK, "SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER should be enabled"); + srv = + ::sqlite3_db_config(mDBConn, SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER, 1, 0); + MOZ_ASSERT(srv == SQLITE_OK, + "SQLITE_DBCONFIG_ENABLE_FTS3_TOKENIZER should be enabled"); #endif // Set both mDatabaseFile and mFileURL here. 
diff --git a/testing/geckodriver/src/marionette.rs b/testing/geckodriver/src/marionette.rs index cd8000ba7f29..4ed383e36d60 100644 --- a/testing/geckodriver/src/marionette.rs +++ b/testing/geckodriver/src/marionette.rs @@ -20,10 +20,10 @@ use std::sync::Mutex; use std::thread; use std::time; use webdriver::capabilities::CapabilitiesMatching; -use webdriver::command::WebDriverCommand::{AcceptAlert, AddCookie, CloseWindow, DeleteCookie, - DeleteCookies, DeleteSession, DismissAlert, - ElementClear, ElementClick, ElementSendKeys, - ExecuteAsyncScript, ExecuteScript, +use webdriver::command::WebDriverCommand::{AcceptAlert, AddCookie, NewWindow, CloseWindow, + DeleteCookie, DeleteCookies, DeleteSession, + DismissAlert, ElementClear, ElementClick, + ElementSendKeys, ExecuteAsyncScript, ExecuteScript, Extension, FindElement, FindElementElement, FindElementElements, FindElements, FullscreenWindow, Get, GetActiveElement, GetAlertText, GetCSSValue, @@ -41,11 +41,11 @@ use webdriver::command::WebDriverCommand::{AcceptAlert, AddCookie, CloseWindow, use webdriver::command::{ActionsParameters, AddCookieParameters, GetNamedCookieParameters, GetParameters, JavascriptCommandParameters, LocatorParameters, NewSessionParameters, SwitchToFrameParameters, SwitchToWindowParameters, - TimeoutsParameters, WindowRectParameters}; + TimeoutsParameters, WindowRectParameters, NewWindowParameters}; use webdriver::command::{WebDriverCommand, WebDriverMessage}; use webdriver::common::{Cookie, FrameId, WebElement, ELEMENT_KEY, FRAME_KEY, WINDOW_KEY}; use webdriver::error::{ErrorStatus, WebDriverError, WebDriverResult}; -use webdriver::response::{CloseWindowResponse, CookieResponse, CookiesResponse, +use webdriver::response::{NewWindowResponse, CloseWindowResponse, CookieResponse, CookiesResponse, ElementRectResponse, NewSessionResponse, TimeoutsResponse, ValueResponse, WebDriverResponse, WindowRectResponse}; use webdriver::server::{Session, WebDriverHandler}; @@ -522,6 +522,28 @@ impl 
MarionetteSession { } Status => panic!("Got status command that should already have been handled"), GetWindowHandles => WebDriverResponse::Generic(resp.to_value_response(false)?), + NewWindow(_) => { + let handle: String = try_opt!( + try_opt!( + resp.result.get("handle"), + ErrorStatus::UnknownError, + "Failed to find handle field" + ).as_str(), + ErrorStatus::UnknownError, + "Failed to interpret handle as string" + ).into(); + let typ: String = try_opt!( + try_opt!( + resp.result.get("type"), + ErrorStatus::UnknownError, + "Failed to find type field" + ).as_str(), + ErrorStatus::UnknownError, + "Failed to interpret type as string" + ).into(); + + WebDriverResponse::NewWindow(NewWindowResponse { handle, typ }) + } CloseWindow => { let data = try_opt!( resp.result.as_array(), @@ -788,6 +810,7 @@ impl MarionetteCommand { (Some("WebDriver:AcceptDialog"), None) } AddCookie(ref x) => (Some("WebDriver:AddCookie"), Some(x.to_marionette())), + NewWindow(ref x) => (Some("WebDriver:NewWindow"), Some(x.to_marionette())), CloseWindow => (Some("WebDriver:CloseWindow"), None), DeleteCookie(ref x) => { let mut data = Map::new(); @@ -1425,6 +1448,16 @@ impl ToMarionette for LocatorParameters { } } +impl ToMarionette for NewWindowParameters { + fn to_marionette(&self) -> WebDriverResult> { + let mut data = Map::new(); + if let Some(ref x) = self.type_hint { + data.insert("type".to_string(), serde_json::to_value(x)?); + } + Ok(data) + } +} + impl ToMarionette for SwitchToFrameParameters { fn to_marionette(&self) -> WebDriverResult> { let mut data = Map::new(); diff --git a/testing/web-platform/meta/mediacapture-streams/MediaStream-default-feature-policy.https.html.ini b/testing/web-platform/meta/mediacapture-streams/MediaStream-default-feature-policy.https.html.ini index 2405cb2b0ea4..531d3cd3cdf3 100644 --- a/testing/web-platform/meta/mediacapture-streams/MediaStream-default-feature-policy.https.html.ini +++ 
b/testing/web-platform/meta/mediacapture-streams/MediaStream-default-feature-policy.https.html.ini @@ -5,7 +5,13 @@ [Default "camera" feature policy ["self"\] disallows cross-origin iframes.] expected: FAIL - [Default "camera; microphone" feature policy ["self"\] disallows cross-origin iframes.] + [Default "camera;microphone" feature policy ["self"\] disallows cross-origin iframes.] + expected: FAIL + + [Feature policy "microphone" can be enabled in cross-origin iframes using "allow" attribute.] + expected: FAIL + + [Feature policy "camera" can be enabled in cross-origin iframes using "allow" attribute.] expected: FAIL @@ -16,6 +22,11 @@ [Default "camera" feature policy ["self"\] disallows cross-origin iframes.] expected: FAIL - [Default "camera; microphone" feature policy ["self"\] disallows cross-origin iframes.] + [Default "camera;microphone" feature policy ["self"\] disallows cross-origin iframes.] expected: FAIL + [Feature policy "microphone" can be enabled in cross-origin iframes using "allow" attribute.] + expected: FAIL + + [Feature policy "camera" can be enabled in cross-origin iframes using "allow" attribute.] 
+ expected: FAIL diff --git a/testing/web-platform/meta/webdriver/tests/new_window/new_window.py.ini b/testing/web-platform/meta/webdriver/tests/new_window/new_window.py.ini new file mode 100644 index 000000000000..e1e2e0f51dfa --- /dev/null +++ b/testing/web-platform/meta/webdriver/tests/new_window/new_window.py.ini @@ -0,0 +1,2 @@ +[new_window.py] + disabled: os == "android": Fennec doesn't support opening new windows diff --git a/testing/web-platform/tests/feature-policy/resources/featurepolicy.js b/testing/web-platform/tests/feature-policy/resources/featurepolicy.js index 38ae4184096b..e0f0faf6ca2c 100644 --- a/testing/web-platform/tests/feature-policy/resources/featurepolicy.js +++ b/testing/web-platform/tests/feature-policy/resources/featurepolicy.js @@ -82,7 +82,7 @@ function test_feature_availability_with_post_message_result( // tests the feature availability and posts the result back to the parent. // Otherwise, does nothing. function test_feature_in_iframe(feature_name, feature_promise_factory) { - if (location.hash.includes(feature_name)) { + if (location.hash.endsWith(`#${feature_name}`)) { feature_promise_factory().then( () => window.parent.postMessage('#OK', '*'), (e) => window.parent.postMessage('#' + e.name, '*')); diff --git a/testing/web-platform/tests/mediacapture-streams/MediaStream-default-feature-policy.https.html b/testing/web-platform/tests/mediacapture-streams/MediaStream-default-feature-policy.https.html index 21e3f5b9af85..2e38b9e6864d 100644 --- a/testing/web-platform/tests/mediacapture-streams/MediaStream-default-feature-policy.https.html +++ b/testing/web-platform/tests/mediacapture-streams/MediaStream-default-feature-policy.https.html @@ -7,56 +7,73 @@ diff --git a/testing/web-platform/tests/webdriver/tests/new_window/__init__.py b/testing/web-platform/tests/webdriver/tests/new_window/__init__.py new file mode 100644 index 000000000000..e16014597cf8 --- /dev/null +++ b/testing/web-platform/tests/webdriver/tests/new_window/__init__.py 
@@ -0,0 +1,10 @@ +def opener(session): + return session.execute_script(""" + return window.opener; + """) + + +def window_name(session): + return session.execute_script(""" + return window.name; + """) diff --git a/testing/web-platform/tests/webdriver/tests/new_window/new.py b/testing/web-platform/tests/webdriver/tests/new_window/new.py new file mode 100644 index 000000000000..0abdeac614e2 --- /dev/null +++ b/testing/web-platform/tests/webdriver/tests/new_window/new.py @@ -0,0 +1,52 @@ +import pytest + +from webdriver.transport import Response + +from tests.support.asserts import assert_error, assert_success + + +def new_window(session, type_hint=None): + return session.transport.send( + "POST", "session/{session_id}/window/new".format(**vars(session)), + {"type": type_hint}) + + +def test_null_parameter_value(session, http): + path = "/session/{session_id}/window/new".format(**vars(session)) + with http.post(path, None) as response: + assert_error(Response.from_http(response), "invalid argument") + + +def test_no_browsing_context(session, closed_window): + response = new_window(session) + assert_error(response, "no such window") + + +@pytest.mark.parametrize("type_hint", [True, 42, 4.2, [], {}]) +def test_type_with_invalid_type(session, type_hint): + response = new_window(session, type_hint) + assert_error(response, "invalid argument") + + +def test_type_with_null_value(session): + original_handles = session.handles + + response = new_window(session, type_hint=None) + value = assert_success(response) + handles = session.handles + assert len(handles) == len(original_handles) + 1 + assert value["handle"] in handles + assert value["handle"] not in original_handles + assert value["type"] in ["tab", "window"] + + +def test_type_with_unknown_value(session): + original_handles = session.handles + + response = new_window(session, type_hint="foo") + value = assert_success(response) + handles = session.handles + assert len(handles) == len(original_handles) + 1 + assert 
value["handle"] in handles + assert value["handle"] not in original_handles + assert value["type"] in ["tab", "window"] diff --git a/testing/web-platform/tests/webdriver/tests/new_window/new_tab.py b/testing/web-platform/tests/webdriver/tests/new_window/new_tab.py new file mode 100644 index 000000000000..fbb249fd7836 --- /dev/null +++ b/testing/web-platform/tests/webdriver/tests/new_window/new_tab.py @@ -0,0 +1,48 @@ +from tests.support.asserts import assert_success + +from . import opener, window_name + + +def new_window(session, type_hint=None): + return session.transport.send( + "POST", "session/{session_id}/window/new".format(**vars(session)), + {"type": type_hint}) + + +def test_new_tab(session): + original_handles = session.handles + + response = new_window(session, type_hint="tab") + value = assert_success(response) + handles = session.handles + assert len(handles) == len(original_handles) + 1 + assert value["handle"] in handles + assert value["handle"] not in original_handles + assert value["type"] == "tab" + + +def test_new_tab_opens_about_blank(session): + response = new_window(session, type_hint="tab") + value = assert_success(response) + assert value["type"] == "tab" + + session.handle = value["handle"] + assert session.url == "about:blank" + + +def test_new_tab_sets_no_window_name(session): + response = new_window(session, type_hint="tab") + value = assert_success(response) + assert value["type"] == "tab" + + session.handle = value["handle"] + assert window_name(session) == "" + + +def test_new_tab_sets_no_opener(session): + response = new_window(session, type_hint="tab") + value = assert_success(response) + assert value["type"] == "tab" + + session.handle = value["handle"] + assert opener(session) is None diff --git a/testing/web-platform/tests/webdriver/tests/new_window/new_window.py b/testing/web-platform/tests/webdriver/tests/new_window/new_window.py new file mode 100644 index 000000000000..eb4db6729eca --- /dev/null +++ 
b/testing/web-platform/tests/webdriver/tests/new_window/new_window.py @@ -0,0 +1,48 @@ +from tests.support.asserts import assert_success + +from . import opener, window_name + + +def new_window(session, type_hint=None): + return session.transport.send( + "POST", "session/{session_id}/window/new".format(**vars(session)), + {"type": type_hint}) + + +def test_type_with_window(session): + original_handles = session.handles + + response = new_window(session, type_hint="window") + value = assert_success(response) + handles = session.handles + assert len(handles) == len(original_handles) + 1 + assert value["handle"] in handles + assert value["handle"] not in original_handles + assert value["type"] == "window" + + +def test_new_window_opens_about_blank(session): + response = new_window(session, type_hint="window") + value = assert_success(response) + assert value["type"] == "window" + + session.handle = value["handle"] + assert session.url == "about:blank" + + +def test_new_window_sets_no_window_name(session): + response = new_window(session, type_hint="window") + value = assert_success(response) + assert value["type"] == "window" + + session.handle = value["handle"] + assert window_name(session) == "" + + +def test_new_window_sets_no_opener(session): + response = new_window(session, type_hint="window") + value = assert_success(response) + assert value["type"] == "window" + + session.handle = value["handle"] + assert opener(session) is None diff --git a/testing/web-platform/tests/webdriver/tests/new_window/user_prompts.py b/testing/web-platform/tests/webdriver/tests/new_window/user_prompts.py new file mode 100644 index 000000000000..0d841468ee48 --- /dev/null +++ b/testing/web-platform/tests/webdriver/tests/new_window/user_prompts.py @@ -0,0 +1,121 @@ +# META: timeout=long + +import pytest + +from tests.support.asserts import assert_dialog_handled, assert_error, assert_success + + +def new_window(session, type_hint=None): + return session.transport.send( + "POST", 
"session/{session_id}/window/new".format(**vars(session)), + {"type": type_hint}) + + +@pytest.fixture +def check_user_prompt_closed_without_exception(session, create_dialog): + def check_user_prompt_closed_without_exception(dialog_type, retval): + original_handles = session.handles + + create_dialog(dialog_type, text=dialog_type) + + response = new_window(session) + value = assert_success(response) + + handles = session.handles + assert len(handles) == len(original_handles) + 1 + assert value["handle"] in handles + assert value["handle"] not in original_handles + + assert_dialog_handled(session, expected_text=dialog_type, expected_retval=retval) + + return check_user_prompt_closed_without_exception + + +@pytest.fixture +def check_user_prompt_closed_with_exception(session, create_dialog): + def check_user_prompt_closed_with_exception(dialog_type, retval): + original_handles = session.handles + + create_dialog(dialog_type, text=dialog_type) + + response = new_window(session) + assert_error(response, "unexpected alert open") + + assert_dialog_handled(session, expected_text=dialog_type, expected_retval=retval) + + assert len(session.handles) == len(original_handles) + + return check_user_prompt_closed_with_exception + + +@pytest.fixture +def check_user_prompt_not_closed_but_exception(session, create_dialog): + def check_user_prompt_not_closed_but_exception(dialog_type): + original_handles = session.handles + + create_dialog(dialog_type, text=dialog_type) + + response = new_window(session) + assert_error(response, "unexpected alert open") + + assert session.alert.text == dialog_type + session.alert.dismiss() + + assert len(session.handles) == len(original_handles) + + return check_user_prompt_not_closed_but_exception + + +@pytest.mark.capabilities({"unhandledPromptBehavior": "accept"}) +@pytest.mark.parametrize("dialog_type, retval", [ + ("alert", None), + ("confirm", True), + ("prompt", ""), +]) +def test_accept(check_user_prompt_closed_without_exception, dialog_type, 
retval): + check_user_prompt_closed_without_exception(dialog_type, retval) + + +@pytest.mark.capabilities({"unhandledPromptBehavior": "accept and notify"}) +@pytest.mark.parametrize("dialog_type, retval", [ + ("alert", None), + ("confirm", True), + ("prompt", ""), +]) +def test_accept_and_notify(check_user_prompt_closed_with_exception, dialog_type, retval): + check_user_prompt_closed_with_exception(dialog_type, retval) + + +@pytest.mark.capabilities({"unhandledPromptBehavior": "dismiss"}) +@pytest.mark.parametrize("dialog_type, retval", [ + ("alert", None), + ("confirm", False), + ("prompt", None), +]) +def test_dismiss(check_user_prompt_closed_without_exception, dialog_type, retval): + check_user_prompt_closed_without_exception(dialog_type, retval) + + +@pytest.mark.capabilities({"unhandledPromptBehavior": "dismiss and notify"}) +@pytest.mark.parametrize("dialog_type, retval", [ + ("alert", None), + ("confirm", False), + ("prompt", None), +]) +def test_dismiss_and_notify(check_user_prompt_closed_with_exception, dialog_type, retval): + check_user_prompt_closed_with_exception(dialog_type, retval) + + +@pytest.mark.capabilities({"unhandledPromptBehavior": "ignore"}) +@pytest.mark.parametrize("dialog_type", ["alert", "confirm", "prompt"]) +def test_ignore(check_user_prompt_not_closed_but_exception, dialog_type): + check_user_prompt_not_closed_but_exception(dialog_type) + + +@pytest.mark.parametrize("dialog_type, retval", [ + ("alert", None), + ("confirm", False), + ("prompt", None), +]) +def test_default(check_user_prompt_closed_with_exception, dialog_type, retval): + check_user_prompt_closed_with_exception(dialog_type, retval) diff --git a/testing/webdriver/src/command.rs b/testing/webdriver/src/command.rs index 4ec200f4647c..cc4a95971fe0 100644 --- a/testing/webdriver/src/command.rs +++ b/testing/webdriver/src/command.rs @@ -23,6 +23,7 @@ pub enum WebDriverCommand { GetPageSource, GetWindowHandle, GetWindowHandles, + NewWindow(NewWindowParameters), CloseWindow, 
GetWindowRect, SetWindowRect(WindowRectParameters), @@ -120,6 +121,7 @@ impl WebDriverMessage { Route::GetPageSource => WebDriverCommand::GetPageSource, Route::GetWindowHandle => WebDriverCommand::GetWindowHandle, Route::GetWindowHandles => WebDriverCommand::GetWindowHandles, + Route::NewWindow => WebDriverCommand::NewWindow(serde_json::from_str(raw_body)?), Route::CloseWindow => WebDriverCommand::CloseWindow, Route::GetTimeouts => WebDriverCommand::GetTimeouts, Route::SetTimeouts => WebDriverCommand::SetTimeouts(serde_json::from_str(raw_body)?), @@ -469,6 +471,12 @@ impl CapabilitiesMatching for NewSessionParameters { } } +#[derive(Debug, PartialEq, Serialize, Deserialize)] +pub struct NewWindowParameters { + #[serde(rename = "type")] + pub type_hint: Option, +} + #[derive(Debug, PartialEq, Serialize, Deserialize)] pub struct SendKeysParameters { pub text: String, @@ -939,6 +947,45 @@ mod tests { check_deserialize(&json, &data); } + #[test] + fn test_json_new_window_parameters_without_type() { + let json = r#"{}"#; + let data = NewWindowParameters { type_hint: None }; + + check_deserialize(&json, &data); + } + + #[test] + fn test_json_new_window_parameters_with_optional_null_type() { + let json = r#"{"type":null}"#; + let data = NewWindowParameters { type_hint: None }; + + check_deserialize(&json, &data); + } + + #[test] + fn test_json_new_window_parameters_with_supported_type() { + let json = r#"{"type":"tab"}"#; + let data = NewWindowParameters { type_hint: Some("tab".into()) }; + + check_deserialize(&json, &data); + } + + #[test] + fn test_json_new_window_parameters_with_unknown_type() { + let json = r#"{"type":"foo"}"#; + let data = NewWindowParameters { type_hint: Some("foo".into()) }; + + check_deserialize(&json, &data); + } + + #[test] + fn test_json_new_window_parameters_with_invalid_type() { + let json = r#"{"type":3}"#; + + assert!(serde_json::from_str::(&json).is_err()); + } + #[test] fn test_json_send_keys_parameters_with_value() { let json = 
r#"{"text":"foo"}"#; diff --git a/testing/webdriver/src/httpapi.rs b/testing/webdriver/src/httpapi.rs index cc362bd0b8c5..53688c30445c 100644 --- a/testing/webdriver/src/httpapi.rs +++ b/testing/webdriver/src/httpapi.rs @@ -25,6 +25,7 @@ fn standard_routes() -> Vec<(Method, &'static str, R "/session/{sessionId}/window/handles", Route::GetWindowHandles, ), + (Method::POST, "/session/{sessionId}/window/new", Route::NewWindow), (Method::DELETE, "/session/{sessionId}/window", Route::CloseWindow), ( Method::GET, @@ -229,6 +230,7 @@ pub enum Route { GetPageSource, GetWindowHandle, GetWindowHandles, + NewWindow, CloseWindow, GetWindowSize, // deprecated SetWindowSize, // deprecated diff --git a/testing/webdriver/src/response.rs b/testing/webdriver/src/response.rs index 0604a0010414..293058e387ee 100644 --- a/testing/webdriver/src/response.rs +++ b/testing/webdriver/src/response.rs @@ -5,6 +5,7 @@ use serde_json::Value; #[derive(Debug, PartialEq, Serialize)] #[serde(untagged, remote = "Self")] pub enum WebDriverResponse { + NewWindow(NewWindowResponse), CloseWindow(CloseWindowResponse), Cookie(CookieResponse), Cookies(CookiesResponse), @@ -32,6 +33,13 @@ impl Serialize for WebDriverResponse { } } +#[derive(Debug, PartialEq, Serialize)] +pub struct NewWindowResponse { + pub handle: String, + #[serde(rename = "type")] + pub typ: String, +} + #[derive(Debug, PartialEq, Serialize)] pub struct CloseWindowResponse(pub Vec); @@ -133,6 +141,17 @@ mod tests { use crate::test::check_serialize; use serde_json; + #[test] + fn test_json_new_window_response() { + let json = r#"{"value":{"handle":"42","type":"window"}}"#; + let data = WebDriverResponse::NewWindow(NewWindowResponse { + handle: "42".into(), + typ: "window".into(), + }); + + check_serialize(&json, &data); + } + #[test] fn test_json_close_window_response() { let json = r#"{"value":["1234"]}"#; diff --git a/third_party/dav1d/.gitlab-ci.yml b/third_party/dav1d/.gitlab-ci.yml index ca0fa634a006..8d4233cc5672 100644 --- 
a/third_party/dav1d/.gitlab-ci.yml +++ b/third_party/dav1d/.gitlab-ci.yml @@ -1,7 +1,18 @@ stages: + - style - build - test +style-check: + image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132 + stage: style + tags: + - debian + - amd64 + script: + - git grep -n -e $'\t' --or -e $'\r' -- . ':(exclude)*/compat/*' && exit 1 + - /bin/true + build-debian: image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132 stage: build @@ -24,6 +35,19 @@ build-debian-static: - ninja -C build - cd build && meson test -v +build-debian32: + image: registry.videolan.org:5000/dav1d-debian-unstable:20181218135732 + stage: build + tags: + - debian + - amd64 + script: + - meson build --buildtype release + --werror + --cross-file /opt/crossfiles/linux32.meson + - ninja -C build + - cd build && meson test -v + build-win32: image: registry.videolan.org:5000/dav1d-debian-unstable:20181114201132 stage: build diff --git a/third_party/dav1d/include/common/attributes.h b/third_party/dav1d/include/common/attributes.h index c6c28904af52..25e7b7eb359b 100644 --- a/third_party/dav1d/include/common/attributes.h +++ b/third_party/dav1d/include/common/attributes.h @@ -38,15 +38,16 @@ #define ATTR_ALIAS #endif -#if ARCH_X86 +#if ARCH_X86_64 +/* x86-64 needs 32-byte alignment for AVX2. */ #define ALIGN_32_VAL 32 #define ALIGN_16_VAL 16 -#elif ARCH_ARM || ARCH_AARCH64 -// ARM doesn't benefit from anything more than 16 byte alignment. +#elif ARCH_X86_32 || ARCH_ARM || ARCH_AARCH64 +/* ARM doesn't benefit from anything more than 16-byte alignment. */ #define ALIGN_32_VAL 16 #define ALIGN_16_VAL 16 #else -// No need for extra alignment on platforms without assembly. +/* No need for extra alignment on platforms without assembly. 
*/ #define ALIGN_32_VAL 8 #define ALIGN_16_VAL 8 #endif diff --git a/third_party/dav1d/include/compat/msvc/stdatomic.h b/third_party/dav1d/include/compat/msvc/stdatomic.h index 28fe40cbff36..979ee2ba8234 100644 --- a/third_party/dav1d/include/compat/msvc/stdatomic.h +++ b/third_party/dav1d/include/compat/msvc/stdatomic.h @@ -23,10 +23,10 @@ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ - -#ifndef MSCVER_STDATOMIC_H_ -#define MSCVER_STDATOMIC_H_ - + +#ifndef MSCVER_STDATOMIC_H_ +#define MSCVER_STDATOMIC_H_ + #if !defined(__cplusplus) && defined(_MSC_VER) #pragma warning(push) @@ -37,34 +37,34 @@ # include_next #else /* ! stdatomic.h */ -#include - -#include "common/attributes.h" - -typedef volatile LONG __declspec(align(32)) atomic_int; -typedef volatile ULONG __declspec(align(32)) atomic_uint; - -typedef enum { - memory_order_relaxed, - memory_order_acquire -} msvc_atomic_memory_order; - -#define atomic_init(p_a, v) do { *(p_a) = (v); } while(0) -#define atomic_store(p_a, v) InterlockedExchange((LONG*)p_a, v) -#define atomic_load(p_a) InterlockedCompareExchange((LONG*)p_a, 0, 0) -#define atomic_load_explicit(p_a, mo) atomic_load(p_a) - -/* - * TODO use a special call to increment/decrement - * using InterlockedIncrement/InterlockedDecrement - */ -#define atomic_fetch_add(p_a, inc) InterlockedExchangeAdd(p_a, inc) -#define atomic_fetch_sub(p_a, dec) InterlockedExchangeAdd(p_a, -(dec)) - -#endif /* ! 
stdatomic.h */ - -#pragma warning(pop) - -#endif /* !defined(__cplusplus) && defined(_MSC_VER) */ - -#endif /* MSCVER_STDATOMIC_H_ */ +#include + +#include "common/attributes.h" + +typedef volatile LONG __declspec(align(32)) atomic_int; +typedef volatile ULONG __declspec(align(32)) atomic_uint; + +typedef enum { + memory_order_relaxed, + memory_order_acquire +} msvc_atomic_memory_order; + +#define atomic_init(p_a, v) do { *(p_a) = (v); } while(0) +#define atomic_store(p_a, v) InterlockedExchange((LONG*)p_a, v) +#define atomic_load(p_a) InterlockedCompareExchange((LONG*)p_a, 0, 0) +#define atomic_load_explicit(p_a, mo) atomic_load(p_a) + +/* + * TODO use a special call to increment/decrement + * using InterlockedIncrement/InterlockedDecrement + */ +#define atomic_fetch_add(p_a, inc) InterlockedExchangeAdd(p_a, inc) +#define atomic_fetch_sub(p_a, dec) InterlockedExchangeAdd(p_a, -(dec)) + +#endif /* ! stdatomic.h */ + +#pragma warning(pop) + +#endif /* !defined(__cplusplus) && defined(_MSC_VER) */ + +#endif /* MSCVER_STDATOMIC_H_ */ diff --git a/third_party/dav1d/include/dav1d/common.h b/third_party/dav1d/include/dav1d/common.h index 373493049f47..4fd6df3e5c55 100644 --- a/third_party/dav1d/include/dav1d/common.h +++ b/third_party/dav1d/include/dav1d/common.h @@ -43,6 +43,14 @@ #endif #endif +/** + * A reference-counted object wrapper for a user-configurable pointer. 
+ */ +typedef struct Dav1dUserData { + const uint8_t *data; ///< data pointer + struct Dav1dRef *ref; ///< allocation origin +} Dav1dUserData; + /** * Input packet metadata which are copied from the input data used to * decode each image into the matching structure of the output image @@ -56,6 +64,7 @@ typedef struct Dav1dDataProps { int64_t duration; ///< container duration of input data, 0 if unknown (default) int64_t offset; ///< stream offset of input data, -1 if unknown (default) size_t size; ///< packet size, default Dav1dData.sz + struct Dav1dUserData user_data; ///< user-configurable data, default NULL members } Dav1dDataProps; #endif // __DAV1D_COMMON_H__ diff --git a/third_party/dav1d/include/dav1d/data.h b/third_party/dav1d/include/dav1d/data.h index a78dc4a3c0ff..63f21fd9a5fd 100644 --- a/third_party/dav1d/include/dav1d/data.h +++ b/third_party/dav1d/include/dav1d/data.h @@ -58,19 +58,50 @@ DAV1D_API uint8_t * dav1d_data_create(Dav1dData *data, size_t sz); * @param sz Size of the data. * @param free_callback Function to be called when we release our last * reference to this data. In this callback, $buf will be - * the $buf argument to this function, and $user_data - * will be the $user_data input argument to this function. - * @param user_data Opaque parameter passed to free_callback(). + * the $buf argument to this function, and $cookie will + * be the $cookie input argument to this function. + * @param cookie Opaque parameter passed to free_callback(). * * @return 0 on success. A negative errno value on error. */ DAV1D_API int dav1d_data_wrap(Dav1dData *data, const uint8_t *buf, size_t sz, - void (*free_callback)(const uint8_t *buf, void *user_data), - void *user_data); + void (*free_callback)(const uint8_t *buf, void *cookie), + void *cookie); + +/** + * Wrap a user-provided data pointer into a reference counted object. + * + * data->m.user_data field will be initialized to wrap the provided $user_data + * pointer. 
+ * + * $free_callback will be called on the same thread that released the last + * reference. If frame threading is used, make sure $free_callback is + * thread-safe. + * + * @param data Input context. + * @param user_data The user data to be wrapped. + * @param free_callback Function to be called when we release our last + * reference to this data. In this callback, $user_data + * will be the $user_data argument to this function, and + * $cookie will be the $cookie input argument to this + * function. + * @param cookie Opaque parameter passed to $free_callback. + * + * @return 0 on success. A negative errno value on error. + */ +DAV1D_API int dav1d_data_wrap_user_data(Dav1dData *data, + const uint8_t *user_data, + void (*free_callback)(const uint8_t *user_data, + void *cookie), + void *cookie); /** * Free the data reference. * + * The reference count for data->m.user_data will be decremented (if it has been + * initialized with dav1d_data_wrap_user_data). The $data object will be memset + * to 0. + * * @param data Input context. 
*/ DAV1D_API void dav1d_data_unref(Dav1dData *data); diff --git a/third_party/dav1d/meson.build b/third_party/dav1d/meson.build index 03a279f59219..7512023b891f 100644 --- a/third_party/dav1d/meson.build +++ b/third_party/dav1d/meson.build @@ -147,6 +147,9 @@ if (host_machine.cpu_family() == 'aarch64' or if cc.has_function('getauxval', prefix : '#include ', args : test_args) cdata.set('HAVE_GETAUXVAL', 1) endif + if cc.has_function('elf_aux_info', prefix : '#include ', args : test_args) + cdata.set('HAVE_ELF_AUX_INFO', 1) + endif endif # Compiler flag tests @@ -167,6 +170,12 @@ optional_arguments = [ '-Wno-unused-parameter', '-Werror=missing-prototypes', ] +if cc.get_id() == 'msvc' + optional_arguments += [ + '-wd4028', # parameter different from declaration + '-wd4996' # use of POSIX functions + ] +endif if (get_option('buildtype') != 'debug' and get_option('buildtype') != 'plain') optional_arguments += '-fomit-frame-pointer' @@ -191,30 +200,35 @@ stackalign_flag = [] stackrealign_flag = [] if host_machine.cpu_family().startswith('x86') - if cc.has_argument('-mpreferred-stack-boundary=5') - stackalign_flag = ['-mpreferred-stack-boundary=5'] - stackrealign_flag = ['-mincoming-stack-boundary=4'] - cdata_asm.set('STACK_ALIGNMENT', 32) - cdata.set('STACK_ALIGNMENT', 32) - elif cc.has_argument('-mpreferred-stack-boundary=4') - stackalign_flag = ['-mpreferred-stack-boundary=4'] - stackrealign_flag = ['-mincoming-stack-boundary=4'] - cdata_asm.set('STACK_ALIGNMENT', 16) - cdata.set('STACK_ALIGNMENT', 16) - elif cc.has_argument('-mstack-alignment=32') - stackalign_flag = ['-mstack-alignment=32'] - stackrealign_flag = ['-mstackrealign'] - cdata_asm.set('STACK_ALIGNMENT', 32) - cdata.set('STACK_ALIGNMENT', 32) - else - if host_machine.cpu_family() == 'x86_64' - cdata_asm.set('STACK_ALIGNMENT', 16) - cdata.set('STACK_ALIGNMENT', 16) + if host_machine.cpu_family() == 'x86_64' + if cc.has_argument('-mpreferred-stack-boundary=5') + stackalign_flag = 
['-mpreferred-stack-boundary=5'] + stackrealign_flag = ['-mincoming-stack-boundary=4'] + stack_alignment = 32 + elif cc.has_argument('-mstack-alignment=32') + stackalign_flag = ['-mstack-alignment=32'] + stackrealign_flag = ['-mstackrealign'] + stack_alignment = 32 else - cdata_asm.set('STACK_ALIGNMENT', 4) - cdata.set('STACK_ALIGNMENT', 4) + stack_alignment = 16 + endif + else + if host_machine.system() == 'linux' or host_machine.system() == 'darwin' + stack_alignment = 16 + elif cc.has_argument('-mpreferred-stack-boundary=4') + stackalign_flag = ['-mpreferred-stack-boundary=4'] + stackrealign_flag = ['-mincoming-stack-boundary=2'] + stack_alignment = 16 + elif cc.has_argument('-mstack-alignment=16') + stackalign_flag = ['-mstack-alignment=16'] + stackrealign_flag = ['-mstackrealign'] + stack_alignment = 16 + else + stack_alignment = 4 endif endif + cdata_asm.set('STACK_ALIGNMENT', stack_alignment) + cdata.set('STACK_ALIGNMENT', stack_alignment) endif cdata.set10('ARCH_AARCH64', host_machine.cpu_family() == 'aarch64') @@ -256,13 +270,12 @@ if host_machine.cpu_family().startswith('x86') cdata.set10('ARCH_X86_64', true) cdata_asm.set10('ARCH_X86_32', false) cdata.set10('ARCH_X86_32', false) - - cdata_asm.set10('PIC', true) else cdata_asm.set10('ARCH_X86_64', false) cdata.set10('ARCH_X86_64', false) cdata_asm.set10('ARCH_X86_32', true) cdata.set10('ARCH_X86_32', true) + cdata_asm.set10('PIC', true) endif else cdata.set10('ARCH_X86', false) diff --git a/third_party/dav1d/src/arm/cpu.c b/third_party/dav1d/src/arm/cpu.c index 2f2d633e172d..a78e347f0dd2 100644 --- a/third_party/dav1d/src/arm/cpu.c +++ b/third_party/dav1d/src/arm/cpu.c @@ -37,6 +37,11 @@ #endif #define NEON_HWCAP HWCAP_ARM_NEON +#elif defined(HAVE_ELF_AUX_INFO) && ARCH_ARM +#include + +#define NEON_HWCAP HWCAP_NEON + #elif defined(__ANDROID__) #include #include @@ -72,9 +77,15 @@ unsigned dav1d_get_cpu_flags_arm(void) { unsigned flags = 0; #if ARCH_AARCH64 flags |= DAV1D_ARM_CPU_FLAG_NEON; +#elif 
defined(__ARM_NEON) + flags |= DAV1D_ARM_CPU_FLAG_NEON; #elif defined(HAVE_GETAUXVAL) && ARCH_ARM unsigned long hw_cap = getauxval(AT_HWCAP); flags |= (hw_cap & NEON_HWCAP) ? DAV1D_ARM_CPU_FLAG_NEON : 0; +#elif defined(HAVE_ELF_AUX_INFO) && ARCH_ARM + unsigned long hw_cap = 0; + elf_aux_info(AT_HWCAP, &hw_cap, sizeof(hw_cap)); + flags |= (hw_cap & NEON_HWCAP) ? DAV1D_ARM_CPU_FLAG_NEON : 0; #elif defined(__ANDROID__) flags |= parse_proc_cpuinfo("neon") ? DAV1D_ARM_CPU_FLAG_NEON : 0; #elif defined(__APPLE__) diff --git a/third_party/dav1d/src/data.c b/third_party/dav1d/src/data.c index 2bc293e3ceb4..726f7de0d99b 100644 --- a/third_party/dav1d/src/data.c +++ b/third_party/dav1d/src/data.c @@ -27,6 +27,7 @@ #include "config.h" +#include #include #include #include @@ -39,7 +40,7 @@ #include "src/data.h" #include "src/ref.h" -uint8_t * dav1d_data_create(Dav1dData *const buf, const size_t sz) { +uint8_t *dav1d_data_create_internal(Dav1dData *const buf, const size_t sz) { validate_input_or_ret(buf != NULL, NULL); buf->ref = dav1d_ref_create(sz); @@ -49,29 +50,65 @@ uint8_t * dav1d_data_create(Dav1dData *const buf, const size_t sz) { buf->m.timestamp = INT64_MIN; buf->m.duration = 0; buf->m.offset = -1; + buf->m.user_data.data = NULL; + buf->m.user_data.ref = NULL; return buf->ref->data; } -int dav1d_data_wrap(Dav1dData *const buf, const uint8_t *const ptr, const size_t sz, - void (*free_callback)(const uint8_t *data, void *user_data), - void *user_data) +int dav1d_data_wrap_internal(Dav1dData *const buf, const uint8_t *const ptr, + const size_t sz, + void (*const free_callback)(const uint8_t *data, + void *cookie), + void *const cookie) { validate_input_or_ret(buf != NULL, -EINVAL); validate_input_or_ret(ptr != NULL, -EINVAL); validate_input_or_ret(free_callback != NULL, -EINVAL); - buf->ref = dav1d_ref_wrap(ptr, free_callback, user_data); + buf->ref = dav1d_ref_wrap(ptr, free_callback, cookie); if (!buf->ref) return -ENOMEM; buf->data = ptr; buf->sz = buf->m.size = sz; 
buf->m.timestamp = INT64_MIN; buf->m.duration = 0; buf->m.offset = -1; + buf->m.user_data.data = NULL; + buf->m.user_data.ref = NULL; return 0; } +int dav1d_data_wrap_user_data_internal(Dav1dData *const buf, + const uint8_t *const user_data, + void (*const free_callback)(const uint8_t *user_data, + void *cookie), + void *const cookie) +{ + validate_input_or_ret(buf != NULL, -EINVAL); + validate_input_or_ret(free_callback != NULL, -EINVAL); + + buf->m.user_data.ref = dav1d_ref_wrap(user_data, free_callback, cookie); + if (!buf->m.user_data.ref) return -ENOMEM; + buf->m.user_data.data = user_data; + + return 0; +} + + +void dav1d_data_ref(Dav1dData *const dst, const Dav1dData *const src) { + validate_input(dst != NULL); + validate_input(dst->data == NULL); + validate_input(src != NULL); + + if (src->ref) { + validate_input(src->data != NULL); + dav1d_ref_inc(src->ref); + } + if (src->m.user_data.ref) dav1d_ref_inc(src->m.user_data.ref); + *dst = *src; +} + void dav1d_data_move_ref(Dav1dData *const dst, Dav1dData *const src) { validate_input(dst != NULL); validate_input(dst->data == NULL); @@ -84,12 +121,25 @@ void dav1d_data_move_ref(Dav1dData *const dst, Dav1dData *const src) { memset(src, 0, sizeof(*src)); } -void dav1d_data_unref(Dav1dData *const buf) { +void dav1d_data_props_copy(Dav1dDataProps *const dst, + const Dav1dDataProps *const src) +{ + assert(dst != NULL); + assert(src != NULL); + + dav1d_ref_dec(&dst->user_data.ref); + *dst = *src; + if (dst->user_data.ref) dav1d_ref_inc(dst->user_data.ref); +} + +void dav1d_data_unref_internal(Dav1dData *const buf) { validate_input(buf != NULL); + struct Dav1dRef *user_data_ref = buf->m.user_data.ref; if (buf->ref) { validate_input(buf->data != NULL); dav1d_ref_dec(&buf->ref); } memset(buf, 0, sizeof(*buf)); + dav1d_ref_dec(&user_data_ref); } diff --git a/third_party/dav1d/src/data.h b/third_party/dav1d/src/data.h index 40bd17daa49f..65f24d6f12d6 100644 --- a/third_party/dav1d/src/data.h +++ 
b/third_party/dav1d/src/data.h @@ -30,9 +30,29 @@ #include "dav1d/data.h" +void dav1d_data_ref(Dav1dData *dst, const Dav1dData *src); + /** * Move a data reference. */ void dav1d_data_move_ref(Dav1dData *dst, Dav1dData *src); +/** + * Copy the source properties to the destination and increase the + * user_data's reference count (if it's not NULL). + */ +void dav1d_data_props_copy(Dav1dDataProps *dst, const Dav1dDataProps *src); + +uint8_t *dav1d_data_create_internal(Dav1dData *buf, size_t sz); +int dav1d_data_wrap_internal(Dav1dData *buf, const uint8_t *ptr, size_t sz, + void (*free_callback)(const uint8_t *data, + void *user_data), + void *user_data); +int dav1d_data_wrap_user_data_internal(Dav1dData *buf, + const uint8_t *user_data, + void (*free_callback)(const uint8_t *user_data, + void *cookie), + void *cookie); +void dav1d_data_unref_internal(Dav1dData *buf); + #endif /* __DAV1D_SRC_DATA_H__ */ diff --git a/third_party/dav1d/src/decode.c b/third_party/dav1d/src/decode.c index 5524c21d2b4d..3cafdc73da4e 100644 --- a/third_party/dav1d/src/decode.c +++ b/third_party/dav1d/src/decode.c @@ -1264,16 +1264,17 @@ static int decode_b(Dav1dTileContext *const t, } int src_left = t->bx * 4 + (b->mv[0].x >> 3); int src_top = t->by * 4 + (b->mv[0].y >> 3); - int src_right = src_left + w4 * 4; - int src_bottom = src_top + h4 * 4; + int src_right = src_left + bw4 * 4; + int src_bottom = src_top + bh4 * 4; + const int border_right = ((ts->tiling.col_end + (bw4 - 1)) & ~(bw4 - 1)) * 4; // check against left or right tile boundary and adjust if necessary if (src_left < border_left) { src_right += border_left - src_left; src_left += border_left - src_left; - } else if (src_right > ts->tiling.col_end * 4) { - src_left -= src_right - ts->tiling.col_end * 4; - src_right -= src_right - ts->tiling.col_end * 4; + } else if (src_right > border_right) { + src_left -= src_right - border_right; + src_right -= src_right - border_right; } // check against top tile boundary and adjust if 
necessary if (src_top < border_top) { @@ -1905,6 +1906,61 @@ static int decode_b(Dav1dTileContext *const t, return 0; } +#if defined(__has_feature) +#if __has_feature(memory_sanitizer) + +#include <sanitizer/msan_interface.h> + +static int checked_decode_b(Dav1dTileContext *const t, + const enum BlockLevel bl, + const enum BlockSize bs, + const enum BlockPartition bp, + const enum EdgeFlags intra_edge_flags) +{ + const Dav1dFrameContext *const f = t->f; + const int err = decode_b(t, bl, bs, bp, intra_edge_flags); + + if (err == 0 && !(f->frame_thread.pass & 1)) { + const int ss_ver = f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; + const int ss_hor = f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; + const uint8_t *const b_dim = dav1d_block_dimensions[bs]; + const int bw4 = b_dim[0], bh4 = b_dim[1]; + const int w4 = imin(bw4, f->bw - t->bx), h4 = imin(bh4, f->bh - t->by); + const int has_chroma = f->seq_hdr->layout != DAV1D_PIXEL_LAYOUT_I400 && + (bw4 > ss_hor || t->bx & 1) && + (bh4 > ss_ver || t->by & 1); + + for (int p = 0; p < 1 + 2 * has_chroma; p++) { + const int ss_ver = p && f->cur.p.layout == DAV1D_PIXEL_LAYOUT_I420; + const int ss_hor = p && f->cur.p.layout != DAV1D_PIXEL_LAYOUT_I444; + const int stride = f->cur.stride[!!p]; + const int bx = t->bx & ~ss_hor; + const int by = t->by & ~ss_ver; + const int width = w4 << (2 - ss_hor + (bw4 == ss_hor)); + const int height = h4 << (2 - ss_ver + (bh4 == ss_ver)); + + const uint8_t *data = f->cur.data[p] + (by << (2 - ss_ver)) * stride + + (bx << (2 - ss_hor + !!f->seq_hdr->hbd)); + + for (int y = 0; y < height; data += stride, y++) { + const size_t line_sz = width << !!f->seq_hdr->hbd; + if (__msan_test_shadow(data, line_sz) != -1) { + fprintf(stderr, "B[%d](%d, %d) w4:%d, h4:%d, row:%d\n", + p, bx, by, w4, h4, y); + __msan_check_mem_is_initialized(data, line_sz); + } + } + } + } + + return err; +} + +#define decode_b checked_decode_b + +#endif /* __has_feature(memory_sanitizer) */ +#endif /* defined(__has_feature) */ + static int 
decode_sb(Dav1dTileContext *const t, const enum BlockLevel bl, const EdgeNode *const node) { @@ -2946,7 +3002,7 @@ error: dav1d_ref_dec(&f->ref_mvs_ref[i]); } - dav1d_picture_unref(&f->cur); + dav1d_picture_unref_internal(&f->cur); dav1d_thread_picture_unref(&f->sr_cur); dav1d_cdf_thread_unref(&f->in_cdf); if (f->frame_hdr->refresh_context) { @@ -2960,7 +3016,7 @@ error: dav1d_ref_dec(&f->frame_hdr_ref); for (int i = 0; i < f->n_tile_data; i++) - dav1d_data_unref(&f->tile[i].data); + dav1d_data_unref_internal(&f->tile[i].data); return retval; } @@ -3124,19 +3180,13 @@ int dav1d_submit_frame(Dav1dContext *const c) { // allocate frame res = dav1d_thread_picture_alloc(&f->sr_cur, f->frame_hdr->width[1], f->frame_hdr->height, - f->seq_hdr->layout, bpc, + f->seq_hdr, f->seq_hdr_ref, + f->frame_hdr, f->frame_hdr_ref, + bpc, &f->tile[0].data.m, c->n_fc > 1 ? &f->frame_thread.td : NULL, f->frame_hdr->show_frame, &c->allocator); if (res < 0) goto error; - f->sr_cur.p.m = f->tile[0].data.m; - f->sr_cur.p.frame_hdr = f->frame_hdr; - f->sr_cur.p.frame_hdr_ref = f->frame_hdr_ref; - dav1d_ref_inc(f->frame_hdr_ref); - f->sr_cur.p.seq_hdr = f->seq_hdr; - f->sr_cur.p.seq_hdr_ref = f->seq_hdr_ref; - dav1d_ref_inc(f->seq_hdr_ref); - if (f->frame_hdr->super_res.enabled) { res = dav1d_picture_alloc_copy(&f->cur, f->frame_hdr->width[0], &f->sr_cur.p); if (res < 0) goto error; @@ -3300,7 +3350,7 @@ int dav1d_submit_frame(Dav1dContext *const c) { if (c->n_fc == 1) { const unsigned refresh_frame_flags = f->frame_hdr->refresh_frame_flags; if ((res = dav1d_decode_frame(f)) < 0) { - dav1d_picture_unref(&c->out); + dav1d_picture_unref_internal(&c->out); for (int i = 0; i < 8; i++) { if (refresh_frame_flags & (1 << i)) { if (c->refs[i].p.p.data[0]) @@ -3328,17 +3378,17 @@ error: dav1d_ref_dec(&f->ref_mvs_ref[i]); } if (c->n_fc == 1) - dav1d_picture_unref(&c->out); + dav1d_picture_unref_internal(&c->out); else dav1d_thread_picture_unref(out_delayed); - dav1d_picture_unref(&f->cur); + 
dav1d_picture_unref_internal(&f->cur); dav1d_thread_picture_unref(&f->sr_cur); dav1d_ref_dec(&f->mvs_ref); dav1d_ref_dec(&f->seq_hdr_ref); dav1d_ref_dec(&f->frame_hdr_ref); for (int i = 0; i < f->n_tile_data; i++) - dav1d_data_unref(&f->tile[i].data); + dav1d_data_unref_internal(&f->tile[i].data); f->n_tile_data = 0; if (c->n_fc > 1) { diff --git a/third_party/dav1d/src/ext/x86/x86inc.asm b/third_party/dav1d/src/ext/x86/x86inc.asm index eab4e53080ac..b249f2a792dd 100644 --- a/third_party/dav1d/src/ext/x86/x86inc.asm +++ b/third_party/dav1d/src/ext/x86/x86inc.asm @@ -89,16 +89,13 @@ %endif %endmacro -%if WIN64 - %define PIC -%elif ARCH_X86_64 == 0 -; x86_32 doesn't require PIC. -; Some distros prefer shared objects to be PIC, but nothing breaks if -; the code contains a few textrels, so we'll skip that complexity. - %undef PIC -%endif -%ifdef PIC +%if ARCH_X86_64 + %define PIC 1 ; always use PIC on x86-64 default rel +%elifidn __OUTPUT_FORMAT__,win32 + %define PIC 0 ; PIC isn't used on 32-bit Windows +%elifndef PIC + %define PIC 0 %endif %ifdef __NASM_VER__ @@ -220,6 +217,18 @@ DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %define gprsize 4 %endif +%macro LEA 2 +%if ARCH_X86_64 + lea %1, [%2] +%elif PIC + call $+5 ; special-cased to not affect the RSB on most CPU:s + pop %1 + add %1, (%2)-$+1 +%else + mov %1, %2 +%endif +%endmacro + %macro PUSH 1 push %1 %ifidn rstk, rsp @@ -673,7 +682,7 @@ DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp -%macro TAIL_CALL 2 ; callee, is_nonadjacent +%macro TAIL_CALL 1-2 1 ; callee, is_nonadjacent %if has_epilogue call %1 RET diff --git a/third_party/dav1d/src/ipred_tmpl.c b/third_party/dav1d/src/ipred_tmpl.c index 53135fd46e46..dff3ec92bef4 100644 --- a/third_party/dav1d/src/ipred_tmpl.c +++ b/third_party/dav1d/src/ipred_tmpl.c @@ -422,7 +422,7 @@ static void ipred_z1_c(pixel *dst, const 
ptrdiff_t stride, const int enable_intra_edge_filter = angle >> 10; angle &= 511; assert(angle < 90); - int dx = dav1d_dr_intra_derivative[angle]; + int dx = dav1d_dr_intra_derivative[angle >> 1]; pixel top_out[(64 + 64) * 2]; const pixel *top; int max_base_x; @@ -476,8 +476,8 @@ static void ipred_z2_c(pixel *dst, const ptrdiff_t stride, const int enable_intra_edge_filter = angle >> 10; angle &= 511; assert(angle > 90 && angle < 180); - int dy = dav1d_dr_intra_derivative[angle - 90]; - int dx = dav1d_dr_intra_derivative[180 - angle]; + int dy = dav1d_dr_intra_derivative[(angle - 90) >> 1]; + int dx = dav1d_dr_intra_derivative[(180 - angle) >> 1]; const int upsample_left = enable_intra_edge_filter ? get_upsample(width + height, 180 - angle, is_sm) : 0; const int upsample_above = enable_intra_edge_filter ? @@ -557,7 +557,7 @@ static void ipred_z3_c(pixel *dst, const ptrdiff_t stride, const int enable_intra_edge_filter = angle >> 10; angle &= 511; assert(angle > 180); - int dy = dav1d_dr_intra_derivative[270 - angle]; + int dy = dav1d_dr_intra_derivative[(270 - angle) >> 1]; pixel left_out[(64 + 64) * 2]; const pixel *left; int max_base_y; diff --git a/third_party/dav1d/src/itx_tmpl.c b/third_party/dav1d/src/itx_tmpl.c index e8f76227c980..bec9b1b3e149 100644 --- a/third_party/dav1d/src/itx_tmpl.c +++ b/third_party/dav1d/src/itx_tmpl.c @@ -58,7 +58,6 @@ inv_txfm_add_c(pixel *dst, const ptrdiff_t stride, const int bitdepth = bitdepth_from_max(bitdepth_max); const int row_clip_max = (1 << (bitdepth + 8 - 1)) - 1; const int col_clip_max = (1 << (imax(bitdepth + 6, 16) - 1)) -1; - const int col_clip_min = -col_clip_max - 1; if (w != sw) memset(&in_mem[sw], 0, (w - sw) * sizeof(*in_mem)); const int rnd1 = (1 << shift1) >> 1; @@ -74,8 +73,12 @@ inv_txfm_add_c(pixel *dst, const ptrdiff_t stride, first_1d_fn(&coeff[i], sh, &tmp[i * w], 1, row_clip_max); } for (j = 0; j < w; j++) +#if BITDEPTH == 8 + tmp[i * w + j] = (tmp[i * w + j] + (rnd1)) >> shift1; +#else tmp[i * w + j] = 
iclip((tmp[i * w + j] + (rnd1)) >> shift1, - col_clip_min, col_clip_max); + -col_clip_max - 1, col_clip_max); +#endif } if (h != sh) memset(&tmp[sh * w], 0, w * (h - sh) * sizeof(*tmp)); diff --git a/third_party/dav1d/src/lib.c b/third_party/dav1d/src/lib.c index 5aeed3ee9973..01ff06d2638f 100644 --- a/third_party/dav1d/src/lib.c +++ b/third_party/dav1d/src/lib.c @@ -194,11 +194,11 @@ int dav1d_parse_sequence_header(Dav1dSequenceHeader *const out, dav1d_default_settings(&s); Dav1dContext *c; - res = dav1d_open(&c, &s); + res = dav1d_open(&c, &s); if (res < 0) return res; if (ptr) { - res = dav1d_data_wrap(&buf, ptr, sz, dummy_free, NULL); + res = dav1d_data_wrap_internal(&buf, ptr, sz, dummy_free, NULL); if (res < 0) goto error; } @@ -220,7 +220,7 @@ int dav1d_parse_sequence_header(Dav1dSequenceHeader *const out, res = 0; error: - dav1d_data_unref(&buf); + dav1d_data_unref_internal(&buf); dav1d_close(&c); return res; @@ -257,8 +257,8 @@ static int output_image(Dav1dContext *const c, Dav1dPicture *const out, // Apply film grain to a new copy of the image to avoid corrupting refs int res = dav1d_picture_alloc_copy(out, in->p.w, in); if (res < 0) { - dav1d_picture_unref(in); - dav1d_picture_unref(out); + dav1d_picture_unref_internal(in); + dav1d_picture_unref_internal(out); return res; } @@ -278,7 +278,7 @@ static int output_image(Dav1dContext *const c, Dav1dPicture *const out, assert(0); } - dav1d_picture_unref(in); + dav1d_picture_unref_internal(in); return 0; } @@ -290,7 +290,7 @@ static int output_picture_ready(Dav1dContext *const c) { if (c->operating_point_idc && !c->all_layers) { const int max_spatial_id = ulog2(c->operating_point_idc >> 8); if (max_spatial_id > c->out.frame_hdr->spatial_id) { - dav1d_picture_unref(&c->out); + dav1d_picture_unref_internal(&c->out); return 0; } } @@ -346,12 +346,12 @@ int dav1d_get_picture(Dav1dContext *const c, Dav1dPicture *const out) while (in->sz > 0) { res = dav1d_parse_obus(c, in, 0); if (res < 0) { - dav1d_data_unref(in); 
+ dav1d_data_unref_internal(in); } else { assert((size_t)res <= in->sz); in->sz -= res; in->data += res; - if (!in->sz) dav1d_data_unref(in); + if (!in->sz) dav1d_data_unref_internal(in); } if (output_picture_ready(c)) break; @@ -369,7 +369,7 @@ int dav1d_get_picture(Dav1dContext *const c, Dav1dPicture *const out) } void dav1d_flush(Dav1dContext *const c) { - dav1d_data_unref(&c->in); + dav1d_data_unref_internal(&c->in); c->drain = 0; if (c->n_fc == 1) return; @@ -482,7 +482,7 @@ void dav1d_close(Dav1dContext **const c_out) { dav1d_free_aligned(f->lf.lr_lpf_line); } dav1d_free_aligned(c->fc); - dav1d_data_unref(&c->in); + dav1d_data_unref_internal(&c->in); if (c->n_fc > 1) { for (unsigned n = 0; n < c->n_fc; n++) if (c->frame_thread.out_delayed[n].p.data[0]) @@ -490,7 +490,7 @@ void dav1d_close(Dav1dContext **const c_out) { free(c->frame_thread.out_delayed); } for (int n = 0; n < c->n_tile_data; n++) - dav1d_data_unref(&c->tile[n].data); + dav1d_data_unref_internal(&c->tile[n].data); for (int n = 0; n < 8; n++) { dav1d_cdf_thread_unref(&c->cdf[n]); if (c->refs[n].p.p.data[0]) @@ -503,3 +503,36 @@ void dav1d_close(Dav1dContext **const c_out) { dav1d_freep_aligned(c_out); } + +void dav1d_picture_unref(Dav1dPicture *const p) { + dav1d_picture_unref_internal(p); +} + +uint8_t *dav1d_data_create(Dav1dData *const buf, const size_t sz) { + return dav1d_data_create_internal(buf, sz); +} + +int dav1d_data_wrap(Dav1dData *const buf, const uint8_t *const ptr, + const size_t sz, + void (*const free_callback)(const uint8_t *data, + void *user_data), + void *const user_data) +{ + return dav1d_data_wrap_internal(buf, ptr, sz, free_callback, user_data); +} + +int dav1d_data_wrap_user_data(Dav1dData *const buf, + const uint8_t *const user_data, + void (*const free_callback)(const uint8_t *user_data, + void *cookie), + void *const cookie) +{ + return dav1d_data_wrap_user_data_internal(buf, + user_data, + free_callback, + cookie); +} + +void dav1d_data_unref(Dav1dData *const buf) { + 
dav1d_data_unref_internal(buf); +} diff --git a/third_party/dav1d/src/lr_apply_tmpl.c b/third_party/dav1d/src/lr_apply_tmpl.c index dc3fbb5ef457..18141b05c7e4 100644 --- a/third_party/dav1d/src/lr_apply_tmpl.c +++ b/third_party/dav1d/src/lr_apply_tmpl.c @@ -47,7 +47,8 @@ static void backup_lpf(const Dav1dFrameContext *const f, pixel *dst, const ptrdiff_t dst_stride, const pixel *src, const ptrdiff_t src_stride, const int ss_ver, const int sb128, - int row, const int row_h, const int src_w, const int ss_hor) + int row, const int row_h, const int src_w, + const int h, const int ss_hor) { const int dst_w = f->frame_hdr->super_res.enabled ? (f->frame_hdr->width[1] + ss_hor) >> ss_hor : src_w; @@ -74,18 +75,25 @@ static void backup_lpf(const Dav1dFrameContext *const f, if (f->frame_hdr->super_res.enabled) { while (row + stripe_h <= row_h) { + const int n_lines = 4 - (row + stripe_h + 1 == h); f->dsp->mc.resize(dst, dst_stride, src, src_stride, - dst_w, src_w, 4, f->resize_step[ss_hor], + dst_w, src_w, n_lines, f->resize_step[ss_hor], f->resize_start[ss_hor] HIGHBD_CALL_SUFFIX); row += stripe_h; // unmodified stripe_h for the 1st stripe stripe_h = 64 >> ss_ver; src += stripe_h * PXSTRIDE(src_stride); - dst += 4 * PXSTRIDE(dst_stride); + dst += n_lines * PXSTRIDE(dst_stride); + if (n_lines == 3) { + pixel_copy(dst, &dst[-PXSTRIDE(dst_stride)], dst_w); + dst += PXSTRIDE(dst_stride); + } } } else { while (row + stripe_h <= row_h) { + const int n_lines = 4 - (row + stripe_h + 1 == h); for (int i = 0; i < 4; i++) { - pixel_copy(dst, src, src_w); + pixel_copy(dst, i == n_lines ? 
&dst[-PXSTRIDE(dst_stride)] : + src, src_w); dst += PXSTRIDE(dst_stride); src += PXSTRIDE(src_stride); } @@ -110,20 +118,20 @@ void bytefn(dav1d_lr_copy_lpf)(Dav1dFrameContext *const f, ((f->frame_hdr->restoration.type[2] != DAV1D_RESTORATION_NONE) << 2); if (restore_planes & LR_RESTORE_Y) { - const int h = f->bh << 2; + const int h = f->cur.p.h; const int w = f->bw << 2; - const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 4); + const int row_h = imin((sby + 1) << (6 + f->seq_hdr->sb128), h - 1); const int y_stripe = (sby << (6 + f->seq_hdr->sb128)) - offset; backup_lpf(f, f->lf.lr_lpf_line_ptr[0], lr_stride, src[0] - offset * PXSTRIDE(src_stride[0]), src_stride[0], - 0, f->seq_hdr->sb128, y_stripe, row_h, w, 0); + 0, f->seq_hdr->sb128, y_stripe, row_h, w, h, 0); } if (restore_planes & (LR_RESTORE_U | LR_RESTORE_V)) { const int ss_ver = f->sr_cur.p.p.layout == DAV1D_PIXEL_LAYOUT_I420; const int ss_hor = f->sr_cur.p.p.layout != DAV1D_PIXEL_LAYOUT_I444; - const int h = f->bh << (2 - ss_ver); + const int h = (f->cur.p.h + ss_ver) >> ss_ver; const int w = f->bw << (2 - ss_hor); - const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 4); + const int row_h = imin((sby + 1) << ((6 - ss_ver) + f->seq_hdr->sb128), h - 1); const ptrdiff_t offset_uv = offset >> ss_ver; const int y_stripe = (sby << ((6 - ss_ver) + f->seq_hdr->sb128)) - offset_uv; @@ -131,12 +139,12 @@ void bytefn(dav1d_lr_copy_lpf)(Dav1dFrameContext *const f, if (restore_planes & LR_RESTORE_U) { backup_lpf(f, f->lf.lr_lpf_line_ptr[1], lr_stride, src[1] - offset_uv * PXSTRIDE(src_stride[1]), src_stride[1], - ss_ver, f->seq_hdr->sb128, y_stripe, row_h, w, ss_hor); + ss_ver, f->seq_hdr->sb128, y_stripe, row_h, w, h, ss_hor); } if (restore_planes & LR_RESTORE_V) { backup_lpf(f, f->lf.lr_lpf_line_ptr[2], lr_stride, src[2] - offset_uv * PXSTRIDE(src_stride[1]), src_stride[1], - ss_ver, f->seq_hdr->sb128, y_stripe, row_h, w, ss_hor); + ss_ver, f->seq_hdr->sb128, y_stripe, row_h, 
w, h, ss_hor); } } } diff --git a/third_party/dav1d/src/obu.c b/third_party/dav1d/src/obu.c index ec479a9da5c4..66d2e4598c66 100644 --- a/third_party/dav1d/src/obu.c +++ b/third_party/dav1d/src/obu.c @@ -221,7 +221,7 @@ static int parse_seq_hdr(Dav1dContext *const c, GetBits *const gb, if (hdr->monochrome) { hdr->color_range = dav1d_get_bits(gb, 1); hdr->layout = DAV1D_PIXEL_LAYOUT_I400; - hdr->ss_hor = hdr->ss_ver = 0; + hdr->ss_hor = hdr->ss_ver = 1; hdr->chr = DAV1D_CHR_UNKNOWN; hdr->separate_uv_delta_q = 0; } else if (hdr->pri == DAV1D_COLOR_PRI_BT709 && @@ -229,7 +229,7 @@ static int parse_seq_hdr(Dav1dContext *const c, GetBits *const gb, hdr->mtrx == DAV1D_MC_IDENTITY) { hdr->layout = DAV1D_PIXEL_LAYOUT_I444; - hdr->ss_hor = hdr->ss_ver = 1; + hdr->ss_hor = hdr->ss_ver = 0; hdr->color_range = 1; if (hdr->profile != 1 && !(hdr->profile == 2 && hdr->hbd == 2)) goto error; @@ -258,8 +258,8 @@ static int parse_seq_hdr(Dav1dContext *const c, GetBits *const gb, } hdr->chr = hdr->ss_hor == 1 && hdr->ss_ver == 1 ? 
dav1d_get_bits(gb, 2) : DAV1D_CHR_UNKNOWN; - hdr->separate_uv_delta_q = dav1d_get_bits(gb, 1); } + hdr->separate_uv_delta_q = !hdr->monochrome && dav1d_get_bits(gb, 1); #if DEBUG_SEQ_HDR printf("SEQHDR: post-colorinfo: off=%ld\n", dav1d_get_bits_pos(gb) - init_bit_pos); @@ -1283,7 +1283,7 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, int global) { return res; } for (int n = 0; n < c->n_tile_data; n++) - dav1d_data_unref(&c->tile[n].data); + dav1d_data_unref_internal(&c->tile[n].data); c->n_tile_data = 0; c->n_tiles = 0; if (type != OBU_FRAME) { @@ -1323,17 +1323,15 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, int global) { const unsigned bit_pos = dav1d_get_bits_pos(&gb); assert((bit_pos & 7) == 0); assert(pkt_bytelen >= (bit_pos >> 3)); - dav1d_ref_inc(in->ref); - c->tile[c->n_tile_data].data.ref = in->ref; - c->tile[c->n_tile_data].data.m = in->m; - c->tile[c->n_tile_data].data.data = in->data + (bit_pos >> 3); + dav1d_data_ref(&c->tile[c->n_tile_data].data, in); + c->tile[c->n_tile_data].data.data += bit_pos >> 3; c->tile[c->n_tile_data].data.sz = pkt_bytelen - (bit_pos >> 3); // ensure tile groups are in order and sane, see 6.10.1 if (c->tile[c->n_tile_data].start > c->tile[c->n_tile_data].end || c->tile[c->n_tile_data].start != c->n_tiles) { for (int i = 0; i <= c->n_tile_data; i++) - dav1d_data_unref(&c->tile[i].data); + dav1d_data_unref_internal(&c->tile[i].data); c->n_tile_data = 0; c->n_tiles = 0; goto error; @@ -1359,7 +1357,7 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, int global) { if (c->n_fc == 1) { dav1d_picture_ref(&c->out, &c->refs[c->frame_hdr->existing_frame_idx].p.p); - c->out.m = in->m; + dav1d_data_props_copy(&c->out.m, &in->m); } else { // need to append this to the frame output queue const unsigned next = c->frame_thread.next++; @@ -1383,7 +1381,7 @@ int dav1d_parse_obus(Dav1dContext *const c, Dav1dData *const in, int global) { dav1d_thread_picture_ref(out_delayed, 
&c->refs[c->frame_hdr->existing_frame_idx].p); out_delayed->visible = 1; - out_delayed->p.m = in->m; + dav1d_data_props_copy(&out_delayed->p.m, &in->m); pthread_mutex_unlock(&f->frame_thread.td.lock); } if (c->refs[c->frame_hdr->existing_frame_idx].p.p.frame_hdr->frame_type == DAV1D_FRAME_TYPE_KEY) { diff --git a/third_party/dav1d/src/picture.c b/third_party/dav1d/src/picture.c index ba51d82a39c7..b6eb199b8d04 100644 --- a/third_party/dav1d/src/picture.c +++ b/third_party/dav1d/src/picture.c @@ -99,8 +99,9 @@ static void free_buffer(const uint8_t *const data, void *const user_data) { static int picture_alloc_with_edges(Dav1dPicture *const p, const int w, const int h, - const enum Dav1dPixelLayout layout, - const int bpc, + Dav1dSequenceHeader *seq_hdr, Dav1dRef *seq_hdr_ref, + Dav1dFrameHeader *frame_hdr, Dav1dRef *frame_hdr_ref, + const int bpc, const Dav1dDataProps *props, Dav1dPicAllocator *const p_allocator, const size_t extra, void **const extra_ptr) { @@ -120,7 +121,11 @@ static int picture_alloc_with_edges(Dav1dPicture *const p, p->m.timestamp = INT64_MIN; p->m.duration = 0; p->m.offset = -1; - p->p.layout = layout; + p->m.user_data.data = NULL; + p->m.user_data.ref = NULL; + p->seq_hdr = seq_hdr; + p->frame_hdr = frame_hdr; + p->p.layout = seq_hdr->layout; p->p.bpc = bpc; int res = p_allocator->alloc_picture_callback(p, p_allocator->cookie); if (res < 0) { @@ -138,6 +143,14 @@ static int picture_alloc_with_edges(Dav1dPicture *const p, return -ENOMEM; } + p->seq_hdr_ref = seq_hdr_ref; + if (seq_hdr_ref) dav1d_ref_inc(seq_hdr_ref); + + p->frame_hdr_ref = frame_hdr_ref; + if (frame_hdr_ref) dav1d_ref_inc(frame_hdr_ref); + + dav1d_data_props_copy(&p->m, props); + if (extra && extra_ptr) *extra_ptr = &pic_ctx->extra_ptr; @@ -146,14 +159,19 @@ static int picture_alloc_with_edges(Dav1dPicture *const p, int dav1d_thread_picture_alloc(Dav1dThreadPicture *const p, const int w, const int h, - const enum Dav1dPixelLayout layout, const int bpc, + Dav1dSequenceHeader 
*seq_hdr, Dav1dRef *seq_hdr_ref, + Dav1dFrameHeader *frame_hdr, Dav1dRef *frame_hdr_ref, + const int bpc, const Dav1dDataProps *props, struct thread_data *const t, const int visible, Dav1dPicAllocator *const p_allocator) { p->t = t; const int res = - picture_alloc_with_edges(&p->p, w, h, layout, bpc, p_allocator, + picture_alloc_with_edges(&p->p, w, h, + seq_hdr, seq_hdr_ref, + frame_hdr, frame_hdr_ref, + bpc, props, p_allocator, t != NULL ? sizeof(atomic_int) * 2 : 0, (void **) &p->progress); if (res) return res; @@ -170,22 +188,11 @@ int dav1d_picture_alloc_copy(Dav1dPicture *const dst, const int w, const Dav1dPicture *const src) { struct pic_ctx_context *const pic_ctx = src->ref->user_data; - const int res = picture_alloc_with_edges(dst, w, src->p.h, src->p.layout, - src->p.bpc, &pic_ctx->allocator, + const int res = picture_alloc_with_edges(dst, w, src->p.h, + src->seq_hdr, src->seq_hdr_ref, + src->frame_hdr, src->frame_hdr_ref, + src->p.bpc, &src->m, &pic_ctx->allocator, 0, NULL); - - if (!res) { - dst->p = src->p; - dst->m = src->m; - dst->p.w = w; - dst->frame_hdr = src->frame_hdr; - dst->frame_hdr_ref = src->frame_hdr_ref; - if (dst->frame_hdr_ref) dav1d_ref_inc(dst->frame_hdr_ref); - dst->seq_hdr = src->seq_hdr; - dst->seq_hdr_ref = src->seq_hdr_ref; - if (dst->seq_hdr_ref) dav1d_ref_inc(dst->seq_hdr_ref); - } - return res; } @@ -199,6 +206,7 @@ void dav1d_picture_ref(Dav1dPicture *const dst, const Dav1dPicture *const src) { dav1d_ref_inc(src->ref); if (src->frame_hdr_ref) dav1d_ref_inc(src->frame_hdr_ref); if (src->seq_hdr_ref) dav1d_ref_inc(src->seq_hdr_ref); + if (src->m.user_data.ref) dav1d_ref_inc(src->m.user_data.ref); } *dst = *src; } @@ -224,7 +232,7 @@ void dav1d_thread_picture_ref(Dav1dThreadPicture *dst, dst->progress = src->progress; } -void dav1d_picture_unref(Dav1dPicture *const p) { +void dav1d_picture_unref_internal(Dav1dPicture *const p) { validate_input(p != NULL); if (p->ref) { @@ -232,12 +240,13 @@ void dav1d_picture_unref(Dav1dPicture 
*const p) { dav1d_ref_dec(&p->ref); dav1d_ref_dec(&p->seq_hdr_ref); dav1d_ref_dec(&p->frame_hdr_ref); + dav1d_ref_dec(&p->m.user_data.ref); } memset(p, 0, sizeof(*p)); } void dav1d_thread_picture_unref(Dav1dThreadPicture *const p) { - dav1d_picture_unref(&p->p); + dav1d_picture_unref_internal(&p->p); p->t = NULL; p->progress = NULL; diff --git a/third_party/dav1d/src/picture.h b/third_party/dav1d/src/picture.h index f31edb5fa119..447a81a5027a 100644 --- a/third_party/dav1d/src/picture.h +++ b/third_party/dav1d/src/picture.h @@ -34,6 +34,7 @@ #include "dav1d/picture.h" #include "src/thread_data.h" +#include "src/ref.h" enum PlaneType { PLANE_TYPE_Y, @@ -55,7 +56,9 @@ typedef struct Dav1dThreadPicture { * Allocate a picture with custom border size. */ int dav1d_thread_picture_alloc(Dav1dThreadPicture *p, int w, int h, - enum Dav1dPixelLayout layout, int bpc, + Dav1dSequenceHeader *seq_hdr, Dav1dRef *seq_hdr_ref, + Dav1dFrameHeader *frame_hdr, Dav1dRef *frame_hdr_ref, + int bpc, const Dav1dDataProps *props, struct thread_data *t, int visible, Dav1dPicAllocator *); @@ -109,5 +112,6 @@ void dav1d_thread_picture_signal(const Dav1dThreadPicture *p, int y, int default_picture_allocator(Dav1dPicture *, void *cookie); void default_picture_release(Dav1dPicture *, void *cookie); +void dav1d_picture_unref_internal(Dav1dPicture *p); #endif /* __DAV1D_SRC_PICTURE_H__ */ diff --git a/third_party/dav1d/src/recon_tmpl.c b/third_party/dav1d/src/recon_tmpl.c index 1c7da4bada43..5fb627dddb17 100644 --- a/third_party/dav1d/src/recon_tmpl.c +++ b/third_party/dav1d/src/recon_tmpl.c @@ -205,8 +205,9 @@ static int decode_coefs(Dav1dTileContext *const t, // residual and sign int dc_sign = 1; + const int lossless = f->frame_hdr->segmentation.lossless[b->seg_id]; const uint16_t *const dq_tbl = ts->dq[b->seg_id][plane]; - const uint8_t *const qm_tbl = f->qm[is_1d || *txtp == IDTX][tx][plane]; + const uint8_t *const qm_tbl = f->qm[lossless || is_1d || *txtp == IDTX][tx][plane]; const int 
dq_shift = imax(0, t_dim->ctx - 2); const int bitdepth = BITDEPTH == 8 ? 8 : f->cur.p.bpc; const int cf_min = -(1 << (7 + bitdepth)); diff --git a/third_party/dav1d/src/ref.c b/third_party/dav1d/src/ref.c index 2c331a144929..89b15804799d 100644 --- a/third_party/dav1d/src/ref.c +++ b/third_party/dav1d/src/ref.c @@ -45,7 +45,9 @@ Dav1dRef *dav1d_ref_create(const size_t size) { res = dav1d_ref_wrap(data, default_free_callback, data); if (!res) { - free(data); + dav1d_free_aligned(data); + } else { + res->data = data; } return res; @@ -58,8 +60,7 @@ Dav1dRef *dav1d_ref_wrap(const uint8_t *const ptr, Dav1dRef *res = malloc(sizeof(Dav1dRef)); if (!res) return NULL; - if (ptr == user_data) - res->data = user_data; + res->data = NULL; res->const_data = ptr; atomic_init(&res->ref_cnt, 1); res->free_callback = free_callback; @@ -86,5 +87,5 @@ void dav1d_ref_dec(Dav1dRef **const pref) { } int dav1d_ref_is_writable(Dav1dRef *const ref) { - return atomic_load(&ref->ref_cnt) == 1; + return atomic_load(&ref->ref_cnt) == 1 && ref->data; } diff --git a/third_party/dav1d/src/tables.c b/third_party/dav1d/src/tables.c index a327371808f5..3fe46d4f330c 100644 --- a/third_party/dav1d/src/tables.c +++ b/third_party/dav1d/src/tables.c @@ -775,37 +775,36 @@ const uint8_t dav1d_sm_weights[128] = { 7, 6, 6, 5, 5, 4, 4, 4 }; -const int16_t dav1d_dr_intra_derivative[90] = { - // More evenly spread out angles and limited to 10-bit +const uint16_t dav1d_dr_intra_derivative[44] = { // Values that are 0 will never be used - 0, 0, 0, // Approx angle - 1023, 0, 0, // 3, ... - 547, 0, 0, // 6, ... - 372, 0, 0, 0, 0, // 9, ... - 273, 0, 0, // 14, ... - 215, 0, 0, // 17, ... - 178, 0, 0, // 20, ... - 151, 0, 0, // 23, ... (113 & 203 are base angles) - 132, 0, 0, // 26, ... - 116, 0, 0, // 29, ... - 102, 0, 0, 0, // 32, ... - 90, 0, 0, // 36, ... - 80, 0, 0, // 39, ... - 71, 0, 0, // 42, ... - 64, 0, 0, // 45, ... (45 & 135 are base angles) - 57, 0, 0, // 48, ... - 51, 0, 0, // 51, ... 
- 45, 0, 0, 0, // 54, ... - 40, 0, 0, // 58, ... - 35, 0, 0, // 61, ... - 31, 0, 0, // 64, ... - 27, 0, 0, // 67, ... (67 & 157 are base angles) - 23, 0, 0, // 70, ... - 19, 0, 0, // 73, ... - 15, 0, 0, 0, 0, // 76, ... - 11, 0, 0, // 81, ... - 7, 0, 0, // 84, ... - 3, 0, 0, // 87, ... + 0, // Angles: + 1023, 0, // 3, 93, 183 + 547, // 6, 96, 186 + 372, 0, 0, // 9, 99, 189 + 273, // 14, 104, 194 + 215, 0, // 17, 107, 197 + 178, // 20, 110, 200 + 151, 0, // 23, 113, 203 (113 & 203 are base angles) + 132, // 26, 116, 206 + 116, 0, // 29, 119, 209 + 102, 0, // 32, 122, 212 + 90, // 36, 126, 216 + 80, 0, // 39, 129, 219 + 71, // 42, 132, 222 + 64, 0, // 45, 135, 225 (45 & 135 are base angles) + 57, // 48, 138, 228 + 51, 0, // 51, 141, 231 + 45, 0, // 54, 144, 234 + 40, // 58, 148, 238 + 35, 0, // 61, 151, 241 + 31, // 64, 154, 244 + 27, 0, // 67, 157, 247 (67 & 157 are base angles) + 23, // 70, 160, 250 + 19, 0, // 73, 163, 253 + 15, 0, // 76, 166, 256 + 11, 0, // 81, 171, 261 + 7, // 84, 174, 264 + 3 // 87, 177, 267 }; const int8_t ALIGN(dav1d_filter_intra_taps[5][64], 16) = { diff --git a/third_party/dav1d/src/tables.h b/third_party/dav1d/src/tables.h index 25f253ab9381..c2e6e3609084 100644 --- a/third_party/dav1d/src/tables.h +++ b/third_party/dav1d/src/tables.h @@ -114,7 +114,7 @@ extern const int8_t dav1d_mc_warp_filter[193][8]; extern const int16_t dav1d_resize_filter[64][8]; extern const uint8_t dav1d_sm_weights[128]; -extern const int16_t dav1d_dr_intra_derivative[90]; +extern const uint16_t dav1d_dr_intra_derivative[44]; extern const int8_t dav1d_filter_intra_taps[5][64]; extern const uint8_t dav1d_obmc_masks[64]; diff --git a/third_party/dav1d/src/x86/cpu.c b/third_party/dav1d/src/x86/cpu.c index ebee4a0e4287..95ec22ea85aa 100644 --- a/third_party/dav1d/src/x86/cpu.c +++ b/third_party/dav1d/src/x86/cpu.c @@ -25,6 +25,8 @@ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ +#include "config.h" + #include #include "src/x86/cpu.h" @@ -47,6 +49,8 @@ unsigned dav1d_get_cpu_flags_x86(void) { if (info[2] & (1 << 9)) flags |= DAV1D_X86_CPU_FLAG_SSSE3; if (info[2] & (1 << 19)) flags |= DAV1D_X86_CPU_FLAG_SSE41; if (info[2] & (1 << 20)) flags |= DAV1D_X86_CPU_FLAG_SSE42; +#if ARCH_X86_64 + /* We only support >128-bit SIMD on x86-64. */ if (info[2] & (1 << 27)) /* OSXSAVE */ { uint64_t xcr = dav1d_cpu_xgetbv(0); if ((xcr & 0x00000006) == 0x00000006) /* XMM/YMM */ { @@ -61,6 +65,7 @@ unsigned dav1d_get_cpu_flags_x86(void) { } } } +#endif } return flags; diff --git a/third_party/dav1d/src/x86/ipred.asm b/third_party/dav1d/src/x86/ipred.asm index d186018d0b9b..f4f26730ccef 100644 --- a/third_party/dav1d/src/x86/ipred.asm +++ b/third_party/dav1d/src/x86/ipred.asm @@ -28,7 +28,7 @@ %if ARCH_X86_64 -SECTION_RODATA 32 +SECTION_RODATA 64 %macro SMOOTH_WEIGHT_TABLE 1-* %rep %0 @@ -57,7 +57,6 @@ smooth_weights: SMOOTH_WEIGHT_TABLE \ 18, 16, 15, 13, 12, 10, 9, 8, \ 7, 6, 6, 5, 5, 4, 4, 4 -; Note that the order of (some of) the following z constants matter z_filter_wh: db 7, 7, 11, 11, 15, 15, 19, 19, 19, 23, 23, 23, 31, 31, 31, 39 db 39, 39, 47, 47, 47, 63, 63, 63, 79, 79, 79, -1 z_filter_k: db 0, 16, 0, 16, 0, 20, 0, 20, 8, 16, 8, 16 @@ -65,10 +64,18 @@ z_filter_k: db 0, 16, 0, 16, 0, 20, 0, 20, 8, 16, 8, 16 db 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 8, 0 z_filter_s: db 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7 db 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15 + db 15, 15, 15, 15, 15, 15, 15, 15 ; should be in one cache line +pb_12: times 4 db 12 ; those are just placed here for alignment. 
+pb_14: times 4 db 14 +z3_shuf: db 8, 7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0 z_filter_t0: db 55,127, 39,127, 39,127, 7, 15, 31, 7, 15, 31, 0, 3, 31, 0 z_filter_t1: db 39, 63, 19, 47, 19, 47, 3, 3, 3, 3, 3, 3, 0, 0, 0, 0 -z_upsample: db 1, 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7 -z_shuf_w4: db 0, 1, 1, 2, 2, 3, 3, 4, 8, 9, 9, 10, 10, 11, 11, 12 +z_upsample1: db 1, 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7 +z_upsample2: db 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 8, 8, 8 +z_upsample3: db 0, 0, 0, 0, 1, 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5 +z1_shuf_w4: db 0, 1, 1, 2, 2, 3, 3, 4, 8, 9, 9, 10, 10, 11, 11, 12 +z3_shuf_w4: db 4, 3, 3, 2, 2, 1, 1, 0, 12, 11, 11, 10, 10, 9, 9, 8 +z_transpose4: db 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 z_base_inc: dw 0*64, 1*64, 2*64, 3*64, 4*64, 5*64, 6*64, 7*64 dw 16*64, 17*64, 18*64, 19*64, 20*64, 21*64, 22*64, 23*64 @@ -76,13 +83,14 @@ z_base_inc: dw 0*64, 1*64, 2*64, 3*64, 4*64, 5*64, 6*64, 7*64 filter_shuf1: db 10, 4, 10, 4, 37, 6, 5, 6,103, 9, 7, 9, 72, -1, 8, -1 db 16, 4, 0, 4, 53, 6, 5, 6,119, 11, 7, 11, 95, -1, 15, -1 filter_shuf2: db 3, 4, 3, 4, 5, 6, 5, 6, 7, 2, 7, 2, 1, -1, 1, -1 -filter_shuf3: db 3, 4, 3, 4, 5, 6, 5, 6, 7, 11, 7, 11, 15, -1, 15, -1 +filter_shuf3: db 3, 4, 3, 4, 5, 6, 5, 6, 7, 11, 7, 11; 15, -1, 15, -1 +pb_127_m127: times 2 db 127, -127 ipred_v_shuf: db 0, 1, 0, 1, 4, 5, 4, 5, 8, 9, 8, 9, 12, 13, 12, 13 db 2, 3, 2, 3, 6, 7, 6, 7, 10, 11, 10, 11, 14, 15, 14, 15 ipred_h_shuf: db 7, 7, 7, 7, 3, 3, 3, 3, 5, 5, 5, 5, 1, 1, 1, 1 - db 6, 6, 6, 6, 2, 2, 2, 2, 4, 4, 4, 4, 0, 0, 0, 0 + db 6, 6, 6, 6, 2, 2, 2, 2, 4, 4, 4, 4; 0, 0, 0, 0 +pw_64: times 2 dw 64 -pb_0to15: cfl_ac_w16_pad_shuffle: ; w=16, w_pad=1 db 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 ; w=8, w_pad=1 as well as second half of previous one @@ -94,26 +102,27 @@ cfl_ac_w8_pad1_shuffle: db 0, 1, 2, 3, 4, 5 ; w=16,w_pad=3 db 0, 1, 2, 3, 4, 5 times 13 db 6, 7 +pb_15to0: db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 -pb_1: 
times 4 db 1 -pb_2: times 4 db 2 -pb_4: times 4 db 4 -pb_8: times 4 db 8 -pb_12: times 4 db 12 -pb_14: times 4 db 14 -pb_15 times 4 db 15 -pb_31: times 4 db 31 -pb_128: times 4 db 128 -pw_1: times 2 dw 1 -pw_8: times 2 dw 8 -pw_62: times 2 dw 62 -pw_64: times 2 dw 64 -pw_128: times 2 dw 128 -pw_255: times 2 dw 255 -pw_512: times 2 dw 512 +%define pb_0to15 cfl_ac_w16_pad_shuffle +%define pb_1 (ipred_h_shuf+12) +%define pb_2 (ipred_h_shuf+20) +%define pb_3 (ipred_h_shuf+ 4) +%define pb_4 (ipred_h_shuf+24) +%define pb_7 (ipred_h_shuf+ 0) +%define pb_8 (z_upsample2 +12) +%define pb_15 (z_filter_s +32) +%define pw_8 (z_filter_k +32) -pb_36_m4: times 2 db 36, -4 -pb_127_m127: times 2 db 127, -127 +pb_27: times 4 db 27 +pb_31: times 4 db 31 +pb_128: times 4 db 128 +pw_1: times 2 dw 1 +pw_62: times 2 dw 62 +pw_128: times 2 dw 128 +pw_255: times 2 dw 255 +pw_512: times 2 dw 512 +pb_36_m4: times 2 db 36, -4 %macro JMP_TABLE 3-* %xdefine %1_%2_table (%%table - 2*4) @@ -138,6 +147,7 @@ JMP_TABLE ipred_dc, avx2, h4, h8, h16, h32, h64, w4, w8, w16, w32, w64, \ JMP_TABLE ipred_dc_left, avx2, h4, h8, h16, h32, h64 JMP_TABLE ipred_h, avx2, w4, w8, w16, w32, w64 JMP_TABLE ipred_z1, avx2, w4, w8, w16, w32, w64 +JMP_TABLE ipred_z3, avx2, h4, h8, h16, h32, h64 JMP_TABLE ipred_cfl, avx2, h4, h8, h16, h32, w4, w8, w16, w32, \ s4-8*4, s8-8*4, s16-8*4, s32-8*4 JMP_TABLE ipred_cfl_left, avx2, h4, h8, h16, h32 @@ -1298,9 +1308,10 @@ cglobal ipred_z1, 3, 8, 0, dst, stride, tl, w, h, angle, dx, maxbase inc tlq movsxd wq, [r6+wq*4] add wq, r6 - movzx dxd, angleb + mov dxd, angled + and dxd, 0x7e add angled, 165 ; ~90 - movzx dxd, word [r7+dxq*2] + movzx dxd, word [r7+dxq] xor angled, 0x4ff ; d = 90 - angle vpbroadcastd m3, [pw_512] vpbroadcastd m4, [pw_62] @@ -1315,10 +1326,8 @@ cglobal ipred_z1, 3, 8, 0, dst, stride, tl, w, h, angle, dx, maxbase jg .w4_no_upsample ; !enable_intra_edge_filter || h > 8 || (h == 8 && is_sm) ALLOC_STACK -32, 8 mova xm1, [tlq-1] - pshufb xm0, xm1, [z_upsample] - 
vpbroadcastd xm2, [pb_8] - pminub xm2, [z_filter_s+6] - pshufb xm1, xm2 + pshufb xm0, xm1, [z_upsample1] + pshufb xm1, [z_upsample2] vpbroadcastd xm2, [pb_36_m4] ; upshifted by 2 to be able to reuse add dxd, dxd ; pw_512 (which is already in m3) pmaddubsw xm0, xm2 ; for rounding instead of pw_2048 @@ -1375,13 +1384,14 @@ ALIGN function_align .filter_strength: ; w4/w8/w16 ; The C version uses a lot of branches, but we can do all the comparisons ; in parallel and use popcnt to get the final filter strength value. +%define base r3-z_filter_t0 + lea r3, [z_filter_t0] movd xm0, maxbased movd xm2, angled - lea r3, [z_filter_t0] shr angled, 8 ; is_sm << 1 vpbroadcastb m0, xm0 vpbroadcastb m2, xm2 - pcmpeqb m1, m0, [r3-z_filter_t0+z_filter_wh] + pcmpeqb m1, m0, [base+z_filter_wh] pand m1, m2 mova xm2, [r3+angleq*8] ; upper ymm half zero in both cases pcmpgtb m1, m2 @@ -1398,14 +1408,13 @@ ALIGN function_align call .filter_strength mov maxbased, 7 jz .w4_main ; filter_strength == 0 - lea r3, [z_filter_k-4] - vpbroadcastd m7, [pb_8] + vpbroadcastd m7, [base+pb_8] vbroadcasti128 m2, [tlq-1] - pminub m1, m7, [r3-z_filter_k+z_filter_s+4] - vpbroadcastd m8, [r3+r5*4+12*0] - pminub m7, [r3-z_filter_k+z_filter_s+12] - vpbroadcastd m9, [r3+r5*4+12*1] - vpbroadcastd m10, [r3+r5*4+12*2] + pminub m1, m7, [base+z_filter_s] + vpbroadcastd m8, [base+z_filter_k-4+r5*4+12*0] + pminub m7, [base+z_filter_s+8] + vpbroadcastd m9, [base+z_filter_k-4+r5*4+12*1] + vpbroadcastd m10, [base+z_filter_k-4+r5*4+12*2] pshufb m0, m2, m1 shufps m1, m7, q2121 pmaddubsw m0, m8 @@ -1432,7 +1441,7 @@ ALIGN function_align mov r3d, dxd ; xpos movd xm9, maxbased vpbroadcastw m9, xm9 - vbroadcasti128 m8, [z_shuf_w4] + vbroadcasti128 m8, [z1_shuf_w4] psrlw m7, 8 ; top[max_base_x] paddw m10, m6, m6 psubw m9, m0 ; max_base_x @@ -1502,7 +1511,7 @@ ALIGN function_align movd xm6, hd vinserti128 m0, [tlq+7], 1 vpbroadcastb xm6, xm6 - vbroadcasti128 m1, [z_upsample] + vbroadcasti128 m1, [z_upsample1] pminub xm6, xm2 
vpbroadcastd m7, [pb_36_m4] vinserti128 m2, xm6, 1 @@ -1561,9 +1570,8 @@ ALIGN function_align jg .w8_upsample_loop RET .w8_no_intra_edge_filter: - mov r3d, 15 - cmp hd, 8 - cmova maxbased, r3d + and maxbased, 7 + or maxbased, 8 ; imin(h+7, 15) jmp .w8_main .w8_no_upsample: %assign stack_offset org_stack_offset @@ -1572,27 +1580,22 @@ ALIGN function_align test angled, 0x400 jnz .w8_no_intra_edge_filter call .filter_strength - vpbroadcastd xm6, [pb_15] - pminub xm6, xm0 ; imin(h, 8) + 7 - movd maxbased, xm6 - movzx maxbased, maxbaseb jz .w8_main ; filter_strength == 0 - lea r3, [z_filter_k-4] movu xm2, [tlq] - pminub xm1, xm6, [r3-z_filter_k+z_filter_s+18] + pminub xm1, xm0, [base+z_filter_s+14] vinserti128 m2, [tlq-1], 1 - vinserti128 m1, [r3-z_filter_k+z_filter_s+ 4], 1 - vpbroadcastd m7, [r3+r5*4+12*0] - pminub xm6, [r3-z_filter_k+z_filter_s+26] - vinserti128 m6, [r3-z_filter_k+z_filter_s+12], 1 - pshufb m0, m2, m1 - pmaddubsw m0, m7 - vpbroadcastd m7, [r3+r5*4+12*1] + vinserti128 m1, [base+z_filter_s+ 0], 1 + vpbroadcastd m7, [base+z_filter_k-4+r5*4+12*0] + pminub xm0, [base+z_filter_s+22] + vinserti128 m0, [base+z_filter_s+ 8], 1 + pshufb m6, m2, m1 + pmaddubsw m6, m7 + vpbroadcastd m7, [base+z_filter_k-4+r5*4+12*1] movzx r3d, byte [tlq+15] - shufps m1, m6, q2121 + shufps m1, m0, q2121 pshufb m1, m2, m1 pmaddubsw m1, m7 - paddw m0, m1 + paddw m1, m6 sub r5d, 3 jnz .w8_3tap ; filter_strength == 3 uses a 5-tap filter instead of a 3-tap one, @@ -1600,24 +1603,24 @@ ALIGN function_align ; slightly different from out[max_base_x] when h > w. 
vpbroadcastd m7, [z_filter_k+4*8] movzx r2d, byte [tlq+14] - pshufb m2, m6 + pshufb m2, m0 pmaddubsw m2, m7 sub r2d, r3d lea r2d, [r2+r3*8+4] shr r2d, 3 ; (tlq[w*2-2] + tlq[w*2-1]*7 + 4) >> 3 mov [rsp+16], r2b - paddw m0, m2 + paddw m1, m2 .w8_3tap: - pmulhrsw m0, m3 + pmulhrsw m1, m3 sar r5d, 1 mov tlq, rsp add r5d, 17 ; w*2 + (filter_strength == 3) cmp hd, 8 cmova maxbased, r5d mov [tlq+r5], r3b - vextracti128 xm1, m0, 1 - packuswb xm1, xm0 - mova [tlq], xm1 + vextracti128 xm0, m1, 1 + packuswb xm0, xm1 + mova [tlq], xm0 .w8_main: movd xm2, dxd vbroadcasti128 m0, [z_base_inc] @@ -1668,9 +1671,8 @@ ALIGN function_align .w8_end: RET .w16_no_intra_edge_filter: - mov r3d, 31 - cmp hd, 16 - cmova maxbased, r3d + and maxbased, 15 + or maxbased, 16 ; imin(h+15, 31) jmp .w16_main ALIGN function_align .w16: @@ -1680,25 +1682,18 @@ ALIGN function_align test angled, 0x400 jnz .w16_no_intra_edge_filter call .filter_strength - vpbroadcastd m1, [pb_31] - pminub m0, m1 ; imin(h, 16) + 15 - movd maxbased, xm0 - movzx maxbased, maxbaseb jz .w16_main ; filter_strength == 0 - lea r3, [z_filter_k-4] - vpbroadcastd m1, [pb_12] - vpbroadcastd m11, [pb_15] - vbroadcasti128 m6, [r3-z_filter_k+z_filter_s+12] - vinserti128 m2, m6, [r3-z_filter_k+z_filter_s+4], 0 - vinserti128 m6, [r3-z_filter_k+z_filter_s+20], 1 + vpbroadcastd m1, [base+pb_12] + vbroadcasti128 m6, [base+z_filter_s+8] + vinserti128 m2, m6, [base+z_filter_s], 0 + vinserti128 m6, [base+z_filter_s+16], 1 mova xm10, [tlq-1] vinserti128 m10, [tlq+3], 1 - vpbroadcastd m9, [r3+r5*4+12*0] - vbroadcasti128 m7, [r3-z_filter_k+z_filter_s+18] - vinserti128 m8, m7, [r3-z_filter_k+z_filter_s+10], 0 - vinserti128 m7, [r3-z_filter_k+z_filter_s+26], 1 + vpbroadcastd m9, [base+z_filter_k-4+r5*4+12*0] + vbroadcasti128 m7, [base+z_filter_s+14] + vinserti128 m8, m7, [base+z_filter_s+6], 0 + vinserti128 m7, [base+z_filter_s+22], 1 psubw m0, m1 - pminub m0, m11 ; imin(h+3, 15) movu xm11, [tlq+12] vinserti128 m11, [tlq+16], 1 pminub m8, m0 @@ 
-1709,7 +1704,7 @@ ALIGN function_align pshufb m1, m11, m8 shufps m8, m7, q2121 pmaddubsw m1, m9 - vpbroadcastd m9, [r3+r5*4+12*1] + vpbroadcastd m9, [base+z_filter_k-4+r5*4+12*1] movzx r3d, byte [tlq+31] pshufb m2, m10, m2 pmaddubsw m2, m9 @@ -2131,6 +2126,1170 @@ ALIGN function_align .w64_end: RET +cglobal ipred_z3, 4, 9, 0, dst, stride, tl, w, h, angle, dy, org_w, maxbase + %assign org_stack_offset stack_offset + lea r6, [ipred_z3_avx2_table] + tzcnt hd, hm + movifnidn angled, anglem + lea r7, [dr_intra_derivative+45*2-1] + dec tlq + movsxd hq, [r6+hq*4] + sub angled, 180 + add hq, r6 + mov dyd, angled + neg dyd + xor angled, 0x400 + or dyq, ~0x7e + movzx dyd, word [r7+dyq] + vpbroadcastd m3, [pw_512] + vpbroadcastd m4, [pw_62] + vpbroadcastd m5, [pw_64] + mov org_wd, wd + jmp hq +.h4: + lea r7, [strideq*3] + cmp angleb, 40 + jae .h4_no_upsample + lea r4d, [angleq-1024] + sar r4d, 7 + add r4d, wd + jg .h4_no_upsample ; !enable_intra_edge_filter || w > 8 || (w == 8 && is_sm) + ALLOC_STACK -32, 9 + movu xm8, [tlq-7] + pshufb xm0, xm8, [z_upsample3] + vpbroadcastb xm2, xm8 + pshufb xm1, xm8, [z_filter_s+2] + mova [rsp+16], xm2 ; top[max_base_y] + vpbroadcastd xm2, [pb_36_m4] + add dyd, dyd + pmaddubsw xm0, xm2 + pmaddubsw xm1, xm2 + movd xm7, dyd + mov r2d, dyd + vpbroadcastw m7, xm7 + paddw xm1, xm0 + pmulhrsw xm1, xm3 + pslldq m6, m7, 8 + paddw xm2, xm7, xm7 + paddw m6, m7 + packuswb xm1, xm1 + paddw m6, m2 + punpcklbw xm1, xm8 + mova xm8, [z_transpose4] + psllw m7, 2 + pshufb xm1, [pb_15to0] + mova [rsp], xm1 +.h4_upsample_loop: + lea r4d, [r2+dyq] + shr r2d, 6 + vpbroadcastq m1, [rsp+r2] + lea r2d, [r4+dyq] + shr r4d, 6 + vpbroadcastq m2, [rsp+r4] + lea r4d, [r2+dyq] + shr r2d, 6 + movq xm0, [rsp+r2] + lea r2d, [r4+dyq] + shr r4d, 6 + movhps xm0, [rsp+r4] + vpblendd m1, m2, 0xc0 + pand m2, m4, m6 + vpblendd m0, m1, 0xf0 + psubw m1, m5, m2 + psllw m2, 8 + por m1, m2 + pmaddubsw m0, m1 + paddw m6, m7 + pmulhrsw m0, m3 + vextracti128 xm1, m0, 1 + packuswb xm1, xm0 
+ pshufb xm1, xm8 + movd [dstq+strideq*0], xm1 + pextrd [dstq+strideq*1], xm1, 1 + pextrd [dstq+strideq*2], xm1, 2 + pextrd [dstq+r7 ], xm1, 3 + add dstq, 4 + sub wd, 4 + jg .h4_upsample_loop + RET +ALIGN function_align +.filter_strength: ; h4/h8/h16 +%define base r4-z_filter_t0 + lea r4, [z_filter_t0] + movd xm0, maxbased + movd xm2, angled + shr angled, 8 ; is_sm << 1 + vpbroadcastb m0, xm0 + vpbroadcastb m2, xm2 + pcmpeqb m1, m0, [base+z_filter_wh] + pand m1, m2 + mova xm2, [r4+angleq*8] + pcmpgtb m1, m2 + pmovmskb r5d, m1 + popcnt r5d, r5d + ret +.h4_no_upsample: + %assign stack_offset org_stack_offset + ALLOC_STACK -16, 12 + mov maxbased, 7 + test angled, 0x400 ; !enable_intra_edge_filter + jnz .h4_main + lea maxbased, [wq+3] + call .filter_strength + mov maxbased, 7 + jz .h4_main ; filter_strength == 0 + vpbroadcastd m7, [base+pb_7] + vbroadcasti128 m2, [tlq-14] + pmaxub m1, m7, [base+z_filter_s-4] + vpbroadcastd m8, [base+z_filter_k-4+r5*4+12*0] + pmaxub m7, [base+z_filter_s+4] + vpbroadcastd m9, [base+z_filter_k-4+r5*4+12*1] + vpbroadcastd m10, [base+z_filter_k-4+r5*4+12*2] + pshufb m0, m2, m1 + shufps m1, m7, q2121 + pmaddubsw m0, m8 + pshufb m1, m2, m1 + pmaddubsw m1, m9 + pshufb m2, m7 + pmaddubsw m2, m10 + paddw m0, m1 + paddw m0, m2 + pmulhrsw m0, m3 + mov r4d, 9 + lea tlq, [rsp+15] + cmp wd, 4 + cmova maxbased, r4d + vextracti128 xm1, m0, 1 + packuswb xm0, xm1 + mova [rsp], xm0 +.h4_main: + movd xm6, dyd + vpbroadcastq m0, [z_base_inc] ; base_inc << 6 + mov r4, tlq + sub tlq, 4 + neg dyq + vpbroadcastw m6, xm6 + sub r4, maxbaseq + shl maxbased, 6 + vpbroadcastb m7, [r4] + lea r4, [dyq+63] ; ypos + movd xm9, maxbased + sub maxbased, 63 + vbroadcasti128 m8, [z3_shuf_w4] + neg maxbaseq + vpbroadcastw m9, xm9 + psrlw m7, 8 ; top[max_base_y] + paddw m10, m6, m6 + psubw m9, m0 ; max_base_y + vpblendd m6, m10, 0xcc + mova xm0, xm10 + paddw m6, m0 ; ypos2 ypos3 ypos0 ypos1 + paddw m10, m10 + mova xm11, [z_transpose4] +.h4_loop: + lea r5, [r4+dyq] + sar r4, 6 
; base0 + vpbroadcastq m1, [tlq+r4] + lea r4, [r5+dyq] + sar r5, 6 ; base1 + vpbroadcastq m2, [tlq+r5] + lea r5, [r4+dyq] + sar r4, 6 ; base2 + movq xm0, [tlq+r4] + lea r4, [r5+dyq] + sar r5, 6 ; base3 + movhps xm0, [tlq+r5] + vpblendd m1, m2, 0xc0 + pand m2, m4, m6 ; frac << 1 + vpblendd m0, m1, 0xf0 + psubw m1, m5, m2 ; (32 - frac) << 1 + psllw m2, 8 + pshufb m0, m8 + por m1, m2 ; (32-frac, frac) << 1 + pmaddubsw m0, m1 + pcmpgtw m1, m9, m6 ; base < max_base_y + pmulhrsw m0, m3 + paddsw m6, m10 ; ypos += dy + vpblendvb m0, m7, m0, m1 + vextracti128 xm1, m0, 1 + packuswb xm1, xm0 + pshufb xm1, xm11 ; transpose + movd [dstq+strideq*0], xm1 + pextrd [dstq+strideq*1], xm1, 1 + pextrd [dstq+strideq*2], xm1, 2 + pextrd [dstq+r7 ], xm1, 3 + add dstq, 4 + sub wd, 4 + jz .h4_end + cmp r4d, maxbased + jg .h4_loop + packuswb xm7, xm7 +.h4_end_loop: + movd [dstq+strideq*0], xm7 + movd [dstq+strideq*1], xm7 + movd [dstq+strideq*2], xm7 + movd [dstq+r7 ], xm7 + add dstq, 4 + sub wd, 4 + jg .h4_end_loop +.h4_end: + RET +ALIGN function_align +.h8: + lea r4d, [angleq+216] + mov r4b, wb + cmp r4d, 8 + ja .h8_no_upsample ; !enable_intra_edge_filter || is_sm || d >= 40 || w > 8 + %assign stack_offset org_stack_offset + ALLOC_STACK -32, 8 + and r4d, 4 + mova xm0, [tlq-15] + vinserti128 m0, [tlq- 9], 1 + movd xm1, r4d + movu xm2, [z_filter_s+2] + vinserti128 m2, [z_filter_s+6], 1 + vpbroadcastb xm1, xm1 ; w & 4 + vpbroadcastd m7, [pb_36_m4] + pmaxub xm1, [z_upsample3] ; clip 4x8 + vinserti128 m1, [z_upsample1], 1 + add dyd, dyd + pshufb m1, m0, m1 + pshufb m2, m0, m2 + vinserti128 m0, [tlq-7], 1 + movd xm6, dyd + pmaddubsw m1, m7 + pmaddubsw m2, m7 + vpbroadcastw m6, xm6 + mov r2d, dyd + lea r5, [strideq*3] + paddw m7, m6, m6 + paddw m1, m2 + vpblendd m6, m7, 0xf0 + pmulhrsw m1, m3 + pslldq m2, m7, 8 + paddw m7, m7 + paddw m6, m2 + vbroadcasti128 m2, [pb_15to0] + packuswb m1, m1 + punpcklbw m1, m0 + pshufb m1, m2 + vextracti128 [rsp+ 0], m1, 1 + mova [rsp+16], xm1 +.h8_upsample_loop: 
+ lea r4d, [r2+dyq] + shr r2d, 6 ; base0 + movu xm0, [rsp+r2] + lea r2d, [r4+dyq] + shr r4d, 6 ; base1 + vinserti128 m0, [rsp+r4], 1 + lea r4d, [r2+dyq] + shr r2d, 6 ; base2 + pand m1, m4, m6 + psubw m2, m5, m1 + psllw m1, 8 + por m2, m1 + punpcklqdq m1, m2, m2 ; frac0 frac1 + pmaddubsw m0, m1 + movu xm1, [rsp+r2] + lea r2d, [r4+dyq] + shr r4d, 6 ; base3 + vinserti128 m1, [rsp+r4], 1 + punpckhqdq m2, m2 ; frac2 frac3 + pmaddubsw m1, m2 + pmulhrsw m0, m3 + paddw m6, m7 + pmulhrsw m1, m3 + lea r4, [dstq+strideq*4] + psllw m1, 8 + por m0, m1 + vextracti128 xm1, m0, 1 + punpcklbw xm2, xm0, xm1 + punpckhbw xm0, xm1 + movd [dstq+strideq*0], xm2 + pextrd [dstq+strideq*1], xm2, 1 + pextrd [dstq+strideq*2], xm2, 2 + pextrd [dstq+r5 ], xm2, 3 + movd [r4 +strideq*0], xm0 + pextrd [r4 +strideq*1], xm0, 1 + pextrd [r4 +strideq*2], xm0, 2 + pextrd [r4 +r5 ], xm0, 3 + add dstq, 4 + sub wd, 4 + jg .h8_upsample_loop + RET +.h8_no_intra_edge_filter: + and maxbased, 7 + or maxbased, 8 ; imin(w+7, 15) + jmp .h8_main +.h8_no_upsample: + %assign stack_offset org_stack_offset + ALLOC_STACK -32, 10 + lea maxbased, [wq+7] + test angled, 0x400 + jnz .h8_no_intra_edge_filter + call .filter_strength + jz .h8_main ; filter_strength == 0 + vpbroadcastd xm6, [base+pb_15] + pcmpeqb xm1, xm1 + psubusb xm6, xm0 + psubb xm6, xm1 ; w == 4 ? 
5 : 1 + movu xm2, [tlq-16] + pmaxub xm1, xm6, [base+z_filter_s] + vinserti128 m2, [tlq-14], 1 + vinserti128 m1, [base+z_filter_s+12], 1 + vpbroadcastd m7, [base+z_filter_k-4+r5*4+12*0] + pmaxub xm6, [base+z_filter_s+ 8] + vinserti128 m6, [base+z_filter_s+20], 1 + pshufb m0, m2, m1 + pmaddubsw m0, m7 + vpbroadcastd m7, [base+z_filter_k-4+r5*4+12*1] + movzx r4d, byte [tlq-15] + shufps m1, m6, q2121 + pshufb m1, m2, m1 + pmaddubsw m1, m7 + paddw m0, m1 + sub r5d, 3 + jnz .h8_3tap + vpbroadcastd m7, [z_filter_k+4*8] + movzx r2d, byte [tlq-14] + pshufb m2, m6 + pmaddubsw m2, m7 + sub r2d, r4d + lea r2d, [r2+r4*8+4] + shr r2d, 3 + mov [rsp+15], r2b + paddw m0, m2 +.h8_3tap: + pmulhrsw m0, m3 + sar r5d, 1 + lea tlq, [rsp+31] + add r5d, 17 + cmp wd, 8 + cmova maxbased, r5d + neg r5 + mov [tlq+r5], r4b + vextracti128 xm1, m0, 1 + packuswb xm0, xm1 + mova [tlq-15], xm0 +.h8_main: + movd xm2, dyd + vbroadcasti128 m0, [z_base_inc] + mov r4, tlq + sub tlq, 8 + neg dyq + vpbroadcastw m2, xm2 + sub r4, maxbaseq + shl maxbased, 6 + vpbroadcastb m7, [r4] + lea r4, [dyq+63] + movd xm9, maxbased + sub maxbased, 63 + vbroadcasti128 m8, [z3_shuf] + neg maxbaseq + vpbroadcastw m9, xm9 + psrlw m7, 8 + psubw m9, m0 + paddw m6, m2, m2 + vpblendd m2, m6, 0x0f +.h8_loop: + lea r5, [r4+dyq] + sar r4, 6 + pand m0, m4, m2 + psubw m1, m5, m0 + psllw m0, 8 + por m1, m0 + vbroadcasti128 m0, [tlq+r4] + lea r4, [r5+dyq] + sar r5, 6 + vinserti128 m0, [tlq+r5], 0 + sub rsp, 8*2 + pshufb m0, m8 + pmaddubsw m0, m1 + pcmpgtw m1, m9, m2 + paddsw m2, m6 + pmulhrsw m0, m3 + vpblendvb m0, m7, m0, m1 + vextracti128 xm1, m0, 1 + psllw xm0, 8 + por xm0, xm1 ; interleave rows (partial transpose) + mova [rsp], xm0 + sub wd, 2 + jz .h8_transpose + cmp r4d, maxbased + jg .h8_loop + packuswb xm0, xm7, xm7 +.h8_end_loop: + sub rsp, 8*2 + mova [rsp], xm0 + sub wd, 2 + jg .h8_end_loop +.h8_transpose: + mova xm2, [rsp+16*1] + sub org_wd, 8 + lea r2, [strideq*3] + lea r6, [dstq+org_wq] + cmovg dstq, r6 + punpcklwd xm1, 
xm2, xm0 + punpckhwd xm2, xm0 + lea r6, [dstq+strideq*4] + jge .h8_w8 + add rsp, 16*2 + movd [dstq+strideq*0], xm1 + pextrd [dstq+strideq*1], xm1, 1 + pextrd [dstq+strideq*2], xm1, 2 + pextrd [dstq+r2 ], xm1, 3 + movd [r6 +strideq*0], xm2 + pextrd [r6 +strideq*1], xm2, 1 + pextrd [r6 +strideq*2], xm2, 2 + pextrd [r6 +r2 ], xm2, 3 + jmp .h8_end +.h8_w8_loop: + mova xm0, [rsp+16*0] + mova xm2, [rsp+16*1] + punpcklwd xm1, xm2, xm0 + punpckhwd xm2, xm0 +.h8_w8: ; w8/w16/w32 + mova xm0, [rsp+16*2] + mova xm4, [rsp+16*3] + add rsp, 16*4 + punpcklwd xm3, xm4, xm0 + punpckhwd xm4, xm0 + punpckldq xm0, xm3, xm1 + punpckhdq xm3, xm1 + punpckldq xm1, xm4, xm2 + punpckhdq xm4, xm2 + movq [dstq+strideq*0], xm0 + movhps [dstq+strideq*1], xm0 + movq [dstq+strideq*2], xm3 + movhps [dstq+r2 ], xm3 + movq [r6 +strideq*0], xm1 + movhps [r6 +strideq*1], xm1 + movq [r6 +strideq*2], xm4 + movhps [r6 +r2 ], xm4 + sub dstq, 8 + sub r6, 8 + sub org_wd, 8 + jge .h8_w8_loop +.h8_end: + RET +.h16_no_intra_edge_filter: + and maxbased, 15 + or maxbased, 16 ; imin(w+15, 31) + jmp .h16_main +ALIGN function_align +.h16: + %assign stack_offset org_stack_offset + ALLOC_STACK -64, 12 + lea maxbased, [wq+15] + test angled, 0x400 + jnz .h16_no_intra_edge_filter + call .filter_strength + jz .h16_main ; filter_strength == 0 + vpbroadcastd m11, [base+pb_27] + vpbroadcastd m1, [base+pb_1] + vbroadcasti128 m6, [base+z_filter_s+12] + vinserti128 m2, m6, [base+z_filter_s+4], 0 + vinserti128 m6, [base+z_filter_s+20], 1 + movu xm10, [tlq-18] + vinserti128 m10, [tlq-14], 1 + vpbroadcastd m9, [base+z_filter_k-4+r5*4+12*0] + vbroadcasti128 m7, [base+z_filter_s+8] + vinserti128 m8, m7, [base+z_filter_s+0], 0 + vinserti128 m7, [base+z_filter_s+16], 1 + psubusb m11, m0 + por m1, m11 + movu xm11, [tlq-32] + vinserti128 m11, [tlq-28], 1 + pmaxub m8, m1 + pmaxub m7, m1 + pshufb m0, m10, m2 + shufps m2, m6, q2121 + pmaddubsw m0, m9 + pshufb m1, m11, m8 + shufps m8, m7, q2121 + pmaddubsw m1, m9 + vpbroadcastd m9, 
[base+z_filter_k-4+r5*4+12*1] + movzx r4d, byte [tlq-31] + pshufb m2, m10, m2 + pmaddubsw m2, m9 + pshufb m8, m11, m8 + pmaddubsw m8, m9 + paddw m0, m2 + paddw m1, m8 + sub r5d, 3 + jnz .h16_3tap + vpbroadcastd m9, [z_filter_k+4*8] + movzx r2d, byte [tlq-30] + pshufb m10, m6 + pmaddubsw m10, m9 + pshufb m11, m7 + pmaddubsw m11, m9 + sub r2d, r4d + lea r2d, [r2+r4*8+4] + shr r2d, 3 + mov [rsp+31], r2b + paddw m0, m10 + paddw m1, m11 +.h16_3tap: + pmulhrsw m0, m3 + pmulhrsw m1, m3 + sar r5d, 1 + lea tlq, [rsp+63] + add r5d, 33 + cmp wd, 16 + cmova maxbased, r5d + neg r5 + mov [tlq+r5], r4b + packuswb m0, m1 + vpermq m0, m0, q2031 + mova [tlq-31], m0 +.h16_main: + movd xm6, dyd + vbroadcasti128 m0, [z_base_inc] + mov r4, tlq + sub tlq, 8 + neg dyq + vpbroadcastw m6, xm6 + sub r4, maxbaseq + shl maxbased, 6 + vpbroadcastb m7, [r4] + lea r4, [dyq+63] + movd xm9, maxbased + sub maxbased, 63 + vbroadcasti128 m8, [z3_shuf] + neg maxbaseq + vpbroadcastw m9, xm9 + psubw m9, m0 + paddw m11, m6, m6 + psubw m10, m9, m3 ; 64*8 + vpblendd m6, m11, 0xf0 +.h16_loop: + lea r5, [r4+dyq] + sar r4, 6 + pand m1, m4, m6 + psubw m2, m5, m1 + psllw m1, 8 + por m2, m1 + movu xm0, [tlq+r4-0] + movu xm1, [tlq+r4-8] + lea r4, [r5+dyq] + sar r5, 6 + vinserti128 m0, [tlq+r5-0], 1 + vinserti128 m1, [tlq+r5-8], 1 + sub rsp, 32 + pshufb m0, m8 + pshufb m1, m8 + pmaddubsw m0, m2 + pmaddubsw m1, m2 + pmulhrsw m0, m3 + pmulhrsw m1, m3 + packuswb m0, m1 + pcmpgtw m1, m9, m6 + pcmpgtw m2, m10, m6 + packsswb m1, m2 + paddsw m6, m11 + vpblendvb m0, m7, m0, m1 + vpermq m0, m0, q3120 + mova [rsp], m0 + sub wd, 2 + jz .h16_transpose + cmp r4d, maxbased + jg .h16_loop + mova m0, m7 +.h16_end_loop: + sub rsp, 32 + mova [rsp], m7 + sub wd, 2 + jg .h16_end_loop +.h16_transpose: + mova m2, [rsp+32*1] + sub org_wd, 8 + lea r2, [strideq*3] + lea r6, [dstq+org_wq] + cmovg dstq, r6 + punpcklbw m1, m2, m0 + punpckhbw m2, m0 + lea r3, [strideq*5] + punpcklbw m0, m1, m2 + punpckhbw m1, m2 + lea r4, [strideq+r2*2] ; 
stride*7 + jge .h16_w8 + add rsp, 32*2 + movd [dstq+strideq*0], xm0 + pextrd [dstq+strideq*1], xm0, 1 + pextrd [dstq+strideq*2], xm0, 2 + pextrd [dstq+r2 ], xm0, 3 + vextracti128 xm0, m0, 1 + movd [dstq+strideq*4], xm1 + pextrd [dstq+r3 ], xm1, 1 + pextrd [dstq+r2*2 ], xm1, 2 + pextrd [dstq+r4 ], xm1, 3 + lea dstq, [dstq+strideq*8] + vextracti128 xm1, m1, 1 + movd [dstq+strideq*0], xm0 + pextrd [dstq+strideq*1], xm0, 1 + pextrd [dstq+strideq*2], xm0, 2 + pextrd [dstq+r2 ], xm0, 3 + movd [dstq+strideq*4], xm1 + pextrd [dstq+r3 ], xm1, 1 + pextrd [dstq+r2*2 ], xm1, 2 + pextrd [dstq+r4 ], xm1, 3 + jmp .h16_end +.h16_w8_loop: + mova m0, [rsp+32*0] + mova m2, [rsp+32*1] + punpcklbw m1, m2, m0 + punpckhbw m2, m0 + punpcklbw m0, m1, m2 + punpckhbw m1, m2 +.h16_w8: + mova m2, [rsp+32*2] + mova m4, [rsp+32*3] + lea r6, [dstq+strideq*8] + add rsp, 32*4 + punpcklbw m3, m4, m2 + punpckhbw m4, m2 + punpcklbw m2, m3, m4 + punpckhbw m3, m4 + punpckldq m4, m2, m0 + punpckhdq m2, m0 + punpckldq m0, m3, m1 + punpckhdq m3, m1 + movq [dstq+strideq*0], xm4 + movhps [dstq+strideq*1], xm4 + vextracti128 xm4, m4, 1 + movq [dstq+strideq*2], xm2 + movhps [dstq+r2 ], xm2 + vextracti128 xm2, m2, 1 + movq [dstq+strideq*4], xm0 + movhps [dstq+r3 ], xm0 + vextracti128 xm0, m0, 1 + movq [dstq+r2*2 ], xm3 + movhps [dstq+r4 ], xm3 + vextracti128 xm3, m3, 1 + movq [r6+strideq*0], xm4 + movhps [r6+strideq*1], xm4 + movq [r6+strideq*2], xm2 + movhps [r6+r2 ], xm2 + movq [r6+strideq*4], xm0 + movhps [r6+r3 ], xm0 + movq [r6+r2*2 ], xm3 + movhps [r6+r4 ], xm3 + sub dstq, 8 + sub org_wd, 8 + jge .h16_w8_loop +.h16_end: + RET +ALIGN function_align +.h32: + %assign stack_offset org_stack_offset + ALLOC_STACK -96, 15 + lea maxbased, [wq+31] + and maxbased, 31 + or maxbased, 32 ; imin(w+31, 63) + test angled, 0x400 ; !enable_intra_edge_filter + jnz .h32_main + vbroadcasti128 m0, [pb_0to15] + mov r4d, 21 + mov r5d, 3 + movu xm11, [tlq-66] ; 56-63 + vinserti128 m11, [tlq-52], 1 ; 40-47 + sub r4d, wd ; 21-w + 
cmovg r5d, r4d + movu xm12, [tlq-58] ; 48-55 + vinserti128 m12, [tlq-44], 1 ; 32-39 + sub r4d, 8 ; 13-w + movd xm1, r5d + movu xm13, [tlq-34] ; 24-31 + vinserti128 m13, [tlq-20], 1 ; 8-15 + movd xm2, r4d + vpbroadcastb m1, xm1 + movu xm14, [tlq-28] ; 16-23 + vinserti128 m14, [tlq-14], 1 ; 0- 7 + vpbroadcastb m2, xm2 + pmaxsb m1, m0 ; clip 16x32 and (32|64)x32 + movu m7, [z_filter_s+4] + pshufb m11, m1 + vinserti128 m8, m7, [z_filter_s+8], 1 + vinserti128 m7, [z_filter_s+16], 0 + pmaxsb m2, m0 ; clip 8x32 + vpbroadcastd m9, [z_filter_k+4*2+12*0] + pshufb m12, m2 + pshufb m0, m11, m8 + pmaddubsw m0, m9 + pshufb m2, m12, m8 + pmaddubsw m2, m9 + pshufb m1, m13, m8 + pmaddubsw m1, m9 + shufps m8, m7, q1021 + pshufb m6, m14, m8 + pmaddubsw m6, m9 + vpbroadcastd m9, [z_filter_k+4*2+12*1] + pshufb m10, m11, m8 + pmaddubsw m10, m9 + paddw m0, m10 + pshufb m10, m12, m8 + pmaddubsw m10, m9 + paddw m2, m10 + pshufb m10, m13, m8 + pmaddubsw m10, m9 + shufps m8, m7, q2121 + paddw m1, m10 + pshufb m10, m14, m8 + pmaddubsw m10, m9 + paddw m6, m10 + vpbroadcastd m9, [z_filter_k+4*2+12*2] + pshufb m11, m8 + pmaddubsw m11, m9 + pshufb m12, m8 + pmaddubsw m12, m9 + movzx r4d, byte [tlq-63] + movzx r2d, byte [tlq-62] + paddw m0, m11 + paddw m2, m12 + pshufb m13, m8 + pmaddubsw m13, m9 + pshufb m14, m7 + pmaddubsw m14, m9 + paddw m1, m13 + paddw m6, m14 + sub r2d, r4d + lea r2d, [r2+r4*8+4] ; edge case for 64x32 + pmulhrsw m0, m3 + pmulhrsw m2, m3 + pmulhrsw m1, m3 + pmulhrsw m6, m3 + shr r2d, 3 + mov [rsp+31], r2b + lea tlq, [rsp+95] + mov [tlq-65], r4b + mov r4d, 65 + cmp wd, 32 + cmova maxbased, r4d + packuswb m0, m2 + packuswb m1, m6 + mova [tlq-63], m0 + mova [tlq-31], m1 +.h32_main: + movd xm6, dyd + mov r4, tlq + sub tlq, 8 + neg dyq + vpbroadcastw m6, xm6 + sub r4, maxbaseq + shl maxbased, 6 + vpbroadcastb m7, [r4] + lea r4, [dyq+63] + movd xm9, maxbased + sub maxbased, 63 + vbroadcasti128 m8, [z3_shuf] + neg maxbaseq + vpbroadcastw m9, xm9 + psubw m9, [z_base_inc] + mova m11, 
m6 + psubw m10, m9, m3 ; 64*8 +.h32_loop: + mov r5, r4 + sar r5, 6 + pand m1, m4, m6 + psubw m2, m5, m1 + psllw m1, 8 + por m2, m1 + movu xm0, [tlq+r5- 0] + vinserti128 m0, [tlq+r5-16], 1 + movu xm1, [tlq+r5- 8] + vinserti128 m1, [tlq+r5-24], 1 + sub rsp, 32 + add r4, dyq + pshufb m0, m8 + pshufb m1, m8 + pmaddubsw m0, m2 + pmaddubsw m1, m2 + pmulhrsw m0, m3 + pmulhrsw m1, m3 + packuswb m0, m1 + pcmpgtw m1, m9, m6 + pcmpgtw m2, m10, m6 + packsswb m1, m2 + paddsw m6, m11 + vpblendvb m0, m7, m0, m1 + mova [rsp], m0 + dec wd + jz .h32_transpose + cmp r4d, maxbased + jg .h32_loop +.h32_end_loop: + sub rsp, 32 + mova [rsp], m7 + dec wd + jg .h32_end_loop +.h32_transpose: + lea dstq, [dstq+org_wq-8] + lea r2, [strideq*3] + lea r3, [strideq*5] + lea r4, [strideq+r2*2] ; stride*7 +.h32_w8_loop: + mova m7, [rsp+32*0] + mova m6, [rsp+32*1] + mova m5, [rsp+32*2] + mova m4, [rsp+32*3] + mova m3, [rsp+32*4] + mova m2, [rsp+32*5] + mova m1, [rsp+32*6] + mova m0, [rsp+32*7] + lea r6, [dstq+strideq*8] + add rsp, 32*8 + punpcklbw m8, m0, m1 + punpckhbw m0, m1 + punpcklbw m1, m2, m3 + punpckhbw m2, m3 + punpcklbw m3, m4, m5 + punpckhbw m4, m5 + punpcklbw m5, m6, m7 + punpckhbw m6, m7 + punpcklwd m7, m8, m1 + punpckhwd m8, m1 + punpcklwd m1, m0, m2 + punpckhwd m0, m2 + punpcklwd m2, m3, m5 + punpckhwd m3, m5 + punpcklwd m5, m4, m6 + punpckhwd m4, m6 + punpckldq m6, m7, m2 + punpckhdq m7, m2 + punpckldq m2, m8, m3 + punpckhdq m8, m3 + punpckldq m3, m1, m5 + punpckhdq m1, m5 + punpckldq m5, m0, m4 + punpckhdq m0, m4 + movq [dstq+strideq*0], xm6 + movhps [dstq+strideq*1], xm6 + vextracti128 xm6, m6, 1 + movq [dstq+strideq*2], xm7 + movhps [dstq+r2 ], xm7 + vextracti128 xm7, m7, 1 + movq [dstq+strideq*4], xm2 + movhps [dstq+r3 ], xm2 + vextracti128 xm2, m2, 1 + movq [dstq+r2*2 ], xm8 + movhps [dstq+r4 ], xm8 + vextracti128 xm8, m8, 1 + movq [r6+strideq*0], xm3 + movhps [r6+strideq*1], xm3 + vextracti128 xm3, m3, 1 + movq [r6+strideq*2], xm1 + movhps [r6+r2 ], xm1 + vextracti128 xm1, m1, 
1 + movq [r6+strideq*4], xm5 + movhps [r6+r3 ], xm5 + vextracti128 xm5, m5, 1 + movq [r6+r2*2 ], xm0 + movhps [r6+r4 ], xm0 + lea r6, [r6+strideq*8] + vextracti128 xm0, m0, 1 + movq [r6+strideq*0], xm6 + movhps [r6+strideq*1], xm6 + movq [r6+strideq*2], xm7 + movhps [r6+r2 ], xm7 + movq [r6+strideq*4], xm2 + movhps [r6+r3 ], xm2 + movq [r6+r2*2 ], xm8 + movhps [r6+r4 ], xm8 + lea r6, [r6+strideq*8] + movq [r6+strideq*0], xm3 + movhps [r6+strideq*1], xm3 + movq [r6+strideq*2], xm1 + movhps [r6+r2 ], xm1 + movq [r6+strideq*4], xm5 + movhps [r6+r3 ], xm5 + movq [r6+r2*2 ], xm0 + movhps [r6+r4 ], xm0 + sub dstq, 8 + sub org_wd, 8 + jg .h32_w8_loop + RET +ALIGN function_align +.h64: + %assign stack_offset org_stack_offset + ALLOC_STACK -128, 16 + lea maxbased, [wq+63] + test angled, 0x400 ; !enable_intra_edge_filter + jnz .h64_main + mov r4d, 21 + vpbroadcastb xm11, [tlq-127] + vpblendd xm11, [tlq-130], 0x0e ; 120-127 + sub r4d, wd ; 21-w + mov r5d, 3 + vinserti128 m11, [tlq-116], 1 ; 104-111 + movu m7, [z_filter_s+4] + cmp wd, 32 + cmove r4d, r5d + vinserti128 m8, m7, [z_filter_s+8], 1 + vbroadcasti128 m6, [pb_0to15] + movd xm1, r4d + vpbroadcastd m9, [z_filter_k+4*2+12*0] + movu xm12, [tlq-122] ; 112-119 + vinserti128 m12, [tlq-108], 1 ; 96-103 + vpbroadcastb m1, xm1 + movu xm13, [tlq- 98] ; 88- 95 + vinserti128 m13, [tlq- 84], 1 ; 72- 79 + movu xm14, [tlq- 90] ; 80- 87 + vinserti128 m14, [tlq- 76], 1 ; 64- 71 + vinserti128 m7, [z_filter_s+16], 0 + pshufb m0, m11, m8 + pmaddubsw m0, m9 + pshufb m2, m12, m8 + pmaddubsw m2, m9 + pmaxsb m1, m6 ; clip (16|32)x64 + pshufb m13, m1 + pshufb m1, m13, m8 + pmaddubsw m1, m9 + pshufb m6, m14, m8 + pmaddubsw m6, m9 + vpbroadcastd m9, [z_filter_k+4*2+12*1] + shufps m15, m8, m7, q1021 + pshufb m10, m11, m15 + pmaddubsw m10, m9 + paddw m0, m10 + pshufb m10, m12, m15 + pmaddubsw m10, m9 + paddw m2, m10 + pshufb m10, m13, m15 + pmaddubsw m10, m9 + paddw m1, m10 + pshufb m10, m14, m15 + pmaddubsw m10, m9 + paddw m6, m10 + vpbroadcastd 
m9, [z_filter_k+4*2+12*2] + shufps m10, m8, m7, q2132 + pshufb m11, m10 + pmaddubsw m11, m9 + pshufb m12, m10 + pmaddubsw m12, m9 + pshufb m13, m10 + pmaddubsw m13, m9 + pshufb m14, m10 + pmaddubsw m14, m9 + paddw m0, m11 + paddw m2, m12 + paddw m1, m13 + paddw m6, m14 + movu xm11, [tlq-66] ; 56-63 + vinserti128 m11, [tlq-52], 1 ; 40-47 + movu xm12, [tlq-58] ; 48-55 + vinserti128 m12, [tlq-44], 1 ; 32-39 + movu xm13, [tlq-34] ; 24-31 + vinserti128 m13, [tlq-20], 1 ; 8-15 + movu xm14, [tlq-28] ; 16-23 + vinserti128 m14, [tlq-14], 1 ; 0- 7 + pmulhrsw m0, m3 + pmulhrsw m2, m3 + pmulhrsw m1, m3 + pmulhrsw m6, m3 + lea tlq, [rsp+127] + packuswb m0, m2 + packuswb m1, m6 + mova [tlq-127], m0 + mova [tlq- 95], m1 + pshufb m0, m11, m10 + pmaddubsw m0, m9 + pshufb m2, m12, m10 + pmaddubsw m2, m9 + pshufb m1, m13, m10 + pmaddubsw m1, m9 + pshufb m6, m14, m7 + pmaddubsw m6, m9 + vpbroadcastd m9, [z_filter_k+4*2+12*1] + pshufb m7, m11, m15 + pmaddubsw m7, m9 + paddw m0, m7 + pshufb m7, m12, m15 + pmaddubsw m7, m9 + paddw m2, m7 + pshufb m7, m13, m15 + pmaddubsw m7, m9 + paddw m1, m7 + pshufb m7, m14, m10 + pmaddubsw m7, m9 + paddw m6, m7 + vpbroadcastd m9, [z_filter_k+4*2+12*0] + pshufb m11, m8 + pmaddubsw m11, m9 + pshufb m12, m8 + pmaddubsw m12, m9 + pshufb m13, m8 + pmaddubsw m13, m9 + pshufb m14, m15 + pmaddubsw m14, m9 + paddw m0, m11 + paddw m2, m12 + paddw m1, m13 + paddw m6, m14 + pmulhrsw m0, m3 + pmulhrsw m2, m3 + pmulhrsw m1, m3 + pmulhrsw m6, m3 + packuswb m0, m2 + packuswb m1, m6 + mova [tlq-63], m0 + mova [tlq-31], m1 +.h64_main: + movd xm6, dyd + mov r4, tlq + sub tlq, 24 + neg dyq + vpbroadcastw m6, xm6 + sub r4, maxbaseq + shl maxbased, 6 + vpbroadcastb m7, [r4] + lea r4, [dyq+63] + movd xm10, maxbased + sub maxbased, 63 + vbroadcasti128 m8, [z3_shuf] + neg maxbaseq + mova xm1, [z_base_inc+16] + vinserti128 m1, [z_base_inc], 1 + vpbroadcastw m10, xm10 + psllw m0, m3, 2 ; 64*32 + psubw m10, m1 + mova m14, m6 + psubw m11, m10, m3 ; 64*8 + psubw m12, m10, m0 + 
psubw m13, m11, m0 +.h64_loop: + mov r5, r4 + sar r5, 6 + movu m0, [tlq+r5-0] + movu m1, [tlq+r5-8] + pand m2, m4, m6 + psubw m9, m5, m2 + psllw m2, 8 + por m9, m2 + pshufb m0, m8 + pshufb m1, m8 + pmaddubsw m0, m9 + pmaddubsw m1, m9 + pmulhrsw m0, m3 + pmulhrsw m1, m3 + packuswb m0, m1 + pcmpgtw m1, m10, m6 + pcmpgtw m2, m11, m6 + packsswb m1, m2 + vpblendvb m2, m7, m0, m1 + movu m0, [tlq+r5-32] + movu m1, [tlq+r5-40] + add r4, dyq + sub rsp, 64 + mova [rsp+32], m2 + pshufb m0, m8 + pshufb m1, m8 + pmaddubsw m0, m9 + pmaddubsw m1, m9 + pcmpgtw m9, m12, m6 + pcmpgtw m2, m13, m6 + pmulhrsw m0, m3 + pmulhrsw m1, m3 + paddsw m6, m14 + packsswb m9, m2 + packuswb m0, m1 + vpblendvb m0, m7, m0, m9 + mova [rsp], m0 + dec wd + jz .h64_transpose + cmp r4d, maxbased + jg .h64_loop +.h64_end_loop: + sub rsp, 64 + mova [rsp+32], m7 + mova [rsp+ 0], m7 + dec wd + jg .h64_end_loop +.h64_transpose: + lea r2, [strideq*3] + lea r3, [strideq*5] + imul r5, strideq, -8 + lea dstq, [dstq+org_wq-16] + lea r4, [strideq+r2*2] ; stride*7 +.h64_transpose_loop0: + lea r6, [rsp+16*3] +.h64_transpose_loop: + mova xm0, [r6+64*15] + vinserti128 m0, [r6+64* 7], 1 + mova xm1, [r6+64*14] + vinserti128 m1, [r6+64* 6], 1 + mova xm2, [r6+64*13] + vinserti128 m2, [r6+64* 5], 1 + mova xm3, [r6+64*12] + vinserti128 m3, [r6+64* 4], 1 + mova xm4, [r6+64*11] + vinserti128 m4, [r6+64* 3], 1 + mova xm5, [r6+64*10] + vinserti128 m5, [r6+64* 2], 1 + mova xm6, [r6+64* 9] + vinserti128 m6, [r6+64* 1], 1 + mova xm7, [r6+64* 8] + vinserti128 m7, [r6+64* 0], 1 + sub r6, 16 + punpcklbw m8, m0, m1 + punpckhbw m0, m1 + punpcklbw m1, m2, m3 + punpckhbw m2, m3 + punpcklbw m3, m4, m5 + punpckhbw m4, m5 + punpcklbw m5, m6, m7 + punpckhbw m6, m7 + punpcklwd m7, m8, m1 + punpckhwd m8, m1 + punpcklwd m1, m0, m2 + punpckhwd m0, m2 + punpcklwd m2, m3, m5 + punpckhwd m3, m5 + punpcklwd m5, m4, m6 + punpckhwd m4, m6 + punpckldq m6, m7, m2 + punpckhdq m7, m2 + punpckldq m2, m8, m3 + punpckhdq m8, m3 + punpckldq m3, m1, m5 + 
punpckhdq m1, m5 + punpckldq m5, m0, m4 + punpckhdq m0, m4 + vpermq m6, m6, q3120 + vpermq m7, m7, q3120 + vpermq m2, m2, q3120 + vpermq m8, m8, q3120 + vpermq m3, m3, q3120 + vpermq m1, m1, q3120 + vpermq m5, m5, q3120 + vpermq m0, m0, q3120 + mova [dstq+strideq*0], xm6 + vextracti128 [dstq+strideq*1], m6, 1 + mova [dstq+strideq*2], xm7 + vextracti128 [dstq+r2 ], m7, 1 + mova [dstq+strideq*4], xm2 + vextracti128 [dstq+r3 ], m2, 1 + mova [dstq+r2*2 ], xm8 + vextracti128 [dstq+r4 ], m8, 1 + sub dstq, r5 + mova [dstq+strideq*0], xm3 + vextracti128 [dstq+strideq*1], m3, 1 + mova [dstq+strideq*2], xm1 + vextracti128 [dstq+r2 ], m1, 1 + mova [dstq+strideq*4], xm5 + vextracti128 [dstq+r3 ], m5, 1 + mova [dstq+r2*2 ], xm0 + vextracti128 [dstq+r4 ], m0, 1 + sub dstq, r5 + cmp r6, rsp + jae .h64_transpose_loop + add rsp, 64*16 + lea dstq, [dstq+r5*8-16] + sub org_wd, 16 + jg .h64_transpose_loop0 +.h64_end: + RET + %macro FILTER_XMM 4 ; dst, src, tmp, shuf %ifnum %4 pshufb xm%2, xm%4 @@ -2168,7 +3327,7 @@ ALIGN function_align pmaddubsw m%3, m5 paddw m%1, m%3 psraw m%1, 4 - vperm2i128 m%3, m%1, m%1, 0x01 + vpermq m%3, m%1, q1032 packuswb m%1, m%3 %endmacro @@ -2249,10 +3408,12 @@ ALIGN function_align RET ALIGN function_align .w16: +%if WIN64 %assign stack_offset stack_offset - stack_size_padded %assign xmm_regs_used 15 %assign stack_size_padded 0x98 SUB rsp, stack_size_padded +%endif sub hd, 2 TAIL_CALL .w16_main, 0 .w16_main: diff --git a/third_party/dav1d/src/x86/ipred_init_tmpl.c b/third_party/dav1d/src/x86/ipred_init_tmpl.c index 93bb8b2de8aa..3662d40069ec 100644 --- a/third_party/dav1d/src/x86/ipred_init_tmpl.c +++ b/third_party/dav1d/src/x86/ipred_init_tmpl.c @@ -39,6 +39,7 @@ decl_angular_ipred_fn(dav1d_ipred_smooth_avx2); decl_angular_ipred_fn(dav1d_ipred_smooth_v_avx2); decl_angular_ipred_fn(dav1d_ipred_smooth_h_avx2); decl_angular_ipred_fn(dav1d_ipred_z1_avx2); +decl_angular_ipred_fn(dav1d_ipred_z3_avx2); decl_angular_ipred_fn(dav1d_ipred_filter_avx2); 
decl_cfl_pred_fn(dav1d_ipred_cfl_avx2); @@ -51,7 +52,12 @@ decl_cfl_ac_fn(dav1d_ipred_cfl_ac_422_avx2); decl_pal_pred_fn(dav1d_pal_pred_avx2); +decl_angular_ipred_fn(dav1d_ipred_dc_ssse3); +decl_angular_ipred_fn(dav1d_ipred_dc_128_ssse3); +decl_angular_ipred_fn(dav1d_ipred_dc_top_ssse3); +decl_angular_ipred_fn(dav1d_ipred_dc_left_ssse3); decl_angular_ipred_fn(dav1d_ipred_h_ssse3); +decl_angular_ipred_fn(dav1d_ipred_v_ssse3); void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) { const unsigned flags = dav1d_get_cpu_flags(); @@ -59,7 +65,12 @@ void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) { if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return; #if BITDEPTH == 8 - c->intra_pred[HOR_PRED] = dav1d_ipred_h_ssse3; + c->intra_pred[DC_PRED] = dav1d_ipred_dc_ssse3; + c->intra_pred[DC_128_PRED] = dav1d_ipred_dc_128_ssse3; + c->intra_pred[TOP_DC_PRED] = dav1d_ipred_dc_top_ssse3; + c->intra_pred[LEFT_DC_PRED] = dav1d_ipred_dc_left_ssse3; + c->intra_pred[HOR_PRED] = dav1d_ipred_h_ssse3; + c->intra_pred[VERT_PRED] = dav1d_ipred_v_ssse3; #endif if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return; @@ -76,6 +87,7 @@ void bitfn(dav1d_intra_pred_dsp_init_x86)(Dav1dIntraPredDSPContext *const c) { c->intra_pred[SMOOTH_V_PRED] = dav1d_ipred_smooth_v_avx2; c->intra_pred[SMOOTH_H_PRED] = dav1d_ipred_smooth_h_avx2; c->intra_pred[Z1_PRED] = dav1d_ipred_z1_avx2; + c->intra_pred[Z3_PRED] = dav1d_ipred_z3_avx2; c->intra_pred[FILTER_PRED] = dav1d_ipred_filter_avx2; c->cfl_pred[DC_PRED] = dav1d_ipred_cfl_avx2; diff --git a/third_party/dav1d/src/x86/ipred_ssse3.asm b/third_party/dav1d/src/x86/ipred_ssse3.asm index d295c377f0f9..bfa3621218ba 100644 --- a/third_party/dav1d/src/x86/ipred_ssse3.asm +++ b/third_party/dav1d/src/x86/ipred_ssse3.asm @@ -29,6 +29,9 @@ SECTION_RODATA 16 +pb_128 : times 8 db 128 +pd_32768 : times 1 dd 32768 + %macro JMP_TABLE 3-* %xdefine %1_%2_table (%%table - 2*4) %xdefine %%base mangle(private_prefix %+ _%1_%2) @@ -39,11 
+42,19 @@ SECTION_RODATA 16 %endrep %endmacro -JMP_TABLE ipred_h, ssse3, w4, w8, w16, w32, w64 +%define ipred_dc_splat_ssse3_table (ipred_dc_ssse3_table + 10*4) + +JMP_TABLE ipred_h, ssse3, w4, w8, w16, w32, w64 +JMP_TABLE ipred_dc, ssse3, h4, h8, h16, h32, h64, w4, w8, w16, w32, w64, \ + s4-10*4, s8-10*4, s16-10*4, s32-10*4, s64-10*4 +JMP_TABLE ipred_dc_left, ssse3, h4, h8, h16, h32, h64 SECTION .text - +;--------------------------------------------------------------------------------------- +;int dav1d_ipred_h_ssse3(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, +; const int width, const int height, const int a); +;--------------------------------------------------------------------------------------- %macro IPRED_SET 3 ; width, stride, stride size pshuflw_imm8 pshuflw m1, m0, %3 ; extend 8 byte for 2 pos punpcklqdq m1, m1 @@ -93,7 +104,7 @@ SECTION .text INIT_XMM ssse3 cglobal ipred_h, 3, 6, 2, dst, stride, tl, w, h, stride3 - lea r5, [ipred_h_ssse3_table] + LEA r5, ipred_h_ssse3_table tzcnt wd, wm movifnidn hd, hm movsxd wq, [r5+wq*4] @@ -110,3 +121,352 @@ cglobal ipred_h, 3, 6, 2, dst, stride, tl, w, h, stride3 IPRED_H 32 .w64: IPRED_H 64 + +;--------------------------------------------------------------------------------------- +;int dav1d_ipred_v_ssse3(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, +; const int width, const int height, const int a); +;--------------------------------------------------------------------------------------- +cglobal ipred_v, 3, 7, 6, dst, stride, tl, w, h, stride3 + LEA r5, ipred_dc_splat_ssse3_table + tzcnt wd, wm + movu m0, [tlq+ 1] + movu m1, [tlq+17] + movu m2, [tlq+33] + movu m3, [tlq+49] + movifnidn hd, hm + movsxd wq, [r5+wq*4] + add wq, r5 + lea stride3q, [strideq*3] + jmp wq + +;--------------------------------------------------------------------------------------- +;int dav1d_ipred_dc_ssse3(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, +; const int width, const int 
height, const int a); +;--------------------------------------------------------------------------------------- +cglobal ipred_dc, 3, 7, 6, dst, stride, tl, w, h, stride3 + movifnidn hd, hm + movifnidn wd, wm + tzcnt r6d, hd + lea r5d, [wq+hq] + movd m4, r5d + tzcnt r5d, r5d + movd m5, r5d + LEA r5, ipred_dc_ssse3_table + tzcnt wd, wd + movsxd r6, [r5+r6*4] + movsxd wq, [r5+wq*4+20] + pcmpeqd m3, m3 + psrlw m4, 1 ; dc = (width + height) >> 1; + add r6, r5 + add wq, r5 + lea stride3q, [strideq*3] + jmp r6 +.h4: + movd m0, [tlq-4] + pmaddubsw m0, m3 + jmp wq +.w4: + movd m1, [tlq+1] + pmaddubsw m1, m3 + psubw m0, m4 + paddw m0, m1 + pmaddwd m0, m3 + cmp hd, 4 + jg .w4_mul + psrlw m0, 3 ; dc >>= ctz(width + height); + jmp .w4_end +.w4_mul: + punpckhqdq m1, m0, m0 + paddw m0, m1 + psrlq m1, m0, 32 + paddw m0, m1 + psrlw m0, 2 + mov r6d, 0x5556 + mov r2d, 0x3334 + test hd, 8 + cmovz r6d, r2d + movd m5, r6d + pmulhuw m0, m5 +.w4_end: + pxor m1, m1 + pshufb m0, m1 +.s4: + movd [dstq+strideq*0], m0 + movd [dstq+strideq*1], m0 + movd [dstq+strideq*2], m0 + movd [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + sub hd, 4 + jg .s4 + RET +ALIGN function_align +.h8: + movq m0, [tlq-8] + pmaddubsw m0, m3 + jmp wq +.w8: + movq m1, [tlq+1] + pmaddubsw m1, m3 + psubw m4, m0 + punpckhqdq m0, m0 + psubw m0, m4 + paddw m0, m1 + pshuflw m1, m0, q1032 ; psrlq m1, m0, 32 + paddw m0, m1 + pmaddwd m0, m3 + psrlw m0, m5 + cmp hd, 8 + je .w8_end + mov r6d, 0x5556 + mov r2d, 0x3334 + cmp hd, 32 + cmovz r6d, r2d + movd m1, r6d + pmulhuw m0, m1 +.w8_end: + pxor m1, m1 + pshufb m0, m1 +.s8: + movq [dstq+strideq*0], m0 + movq [dstq+strideq*1], m0 + movq [dstq+strideq*2], m0 + movq [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + sub hd, 4 + jg .s8 + RET +ALIGN function_align +.h16: + mova m0, [tlq-16] + pmaddubsw m0, m3 + jmp wq +.w16: + movu m1, [tlq+1] + pmaddubsw m1, m3 + paddw m0, m1 + psubw m4, m0 + punpckhqdq m0, m0 + psubw m0, m4 + pshuflw m1, m0, q1032 ; psrlq m1, m0, 32 + paddw m0, 
m1 + pmaddwd m0, m3 + psrlw m0, m5 + cmp hd, 16 + je .w16_end + mov r6d, 0x5556 + mov r2d, 0x3334 + test hd, 8|32 + cmovz r6d, r2d + movd m1, r6d + pmulhuw m0, m1 +.w16_end: + pxor m1, m1 + pshufb m0, m1 +.s16: + mova [dstq+strideq*0], m0 + mova [dstq+strideq*1], m0 + mova [dstq+strideq*2], m0 + mova [dstq+stride3q ], m0 + lea dstq, [dstq+strideq*4] + sub hd, 4 + jg .s16 + RET +ALIGN function_align +.h32: + mova m0, [tlq-32] + pmaddubsw m0, m3 + mova m2, [tlq-16] + pmaddubsw m2, m3 + paddw m0, m2 + jmp wq +.w32: + movu m1, [tlq+1] + pmaddubsw m1, m3 + movu m2, [tlq+17] + pmaddubsw m2, m3 + paddw m1, m2 + paddw m0, m1 + psubw m4, m0 + punpckhqdq m0, m0 + psubw m0, m4 + pshuflw m1, m0, q1032 ; psrlq m1, m0, 32 + paddw m0, m1 + pmaddwd m0, m3 + psrlw m0, m5 + cmp hd, 32 + je .w32_end + lea r2d, [hq*2] + mov r6d, 0x5556 + mov r2d, 0x3334 + test hd, 64|16 + cmovz r6d, r2d + movd m1, r6d + pmulhuw m0, m1 +.w32_end: + pxor m1, m1 + pshufb m0, m1 + mova m1, m0 +.s32: + mova [dstq], m0 + mova [dstq+16], m1 + mova [dstq+strideq], m0 + mova [dstq+strideq+16], m1 + mova [dstq+strideq*2], m0 + mova [dstq+strideq*2+16], m1 + mova [dstq+stride3q], m0 + mova [dstq+stride3q+16], m1 + lea dstq, [dstq+strideq*4] + sub hd, 4 + jg .s32 + RET +ALIGN function_align +.h64: + mova m0, [tlq-64] + mova m1, [tlq-48] + pmaddubsw m0, m3 + pmaddubsw m1, m3 + paddw m0, m1 + mova m1, [tlq-32] + pmaddubsw m1, m3 + paddw m0, m1 + mova m1, [tlq-16] + pmaddubsw m1, m3 + paddw m0, m1 + jmp wq +.w64: + movu m1, [tlq+ 1] + movu m2, [tlq+17] + pmaddubsw m1, m3 + pmaddubsw m2, m3 + paddw m1, m2 + movu m2, [tlq+33] + pmaddubsw m2, m3 + paddw m1, m2 + movu m2, [tlq+49] + pmaddubsw m2, m3 + paddw m1, m2 + paddw m0, m1 + psubw m4, m0 + punpckhqdq m0, m0 + psubw m0, m4 + pshuflw m1, m0, q1032 ; psrlq m1, m0, 32 + paddw m0, m1 + pmaddwd m0, m3 + psrlw m0, m5 + cmp hd, 64 + je .w64_end + mov r6d, 0x5556 + mov r2d, 0x3334 + test hd, 32 + cmovz r6d, r2d + movd m1, r6d + pmulhuw m0, m1 +.w64_end: + pxor m1, m1 + 
pshufb m0, m1 + mova m1, m0 + mova m2, m0 + mova m3, m0 +.s64: + mova [dstq], m0 + mova [dstq+16], m1 + mova [dstq+32], m2 + mova [dstq+48], m3 + mova [dstq+strideq], m0 + mova [dstq+strideq+16], m1 + mova [dstq+strideq+32], m2 + mova [dstq+strideq+48], m3 + lea dstq, [dstq+strideq*2] + sub hd, 2 + jg .s64 + RET + +;--------------------------------------------------------------------------------------- +;int dav1d_ipred_dc_left_ssse3(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, +; const int width, const int height, const int a); +;--------------------------------------------------------------------------------------- +cglobal ipred_dc_left, 3, 7, 6, dst, stride, tl, w, h, stride3 + LEA r5, ipred_dc_left_ssse3_table + mov hd, hm ; zero upper half + tzcnt r6d, hd + sub tlq, hq + tzcnt wd, wm + movu m0, [tlq] + movd m3, [r5-ipred_dc_left_ssse3_table+pd_32768] + movd m2, r6d + psrld m3, m2 + movsxd r6, [r5+r6*4] + pcmpeqd m2, m2 + pmaddubsw m0, m2 + add r6, r5 + add r5, ipred_dc_splat_ssse3_table-ipred_dc_left_ssse3_table + movsxd wq, [r5+wq*4] + add wq, r5 + jmp r6 +.h64: + movu m1, [tlq+48] ; unaligned when jumping here from dc_top + pmaddubsw m1, m2 + paddw m0, m1 + movu m1, [tlq+32] ; unaligned when jumping here from dc_top + pmaddubsw m1, m2 + paddw m0, m1 +.h32: + movu m1, [tlq+16] ; unaligned when jumping here from dc_top + pmaddubsw m1, m2 + paddw m0, m1 +.h16: + pshufd m1, m0, q3232 ; psrlq m1, m0, 16 + paddw m0, m1 +.h8: + pshuflw m1, m0, q1032 ; psrlq m1, m0, 32 + paddw m0, m1 +.h4: + pmaddwd m0, m2 + pmulhrsw m0, m3 + lea stride3q, [strideq*3] + pxor m1, m1 + pshufb m0, m1 + mova m1, m0 + mova m2, m0 + mova m3, m0 + jmp wq + +;--------------------------------------------------------------------------------------- +;int dav1d_ipred_dc_128_ssse3(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, +; const int width, const int height, const int a); 
+;--------------------------------------------------------------------------------------- +cglobal ipred_dc_128, 2, 7, 6, dst, stride, tl, w, h, stride3 + LEA r5, ipred_dc_splat_ssse3_table + tzcnt wd, wm + movifnidn hd, hm + movsxd wq, [r5+wq*4] + movddup m0, [r5-ipred_dc_splat_ssse3_table+pb_128] + mova m1, m0 + mova m2, m0 + mova m3, m0 + add wq, r5 + lea stride3q, [strideq*3] + jmp wq + +;--------------------------------------------------------------------------------------- +;int dav1d_ipred_dc_top_ssse3(pixel *dst, const ptrdiff_t stride, const pixel *const topleft, +; const int width, const int height, const int a); +;--------------------------------------------------------------------------------------- +cglobal ipred_dc_top, 3, 7, 6, dst, stride, tl, w, h + LEA r5, ipred_dc_left_ssse3_table + tzcnt wd, wm + inc tlq + movu m0, [tlq] + movifnidn hd, hm + movd m3, [r5-ipred_dc_left_ssse3_table+pd_32768] + movd m2, wd + psrld m3, m2 + movsxd r6, [r5+wq*4] + pcmpeqd m2, m2 + pmaddubsw m0, m2 + add r6, r5 + add r5, ipred_dc_splat_ssse3_table-ipred_dc_left_ssse3_table + movsxd wq, [r5+wq*4] + add wq, r5 + jmp r6 + diff --git a/third_party/dav1d/src/x86/itx.asm b/third_party/dav1d/src/x86/itx.asm index abcd357b7339..b8192b893259 100644 --- a/third_party/dav1d/src/x86/itx.asm +++ b/third_party/dav1d/src/x86/itx.asm @@ -113,6 +113,15 @@ idct64_mul: COEF_X8 4095, 101, 4065, 501, 2967, -2824, 3229, -2520 COEF_X8 3996, 897, 3889, 1285, 3461, -2191, 3659, -1842 COEF_X8 3349, 2359, 3102, 2675, 4036, -700, 4085, -301 +pw_201_4091x8: dw 201*8, 4091*8 +pw_m601_4052x8: dw -601*8, 4052*8 +pw_995_3973x8: dw 995*8, 3973*8 +pw_m1380_3857x8: dw -1380*8, 3857*8 +pw_1751_3703x8: dw 1751*8, 3703*8 +pw_m2106_3513x8: dw -2106*8, 3513*8 +pw_2440_3290x8: dw 2440*8, 3290*8 +pw_m2751_3035x8: dw -2751*8, 3035*8 + %define o_idct64_offset idct64_mul - (o_base) - 8 SECTION .text @@ -215,12 +224,6 @@ SECTION .text packssdw m%2, m%3 %endmacro -%macro ITX_MULHRSW_SHL3 4 ; dst/src, tmp, coef[1-2] 
- vpbroadcastd m%2, [pw_%3_%4] - psllw m%2, 3 - pmulhrsw m%1, m%2 -%endmacro - %macro IDCT4_1D 7 ; src[1-4], tmp[1-2], pd_2048 ITX_MULSUB_2W %2, %4, %5, %6, %7, 1567, 3784 ; t2, t3 vpbroadcastd m%6, [o(pw_2896x8)] @@ -228,20 +231,20 @@ SECTION .text psubw m%1, m%3 pmulhrsw m%1, m%6 ; t1 pmulhrsw m%5, m%6 ; t0 - psubw m%3, m%1, m%2 - paddw m%2, m%1 - paddw m%1, m%5, m%4 - psubw m%4, m%5, m%4 + psubsw m%3, m%1, m%2 + paddsw m%2, m%1 + paddsw m%1, m%5, m%4 + psubsw m%4, m%5, m%4 %endmacro %macro IDCT8_1D 11 ; src[1-8], tmp[1-2], pd_2048 ITX_MULSUB_2W %6, %4, %9, %10, %11, 3406, 2276 ; t5a, t6a ITX_MULSUB_2W %2, %8, %9, %10, %11, 799, 4017 ; t4a, t7a ITX_MULSUB_2W %3, %7, %9, %10, %11, 1567, 3784 ; t2, t3 - paddw m%9, m%2, m%6 ; t4 - psubw m%2, m%6 ; t5a - paddw m%10, m%8, m%4 ; t7 - psubw m%8, m%4 ; t6a + paddsw m%9, m%2, m%6 ; t4 + psubsw m%2, m%6 ; t5a + paddsw m%10, m%8, m%4 ; t7 + psubsw m%8, m%4 ; t6a vpbroadcastd m%4, [o(pw_2896x8)] psubw m%6, m%1, m%5 paddw m%1, m%5 @@ -251,18 +254,18 @@ SECTION .text pmulhrsw m%6, m%4 ; t1 pmulhrsw m%8, m%4 ; t6 pmulhrsw m%5, m%4 ; t5 - psubw m%4, m%1, m%7 ; dct4 out3 - paddw m%1, m%7 ; dct4 out0 - paddw m%7, m%6, m%3 ; dct4 out1 - psubw m%6, m%3 ; dct4 out2 - paddw m%2, m%7, m%8 ; out1 - psubw m%7, m%8 ; out6 - psubw m%8, m%1, m%10 ; out7 - paddw m%1, m%10 ; out0 - paddw m%3, m%6, m%5 ; out2 - psubw m%6, m%5 ; out5 - psubw m%5, m%4, m%9 ; out4 - paddw m%4, m%9 ; out3 + psubsw m%4, m%1, m%7 ; dct4 out3 + paddsw m%1, m%7 ; dct4 out0 + paddsw m%7, m%6, m%3 ; dct4 out1 + psubsw m%6, m%3 ; dct4 out2 + paddsw m%2, m%7, m%8 ; out1 + psubsw m%7, m%8 ; out6 + psubsw m%8, m%1, m%10 ; out7 + paddsw m%1, m%10 ; out0 + paddsw m%3, m%6, m%5 ; out2 + psubsw m%6, m%5 ; out5 + psubsw m%5, m%4, m%9 ; out4 + paddsw m%4, m%9 ; out3 %endmacro ; in1 = %1, in3 = %2, in5 = %3, in7 = %4 @@ -272,25 +275,25 @@ SECTION .text ITX_MULSUB_2W %5, %4, %9, %10, %11, 3166, 2598 ; t9a, t14a ITX_MULSUB_2W %3, %6, %9, %10, %11, 1931, 3612 ; t10a, t13a 
ITX_MULSUB_2W %7, %2, %9, %10, %11, 3920, 1189 ; t11a, t12a - psubw m%9, m%2, m%6 ; t13 - paddw m%6, m%2 ; t12 - psubw m%2, m%8, m%4 ; t14 - paddw m%8, m%4 ; t15 - psubw m%4, m%7, m%3 ; t10 - paddw m%3, m%7 ; t11 - psubw m%7, m%1, m%5 ; t9 - paddw m%1, m%5 ; t8 + psubsw m%9, m%2, m%6 ; t13 + paddsw m%6, m%2 ; t12 + psubsw m%2, m%8, m%4 ; t14 + paddsw m%8, m%4 ; t15 + psubsw m%4, m%7, m%3 ; t10 + paddsw m%3, m%7 ; t11 + psubsw m%7, m%1, m%5 ; t9 + paddsw m%1, m%5 ; t8 ITX_MULSUB_2W %2, %7, %5, %10, %11, 1567, 3784 ; t9a, t14a ITX_MULSUB_2W %9, %4, %5, %10, %11, m3784, 1567 ; t10a, t13a vpbroadcastd m%10, [o(pw_2896x8)] - psubw m%5, m%2, m%9 ; t10 - paddw m%2, m%9 ; t9 - psubw m%9, m%1, m%3 ; t11a - paddw m%1, m%3 ; t8a - psubw m%3, m%7, m%4 ; t13 - paddw m%7, m%4 ; t14 - psubw m%4, m%8, m%6 ; t12a - paddw m%8, m%6 ; t15a + psubsw m%5, m%2, m%9 ; t10 + paddsw m%2, m%9 ; t9 + psubsw m%9, m%1, m%3 ; t11a + paddsw m%1, m%3 ; t8a + psubsw m%3, m%7, m%4 ; t13 + paddsw m%7, m%4 ; t14 + psubsw m%4, m%8, m%6 ; t12a + paddsw m%8, m%6 ; t15a paddw m%6, m%3, m%5 ; t13a psubw m%3, m%5 ; t10a paddw m%5, m%4, m%9 ; t12 @@ -455,8 +458,8 @@ ALIGN function_align vpbroadcastd m4, [o(pw_2896x8)] pmulhrsw m0, m4 ; t0 t1 %endif - psubw m1, m0, m2 ; out3 out2 - paddw m0, m2 ; out0 out1 + psubsw m1, m0, m2 ; out3 out2 + paddsw m0, m2 ; out0 out1 %endmacro %macro IADST4_1D_PACKED 0 @@ -690,22 +693,22 @@ cglobal iidentity_4x4_internal, 0, 5, 6, dst, stride, c, eob, tx2 ITX_MUL2X_PACK 4, 1, 2, 6, 3406, 2276, 1 ; t5a t6a ITX_MUL2X_PACK 3, 1, 2, 6, 1567, 3784 ; t3 t2 vpbroadcastd m6, [o(pw_2896x8)] - psubw m2, m5, m4 ; t4 t7 - paddw m5, m4 ; t5a t6a + psubsw m2, m5, m4 ; t4 t7 + paddsw m5, m4 ; t5a t6a pshufd m4, m2, q1032 psubw m1, m2, m4 paddw m4, m2 vpblendd m4, m4, m1, 0xcc pmulhrsw m0, m6 ; t0 t1 pmulhrsw m4, m6 ; t6 t5 - psubw m1, m0, m3 ; tmp3 tmp2 - paddw m0, m3 ; tmp0 tmp1 + psubsw m1, m0, m3 ; tmp3 tmp2 + paddsw m0, m3 ; tmp0 tmp1 shufps m2, m5, m4, q1032 ; t7 t6 vpblendd m5, m5, m4, 
0xcc ; t4 t5 - psubw m3, m0, m2 ; out7 out6 - paddw m0, m2 ; out0 out1 - psubw m2, m1, m5 ; out4 out5 - paddw m1, m5 ; out3 out2 + psubsw m3, m0, m2 ; out7 out6 + paddsw m0, m2 ; out0 out1 + psubsw m2, m1, m5 ; out4 out5 + paddsw m1, m5 ; out3 out2 %endmacro %macro IADST8_1D_PACKED 0 @@ -718,19 +721,19 @@ cglobal iidentity_4x4_internal, 0, 5, 6, dst, stride, c, eob, tx2 ITX_MUL2X_PACK 1, 4, 5, 6, 1931, 3612 ; t2a t3a ITX_MUL2X_PACK 2, 4, 5, 6, 3166, 2598 ; t4a t5a ITX_MUL2X_PACK 3, 4, 5, 6, 3920, 1189 ; t6a t7a - psubw m4, m0, m2 ; t4 t5 - paddw m0, m2 ; t0 t1 - psubw m5, m1, m3 ; t6 t7 - paddw m1, m3 ; t2 t3 + psubsw m4, m0, m2 ; t4 t5 + paddsw m0, m2 ; t0 t1 + psubsw m5, m1, m3 ; t6 t7 + paddsw m1, m3 ; t2 t3 shufps m2, m5, m4, q1032 punpckhwd m4, m2 punpcklwd m5, m2 ITX_MUL2X_PACK 4, 2, 3, 6, 1567, 3784, 1 ; t5a t4a ITX_MUL2X_PACK 5, 2, 3, 6, 3784, 1567 ; t7a t6a - psubw m2, m0, m1 ; t2 t3 - paddw m0, m1 ; out0 -out7 - psubw m1, m4, m5 ; t7 t6 - paddw m4, m5 ; out6 -out1 + psubsw m2, m0, m1 ; t2 t3 + paddsw m0, m1 ; out0 -out7 + psubsw m1, m4, m5 ; t7 t6 + paddsw m4, m5 ; out6 -out1 vpbroadcastd m5, [o(pw_2896x8)] vpblendd m3, m0, m4, 0x33 ; out6 -out7 vpblendd m0, m0, m4, 0xcc ; out0 -out1 @@ -978,10 +981,10 @@ cglobal iidentity_4x8_internal, 0, 5, 7, dst, stride, c, eob, tx2 ITX_MUL2X_PACK 7, 2, 4, 10, 799, 4017, 1 ; t4a t7a ITX_MUL2X_PACK 3, 2, 4, 10, 3406, 2276, 1 ; t5a t6a ITX_MUL2X_PACK 6, 2, 4, 10, 1567, 3784 ; t3 t2 - psubw m2, m8, m0 ; t9 t14 - paddw m8, m0 ; t8 t15 - psubw m0, m1, m5 ; t10 t13 - paddw m1, m5 ; t11 t12 + psubsw m2, m8, m0 ; t9 t14 + paddsw m8, m0 ; t8 t15 + psubsw m0, m1, m5 ; t10 t13 + paddsw m1, m5 ; t11 t12 %if mmsize > 16 vbroadcasti128 m5, [o(deint_shuf)] %else @@ -993,12 +996,12 @@ cglobal iidentity_4x8_internal, 0, 5, 7, dst, stride, c, eob, tx2 ITX_MUL2X_PACK 2, 4, _, 10, 4, 5, 4 ; t9a t14a vpbroadcastd m4, [o(pw_m1567_m3784)] ; reuse pw_m3784_1567 ITX_MUL2X_PACK 0, 5, _, 10, 5, 4, 4 ; t10a t13a - psubw m5, m7, m3 ; t5a t6a - 
paddw m7, m3 ; t4 t7 - psubw m4, m8, m1 ; t11a t12a - paddw m8, m1 ; t8a t15a - paddw m1, m2, m0 ; t9 t14 - psubw m2, m0 ; t10 t13 + psubsw m5, m7, m3 ; t5a t6a + paddsw m7, m3 ; t4 t7 + psubsw m4, m8, m1 ; t11a t12a + paddsw m8, m1 ; t8a t15a + paddsw m1, m2, m0 ; t9 t14 + psubsw m2, m0 ; t10 t13 punpckhqdq m0, m8, m1 ; t15a t14 punpcklqdq m8, m1 ; t8a t9 pshufd m3, m5, q1032 @@ -1016,20 +1019,20 @@ cglobal iidentity_4x8_internal, 0, 5, 7, dst, stride, c, eob, tx2 pmulhrsw m5, m1 ; t12 t13a shufps m2, m7, m3, q1032 ; t7 t6 vpblendd m7, m7, m3, 0xcc ; t4 t5 - psubw m1, m9, m6 ; dct4 out3 out2 - paddw m9, m6 ; dct4 out0 out1 - psubw m3, m9, m2 ; dct8 out7 out6 - paddw m9, m2 ; dct8 out0 out1 - psubw m2, m1, m7 ; dct8 out4 out5 - paddw m1, m7 ; dct8 out3 out2 - psubw m7, m9, m0 ; out15 out14 - paddw m0, m9 ; out0 out1 - psubw m6, m1, m5 ; out12 out13 - paddw m1, m5 ; out3 out2 - psubw m5, m2, m4 ; out11 out10 - paddw m2, m4 ; out4 out5 - psubw m4, m3, m8 ; out8 out9 - paddw m3, m8 ; out7 out6 + psubsw m1, m9, m6 ; dct4 out3 out2 + paddsw m9, m6 ; dct4 out0 out1 + psubsw m3, m9, m2 ; dct8 out7 out6 + paddsw m9, m2 ; dct8 out0 out1 + psubsw m2, m1, m7 ; dct8 out4 out5 + paddsw m1, m7 ; dct8 out3 out2 + psubsw m7, m9, m0 ; out15 out14 + paddsw m0, m9 ; out0 out1 + psubsw m6, m1, m5 ; out12 out13 + paddsw m1, m5 ; out3 out2 + psubsw m5, m2, m4 ; out11 out10 + paddsw m2, m4 ; out4 out5 + psubsw m4, m3, m8 ; out8 out9 + paddsw m3, m8 ; out7 out6 %endmacro INV_TXFM_4X16_FN dct, dct, 0 @@ -1150,20 +1153,20 @@ ALIGN function_align ITX_MUL4X_PACK 1, 2, 5, 6, 8, 1751, 3703, 2440, 3290, 3 ITX_MUL4X_PACK 3, 2, 5, 6, 8, 3035, 2751, 3513, 2106, 3 ITX_MUL4X_PACK 4, 2, 5, 6, 8, 3857, 1380, 4052, 601, 3 - psubw m2, m0, m3 ; t9a t8a t11a t10a - paddw m0, m3 ; t1a t0a t3a t2a - psubw m3, m1, m4 ; t13a t12a t15a t14a - paddw m1, m4 ; t5a t4a t7a t6a + psubsw m2, m0, m3 ; t9a t8a t11a t10a + paddsw m0, m3 ; t1a t0a t3a t2a + psubsw m3, m1, m4 ; t13a t12a t15a t14a + paddsw m1, m4 ; t5a 
t4a t7a t6a ITX_MUL4X_PACK 2, 4, 5, 6, 8, 799, 4017, 3406, 2276, 3 psubw m6, m7, m5 ITX_MUL2X_PACK 3, 5, _, 8, 6, 4, 6 vpbroadcastd m6, [o(pw_m3784_1567)] vpbroadcastd m5, [o(pw_1567_3784)] - psubw m4, m0, m1 ; t5 t4 t7 t6 - paddw m0, m1 ; t1 t0 t3 t2 - psubw m1, m2, m3 ; t13a t12a t15a t14a - paddw m2, m3 ; t9a t8a t11a t10a - psubw m3, m7, m6 + psubsw m4, m0, m1 ; t5 t4 t7 t6 + paddsw m0, m1 ; t1 t0 t3 t2 + psubsw m1, m2, m3 ; t13a t12a t15a t14a + paddsw m2, m3 ; t9a t8a t11a t10a + psubw m3, m7, m6 ; pw_3784_m1567 vpblendd m6, m6, m3, 0xf0 ITX_MUL2X_PACK 4, 3, _, 8, 6, 5, 4 ; t4a t5a t7a t6a ITX_MUL2X_PACK 1, 3, _, 8, 6, 5, 4 ; t12 t13 t15 t14 @@ -1176,10 +1179,10 @@ ALIGN function_align vinserti128 m4, m4, xm1, 1 ; t4a t5a t12 t13 vpbroadcastd m5, [o(pw_2896x8)] pshufd m2, m2, q1032 ; t6a t7a t14 t15 - psubw m1, m0, m3 ; t3a t2a t11 t10 - paddw m0, m3 ; -out15 out0 out14 -out1 - paddw m3, m4, m2 ; -out3 out12 out2 -out13 - psubw m4, m2 ; t6 t7 t14a t15a + psubsw m1, m0, m3 ; t3a t2a t11 t10 + paddsw m0, m3 ; -out15 out0 out14 -out1 + paddsw m3, m4, m2 ; -out3 out12 out2 -out13 + psubsw m4, m2 ; t6 t7 t14a t15a shufps m2, m1, m4, q1032 ; t2a t6 t10 t14a vpblendd m4, m4, m1, 0x33 ; t3a t7 t11 t15a paddw m1, m2, m4 @@ -1899,53 +1902,53 @@ ALIGN function_align ITX_MUL2X_PACK 6, 4, 9, 10, 3513, 2106, 3 ; t10 t11 ITX_MUL2X_PACK 7, 4, 9, 10, 3857, 1380, 3 ; t12 t13 ITX_MUL2X_PACK 8, 4, 9, 10, 4052, 601, 3 ; t14 t15 - psubw m4, m0, m5 ; t9a t8a - paddw m0, m5 ; t1a t0a - psubw m5, m1, m6 ; t11a t10a - paddw m1, m6 ; t3a t2a - psubw m6, m2, m7 ; t13a t12a - paddw m2, m7 ; t5a t4a - psubw m7, m3, m8 ; t15a t14a - paddw m3, m8 ; t7a t6a + psubsw m4, m0, m5 ; t9a t8a + paddsw m0, m5 ; t1a t0a + psubsw m5, m1, m6 ; t11a t10a + paddsw m1, m6 ; t3a t2a + psubsw m6, m2, m7 ; t13a t12a + paddsw m2, m7 ; t5a t4a + psubsw m7, m3, m8 ; t15a t14a + paddsw m3, m8 ; t7a t6a vpbroadcastd m11, [o(pw_m4017_799)] vpbroadcastd m12, [o(pw_799_4017)] pxor m9, m9 ITX_MUL2X_PACK 4, 8, _, 10, 
11, 12, 6 ; t8 t9 - psubw m8, m9, m11 + psubw m8, m9, m11 ; pw_4017_m799 ITX_MUL2X_PACK 6, 12, _, 10, 12, 8, 6 ; t12 t13 vpbroadcastd m11, [o(pw_m2276_3406)] vpbroadcastd m12, [o(pw_3406_2276)] ITX_MUL2X_PACK 5, 8, _, 10, 11, 12, 6 ; t10 t11 - psubw m8, m9, m11 + psubw m8, m9, m11 ; pw_2276_m3406 ITX_MUL2X_PACK 7, 12, _, 10, 12, 8, 6 ; t14 t15 - psubw m8, m1, m3 ; t7 t6 - paddw m1, m3 ; t3 t2 - psubw m3, m0, m2 ; t5 t4 - paddw m0, m2 ; t1 t0 - psubw m2, m5, m7 ; t14a t15a - paddw m7, m5 ; t10a t11a - psubw m5, m4, m6 ; t12a t13a - paddw m4, m6 ; t8a t9a + psubsw m8, m1, m3 ; t7 t6 + paddsw m1, m3 ; t3 t2 + psubsw m3, m0, m2 ; t5 t4 + paddsw m0, m2 ; t1 t0 + psubsw m2, m5, m7 ; t14a t15a + paddsw m7, m5 ; t10a t11a + psubsw m5, m4, m6 ; t12a t13a + paddsw m4, m6 ; t8a t9a vpbroadcastd m11, [o(pw_m3784_1567)] vpbroadcastd m12, [o(pw_1567_3784)] ITX_MUL2X_PACK 3, 6, _, 10, 11, 12, 4 ; t4a t5a - psubw m6, m9, m11 + psubw m6, m9, m11 ; pw_3784_m1567 ITX_MUL2X_PACK 8, 12, _, 10, 12, 6, 4 ; t6a t7a vpbroadcastd m11, [o(pw_m1567_3784)] vpbroadcastd m12, [o(pw_3784_1567)] ITX_MUL2X_PACK 2, 6, _, 10, 11, 12, 4 ; t15 t14 - psubw m6, m9, m11 + psubw m6, m9, m11 ; pw_1567_m3784 ITX_MUL2X_PACK 5, 12, _, 10, 12, 6, 4 ; t13 t12 vbroadcasti128 m11, [o(deint_shuf)] vpbroadcastd m12, [o(pw_2896x8)] - psubw m6, m0, m1 ; t3a t2a - paddw m0, m1 ; -out15 out0 - paddw m1, m2, m5 ; -out13 out2 - psubw m5, m2 ; t15a t14a - paddw m2, m4, m7 ; -out1 out14 - psubw m4, m7 ; t10 t11 - psubw m7, m3, m8 ; t6 t7 - paddw m8, m3 ; -out3 out12 + psubsw m6, m0, m1 ; t3a t2a + paddsw m0, m1 ; -out15 out0 + paddsw m1, m2, m5 ; -out13 out2 + psubsw m5, m2 ; t15a t14a + paddsw m2, m4, m7 ; -out1 out14 + psubsw m4, m7 ; t10 t11 + psubsw m7, m3, m8 ; t6 t7 + paddsw m8, m3 ; -out3 out12 REPX {pshufb x, m11}, m6, m4, m0, m2 vpblendd m3, m6, m4, 0xcc ; t3a t11 shufps m6, m6, m4, q1032 ; t2a t10 @@ -2577,25 +2580,25 @@ ALIGN function_align ITX_MULSUB_2W 3, 4, 8, 9, 10, 3166, 2598 ; t5a, t4a ITX_MULSUB_2W 1, 6, 
8, 9, 10, 3920, 1189 ; t7a, t6a ITX_MULSUB_2W 5, 2, 8, 9, 10, 1931, 3612 ; t3a, t2a - psubw m8, m2, m6 ; t6 - paddw m2, m6 ; t2 - psubw m6, m0, m4 ; t4 - paddw m0, m4 ; t0 - psubw m4, m5, m1 ; t7 - paddw m5, m1 ; t3 - psubw m1, m7, m3 ; t5 - paddw m7, m3 ; t1 + psubsw m8, m2, m6 ; t6 + paddsw m2, m6 ; t2 + psubsw m6, m0, m4 ; t4 + paddsw m0, m4 ; t0 + psubsw m4, m5, m1 ; t7 + paddsw m5, m1 ; t3 + psubsw m1, m7, m3 ; t5 + paddsw m7, m3 ; t1 ITX_MULSUB_2W 6, 1, 3, 9, 10, 1567, 3784 ; t5a, t4a ITX_MULSUB_2W 4, 8, 3, 9, 10, 3784, 1567 ; t6a, t7a - psubw m9, m6, m8 ; t7 - paddw m6, m8 ; out6 + psubsw m9, m6, m8 ; t7 + paddsw m6, m8 ; out6 vpbroadcastd m8, [o(pw_2896x8)] - psubw m3, m7, m5 ; t3 - paddw m7, m5 ; -out7 - psubw m5, m0, m2 ; t2 - paddw m0, m2 ; out0 - psubw m2, m1, m4 ; t6 - paddw m1, m4 ; -out1 + psubsw m3, m7, m5 ; t3 + paddsw m7, m5 ; -out7 + psubsw m5, m0, m2 ; t2 + paddsw m0, m2 ; out0 + psubsw m2, m1, m4 ; t6 + paddsw m1, m4 ; -out1 psubw m4, m5, m3 paddw m3, m5 psubw m5, m2, m9 @@ -2956,25 +2959,25 @@ ALIGN function_align mova [rsp+gprsize+32*0], m6 ; tmp3 IDCT16_1D_ODDHALF 9, 3, 5, 7, 1, 11, 13, 14, 6, 10, 15 mova m6, [rsp+gprsize+32*1] ; tmp5 - psubw m15, m0, m14 ; out15 - paddw m0, m14 ; out0 - psubw m14, m2, m13 ; out14 - paddw m2, m13 ; out1 + psubsw m15, m0, m14 ; out15 + paddsw m0, m14 ; out0 + psubsw m14, m2, m13 ; out14 + paddsw m2, m13 ; out1 mova [rsp+gprsize+32*1], m2 - psubw m13, m4, m11 ; out13 - paddw m2, m4, m11 ; out2 - psubw m11, m8, m7 ; out11 - paddw m4, m8, m7 ; out4 + psubsw m13, m4, m11 ; out13 + paddsw m2, m4, m11 ; out2 + psubsw m11, m8, m7 ; out11 + paddsw m4, m8, m7 ; out4 mova m7, [rsp+gprsize+32*2] ; tmp7 - psubw m10, m6, m5 ; out10 - paddw m5, m6 ; out5 - psubw m8, m7, m9 ; out8 - paddw m7, m9 ; out7 - psubw m9, m12, m3 ; out9 - paddw m6, m12, m3 ; out6 + psubsw m10, m6, m5 ; out10 + paddsw m5, m6 ; out5 + psubsw m8, m7, m9 ; out8 + paddsw m7, m9 ; out7 + psubsw m9, m12, m3 ; out9 + paddsw m6, m12, m3 ; out6 mova m3, 
[rsp+gprsize+32*0] ; tmp3 - psubw m12, m3, m1 ; out12 - paddw m3, m1 ; out3 + psubsw m12, m3, m1 ; out12 + paddsw m3, m1 ; out3 ret INV_TXFM_16X16_FN adst, dct @@ -3009,24 +3012,24 @@ ALIGN function_align ITX_MULSUB_2W 9, 6, 0, 4, 15, 2440, 3290 ; t7, t6 ITX_MULSUB_2W 5, 10, 0, 4, 15, 3513, 2106 ; t11, t10 ITX_MULSUB_2W 1, 14, 0, 4, 15, 4052, 601 ; t15, t14 - psubw m0, m2, m10 ; t10a - paddw m2, m10 ; t2a - psubw m10, m13, m5 ; t11a - paddw m13, m5 ; t3a - psubw m5, m6, m14 ; t14a - paddw m6, m14 ; t6a - psubw m14, m9, m1 ; t15a - paddw m9, m1 ; t7a + psubsw m0, m2, m10 ; t10a + paddsw m2, m10 ; t2a + psubsw m10, m13, m5 ; t11a + paddsw m13, m5 ; t3a + psubsw m5, m6, m14 ; t14a + paddsw m6, m14 ; t6a + psubsw m14, m9, m1 ; t15a + paddsw m9, m1 ; t7a ITX_MULSUB_2W 0, 10, 1, 4, 15, 3406, 2276 ; t11, t10 ITX_MULSUB_2W 14, 5, 1, 4, 15, 2276, 3406 ; t14, t15 - psubw m1, m10, m14 ; t14a - paddw m10, m14 ; t10a - psubw m14, m0, m5 ; t15a - paddw m0, m5 ; t11a - psubw m5, m2, m6 ; t6 - paddw m2, m6 ; t2 - psubw m6, m13, m9 ; t7 - paddw m13, m9 ; t3 + psubsw m1, m10, m14 ; t14a + paddsw m10, m14 ; t10a + psubsw m14, m0, m5 ; t15a + paddsw m0, m5 ; t11a + psubsw m5, m2, m6 ; t6 + paddsw m2, m6 ; t2 + psubsw m6, m13, m9 ; t7 + paddsw m13, m9 ; t3 ITX_MULSUB_2W 6, 5, 4, 9, 15, 3784, 1567 ; t6a, t7a ITX_MULSUB_2W 14, 1, 4, 9, 15, 3784, 1567 ; t14, t15 mova m9, [rsp+gprsize+32*0] ; in15 @@ -3039,46 +3042,46 @@ ALIGN function_align ITX_MULSUB_2W 11, 6, 2, 10, 15, 1751, 3703 ; t5, t4 ITX_MULSUB_2W 7, 8, 2, 10, 15, 3035, 2751 ; t9, t8 ITX_MULSUB_2W 3, 12, 2, 10, 15, 3857, 1380 ; t13, t12 - psubw m10, m4, m8 ; t8a - paddw m8, m4 ; t0a - psubw m4, m9, m7 ; t9a - paddw m9, m7 ; t1a - psubw m7, m6, m12 ; t12a - paddw m6, m12 ; t4a - psubw m12, m11, m3 ; t13a - paddw m11, m3 ; t5a + psubsw m10, m4, m8 ; t8a + paddsw m8, m4 ; t0a + psubsw m4, m9, m7 ; t9a + paddsw m9, m7 ; t1a + psubsw m7, m6, m12 ; t12a + paddsw m6, m12 ; t4a + psubsw m12, m11, m3 ; t13a + paddsw m11, m3 ; t5a 
ITX_MULSUB_2W 10, 4, 2, 3, 15, 799, 4017 ; t9, t8 ITX_MULSUB_2W 12, 7, 2, 3, 15, 4017, 799 ; t12, t13 - psubw m3, m9, m11 ; t5 - paddw m9, m11 ; t1 - psubw m11, m4, m12 ; t12a - paddw m4, m12 ; t8a - paddw m12, m8, m6 ; t0 - psubw m8, m6 ; t4 - paddw m6, m10, m7 ; t9a - psubw m10, m7 ; t13a + psubsw m3, m9, m11 ; t5 + paddsw m9, m11 ; t1 + psubsw m11, m4, m12 ; t12a + paddsw m4, m12 ; t8a + paddsw m12, m8, m6 ; t0 + psubsw m8, m6 ; t4 + paddsw m6, m10, m7 ; t9a + psubsw m10, m7 ; t13a ITX_MULSUB_2W 8, 3, 2, 7, 15, 1567, 3784 ; t5a, t4a ITX_MULSUB_2W 11, 10, 2, 7, 15, 1567, 3784 ; t13, t12 mova m7, [rsp+gprsize+32*0] ; t10a mova m2, [rsp+gprsize+32*1] ; t6a - paddw m15, m9, m13 ; -out15 - psubw m9, m13 ; t3a - paddw m13, m11, m1 ; -out13 - psubw m11, m1 ; t15a - psubw m1, m4, m7 ; t10 - paddw m7, m4 ; -out1 - psubw m4, m3, m2 ; t6 - paddw m3, m2 ; -out3 - paddw m2, m10, m14 ; out2 - psubw m10, m14 ; t14a - paddw m14, m6, m0 ; out14 - psubw m6, m0 ; t11 + paddsw m15, m9, m13 ; -out15 + psubsw m9, m13 ; t3a + paddsw m13, m11, m1 ; -out13 + psubsw m11, m1 ; t15a + psubsw m1, m4, m7 ; t10 + paddsw m7, m4 ; -out1 + psubsw m4, m3, m2 ; t6 + paddsw m3, m2 ; -out3 + paddsw m2, m10, m14 ; out2 + psubsw m10, m14 ; t14a + paddsw m14, m6, m0 ; out14 + psubsw m6, m0 ; t11 mova m0, [rsp+gprsize+32*2] ; t2 mova [rsp+gprsize+32*1], m7 - psubw m7, m12, m0 ; t2a - paddw m0, m12 ; out0 - paddw m12, m8, m5 ; out12 - psubw m8, m5 ; t7 + psubsw m7, m12, m0 ; t2a + paddsw m0, m12 ; out0 + paddsw m12, m8, m5 ; out12 + psubsw m8, m5 ; t7 paddw m5, m10, m11 ; -out5 psubw m10, m11 ; out10 psubw m11, m4, m8 ; -out11 @@ -3277,6 +3280,15 @@ ALIGN function_align %endif %endmacro +%macro ITX_UNPACK_MULHRSW 7 ; dst1, dst2/src, tmp, coef[1-4] + vpbroadcastd m%3, [r5-pw_201_4091x8+pw_%4_%5x8] + punpcklwd m%1, m%2, m%2 + pmulhrsw m%1, m%3 + vpbroadcastd m%3, [r5-pw_201_4091x8+pw_%6_%7x8] + punpckhwd m%2, m%2 + pmulhrsw m%2, m%3 +%endmacro + cglobal inv_txfm_add_dct_dct_8x32, 4, 4, 0, dst, stride, c, 
eob lea rax, [o_base] test eobd, eobd @@ -3431,22 +3443,11 @@ ALIGN function_align mova [rsp+gprsize+1*32], m1 mova m0, [rsp+gprsize+2*32] mova [rsp+gprsize+2*32], m6 - punpcklwd m1, m8, m8 - punpckhwd m8, m8 - punpcklwd m15, m9, m9 - punpckhwd m9, m9 - punpcklwd m14, m0, m0 - punpckhwd m0, m0 - punpcklwd m13, m11, m11 - punpckhwd m11, m11 - ITX_MULHRSW_SHL3 1, 6, 201, 4091 ; t16a, t31a - ITX_MULHRSW_SHL3 8, 6, m601, 4052 ; t23a, t24a - ITX_MULHRSW_SHL3 15, 6, 995, 3973 ; t20a, t27a - ITX_MULHRSW_SHL3 9, 6, m1380, 3857 ; t19a, t28a - ITX_MULHRSW_SHL3 14, 6, 1751, 3703 ; t18a, t29a - ITX_MULHRSW_SHL3 0, 6, m2106, 3513 ; t21a, t26a - ITX_MULHRSW_SHL3 13, 6, 2440, 3290 ; t22a, t25a - ITX_MULHRSW_SHL3 11, 6, m2751, 3035 ; t17a, t30a + lea r5, [rax-(o_base)+pw_201_4091x8] + ITX_UNPACK_MULHRSW 1, 8, 6, 201, 4091, m601, 4052 ; t16a, t31a, t23a, t24a + ITX_UNPACK_MULHRSW 15, 9, 6, 995, 3973, m1380, 3857 ; t20a, t27a, t19a, t28a + ITX_UNPACK_MULHRSW 14, 0, 6, 1751, 3703, m2106, 3513 ; t18a, t29a, t21a, t26a + ITX_UNPACK_MULHRSW 13, 11, 6, 2440, 3290, m2751, 3035 ; t22a, t25a, t17a, t30a jmp .main2 ALIGN function_align .main: @@ -3474,74 +3475,74 @@ ALIGN function_align ITX_MUL2X_PACK 13, 6, 12, 10, 2440, 3290, 3 ; t22a, t25a ITX_MUL2X_PACK 11, 6, 12, 10, 3035, 2751, 3 ; t17a, t30a .main2: - psubw m6, m1, m11 ; t17 t30 - paddw m1, m11 ; t16 t31 - psubw m11, m9, m14 ; t18 t29 - paddw m9, m14 ; t19 t28 - psubw m14, m15, m0 ; t21 t26 - paddw m15, m0 ; t20 t27 - psubw m0, m8, m13 ; t22 t25 - paddw m8, m13 ; t23 t24 + psubsw m6, m1, m11 ; t17 t30 + paddsw m1, m11 ; t16 t31 + psubsw m11, m9, m14 ; t18 t29 + paddsw m9, m14 ; t19 t28 + psubsw m14, m15, m0 ; t21 t26 + paddsw m15, m0 ; t20 t27 + psubsw m0, m8, m13 ; t22 t25 + paddsw m8, m13 ; t23 t24 ITX_MUL2X_PACK 6, 12, 13, 10, 799, 4017, 3 ; t17a t30a ITX_MUL2X_PACK 11, 12, 13, 10, m4017, 799, 3 ; t18a t29a ITX_MUL2X_PACK 14, 12, 13, 10, 3406, 2276, 3 ; t21a t26a ITX_MUL2X_PACK 0, 12, 13, 10, m2276, 3406, 3 ; t22a t25a - psubw m13, 
m1, m9 ; t19a t28a - paddw m1, m9 ; t16a t31a - psubw m9, m8, m15 ; t20a t27a - paddw m8, m15 ; t23a t24a - psubw m15, m6, m11 ; t18 t29 - paddw m6, m11 ; t17 t30 - psubw m11, m0, m14 ; t21 t26 - paddw m0, m14 ; t22 t25 + psubsw m13, m1, m9 ; t19a t28a + paddsw m1, m9 ; t16a t31a + psubsw m9, m8, m15 ; t20a t27a + paddsw m8, m15 ; t23a t24a + psubsw m15, m6, m11 ; t18 t29 + paddsw m6, m11 ; t17 t30 + psubsw m11, m0, m14 ; t21 t26 + paddsw m0, m14 ; t22 t25 ITX_MUL2X_PACK 15, 12, 14, 10, 1567, 3784, 1 ; t18a t29a ITX_MUL2X_PACK 13, 12, 14, 10, 1567, 3784, 1 ; t19 t28 ITX_MUL2X_PACK 9, 12, 14, 10, m3784, 1567, 1 ; t20 t27 ITX_MUL2X_PACK 11, 12, 14, 10, m3784, 1567, 1 ; t21a t26a vbroadcasti128 m12, [o(deint_shuf)] REPX {pshufb x, m12}, m0, m1, m6, m8 - psubw m14, m1, m8 ; t23 t24 - paddw m1, m8 ; t16 t31 - psubw m8, m6, m0 ; t22a t25a - paddw m6, m0 ; t17a t30a - psubw m0, m15, m11 ; t21 t26 - paddw m15, m11 ; t18 t29 - psubw m11, m13, m9 ; t20a t27a - paddw m13, m9 ; t19a t28a + psubsw m14, m1, m8 ; t23 t24 + paddsw m1, m8 ; t16 t31 + psubsw m8, m6, m0 ; t22a t25a + paddsw m6, m0 ; t17a t30a + psubsw m0, m15, m11 ; t21 t26 + paddsw m15, m11 ; t18 t29 + psubsw m11, m13, m9 ; t20a t27a + paddsw m13, m9 ; t19a t28a vpbroadcastd m12, [o(pw_2896x8)] - punpcklqdq m9, m11, m0 ; t20a t21 - punpckhqdq m11, m0 ; t27a t26 - punpcklqdq m0, m14, m8 ; t23 t22a - punpckhqdq m14, m8 ; t24 t25a - psubw m8, m11, m9 ; t20 t21a - paddw m11, m9 ; t27 t26a - psubw m9, m14, m0 ; t23a t22 - paddw m14, m0 ; t24a t25 - REPX {pmulhrsw x, m12}, m8, m9, m14, m11 + punpcklqdq m9, m11, m0 ; t20a t21 + punpckhqdq m11, m0 ; t27a t26 + punpcklqdq m0, m14, m8 ; t23 t22a + punpckhqdq m14, m8 ; t24 t25a + psubw m8, m11, m9 ; t20 t21a + paddw m11, m9 ; t27 t26a + psubw m9, m14, m0 ; t23a t22 + paddw m14, m0 ; t24a t25 + REPX {pmulhrsw x, m12}, m8, m9, m14, m11 punpcklqdq m0, m1, m6 ; t16 t17a punpckhqdq m1, m6 ; t31 t30a - psubw m10, m5, m8 ; out20 out21 - paddw m5, m8 ; out11 out10 - psubw m6, m3, m14 
; out24 out25 - paddw m3, m14 ; out7 out6 - psubw m8, m7, m0 ; out16 out17 - paddw m7, m0 ; out15 out14 + psubsw m10, m5, m8 ; out20 out21 + paddsw m5, m8 ; out11 out10 + psubsw m6, m3, m14 ; out24 out25 + paddsw m3, m14 ; out7 out6 + psubsw m8, m7, m0 ; out16 out17 + paddsw m7, m0 ; out15 out14 mova m0, [rsp+gprsize+0*32] punpcklqdq m12, m13, m15 ; t19a t18 punpckhqdq m13, m15 ; t28a t29 - psubw m15, m0, m1 ; out31 out30 - paddw m0, m1 ; out0 out1 + psubsw m15, m0, m1 ; out31 out30 + paddsw m0, m1 ; out0 out1 mova m1, [rsp+gprsize+1*32] mova [rsp+gprsize+0*32], m6 mova m6, [rsp+gprsize+2*32] - psubw m14, m1, m13 ; out28 out29 - paddw m1, m13 ; out3 out2 - psubw m13, m2, m11 ; out27 out26 - paddw m2, m11 ; out4 out5 - psubw m11, m4, m9 ; out23 out22 - paddw m4, m9 ; out8 out9 - psubw m9, m6, m12 ; out19 out18 - paddw m6, m12 ; out12 out13 + psubsw m14, m1, m13 ; out28 out29 + paddsw m1, m13 ; out3 out2 + psubsw m13, m2, m11 ; out27 out26 + paddsw m2, m11 ; out4 out5 + psubsw m11, m4, m9 ; out23 out22 + paddsw m4, m9 ; out8 out9 + psubsw m9, m6, m12 ; out19 out18 + paddsw m6, m12 ; out12 out13 ret %macro LOAD_PACKED_16X2 4 ; dst, tmp, row[1-2] @@ -3872,8 +3873,8 @@ cglobal inv_txfm_add_identity_identity_32x8, 4, 6, 10, dst, stride, c, eob %macro IDCT32_PASS2_END 7 ; coefs[1-2], tmp[1-2], rnd, offset[1-2] mova m%4, [%2] - paddw m%3, m%1, m%4 - psubw m%1, m%4 + paddsw m%3, m%1, m%4 + psubsw m%1, m%4 pmovzxbw m%4, [dstq+%6] pmulhrsw m%3, m%5 pmulhrsw m%1, m%5 @@ -4056,29 +4057,29 @@ ALIGN function_align ITX_MULSUB_2W 6, 9, 7, 8, 15, 2440, 3290 ; t22a, t25a ITX_MULSUB_2W 14, 1, 7, 8, 15, 4052, 601 ; t23a, t24a .main2: - psubw m7, m12, m4 ; t18 - paddw m12, m4 ; t19 - psubw m4, m2, m10 ; t21 - paddw m2, m10 ; t20 - psubw m10, m14, m6 ; t22 - paddw m14, m6 ; t23 - psubw m6, m1, m9 ; t25 - paddw m1, m9 ; t24 - psubw m9, m13, m5 ; t26 - paddw m13, m5 ; t27 - psubw m5, m3, m11 ; t29 - paddw m3, m11 ; t28 + psubsw m7, m12, m4 ; t18 + paddsw m12, m4 ; t19 + psubsw m4, m2, m10 
; t21 + paddsw m2, m10 ; t20 + psubsw m10, m14, m6 ; t22 + paddsw m14, m6 ; t23 + psubsw m6, m1, m9 ; t25 + paddsw m1, m9 ; t24 + psubsw m9, m13, m5 ; t26 + paddsw m13, m5 ; t27 + psubsw m5, m3, m11 ; t29 + paddsw m3, m11 ; t28 ITX_MULSUB_2W 5, 7, 8, 11, 15, m4017, 799 ; t18a, t29a ITX_MULSUB_2W 9, 4, 8, 11, 15, 3406, 2276 ; t21a, t26a ITX_MULSUB_2W 6, 10, 8, 11, 15, m2276, 3406 ; t22a, t25a - psubw m8, m14, m2 ; t20a - paddw m14, m2 ; t23a - psubw m2, m1, m13 ; t27a - paddw m1, m13 ; t24a - psubw m13, m6, m9 ; t21 - paddw m6, m9 ; t22 - psubw m9, m10, m4 ; t26 - paddw m10, m4 ; t25 + psubsw m8, m14, m2 ; t20a + paddsw m14, m2 ; t23a + psubsw m2, m1, m13 ; t27a + paddsw m1, m13 ; t24a + psubsw m13, m6, m9 ; t21 + paddsw m6, m9 ; t22 + psubsw m9, m10, m4 ; t26 + paddsw m10, m4 ; t25 ITX_MULSUB_2W 2, 8, 4, 11, 15, m3784, 1567 ; t20, t27 ITX_MULSUB_2W 9, 13, 4, 11, 15, m3784, 1567 ; t21a, t26a mova m4, [rsp+gprsize+32*0] ; in31 @@ -4089,31 +4090,31 @@ ALIGN function_align mova [rsp+gprsize+32*2], m1 ; t24a ITX_MULSUB_2W 0, 4, 1, 11, 15, 201, 4091 ; t16a, t31a ITX_MULSUB_2W 14, 6, 1, 11, 15, 3035, 2751 ; t17a, t30a - psubw m1, m0, m14 ; t17 - paddw m0, m14 ; t16 - psubw m14, m4, m6 ; t30 - paddw m4, m6 ; t31 + psubsw m1, m0, m14 ; t17 + paddsw m0, m14 ; t16 + psubsw m14, m4, m6 ; t30 + paddsw m4, m6 ; t31 ITX_MULSUB_2W 14, 1, 6, 11, 15, 799, 4017 ; t17a, t30a - psubw m6, m0, m12 ; t19a - paddw m0, m12 ; t16a - psubw m12, m4, m3 ; t28a - paddw m4, m3 ; t31a - psubw m3, m14, m5 ; t18 - paddw m14, m5 ; t17 - psubw m5, m1, m7 ; t29 - paddw m1, m7 ; t30 + psubsw m6, m0, m12 ; t19a + paddsw m0, m12 ; t16a + psubsw m12, m4, m3 ; t28a + paddsw m4, m3 ; t31a + psubsw m3, m14, m5 ; t18 + paddsw m14, m5 ; t17 + psubsw m5, m1, m7 ; t29 + paddsw m1, m7 ; t30 ITX_MULSUB_2W 5, 3, 7, 11, 15, 1567, 3784 ; t18a, t29a ITX_MULSUB_2W 12, 6, 7, 11, 15, 1567, 3784 ; t19, t28 - psubw m7, m1, m10 ; t25a - paddw m1, m10 ; t30a - psubw m10, m5, m9 ; t21 - paddw m5, m9 ; t18 - psubw m9, m12, m2 ; 
t20a - paddw m12, m2 ; t19a - psubw m2, m3, m13 ; t26 - paddw m3, m13 ; t29 - psubw m13, m6, m8 ; t27a - paddw m6, m8 ; t28a + psubsw m7, m1, m10 ; t25a + paddsw m1, m10 ; t30a + psubsw m10, m5, m9 ; t21 + paddsw m5, m9 ; t18 + psubsw m9, m12, m2 ; t20a + paddsw m12, m2 ; t19a + psubsw m2, m3, m13 ; t26 + paddsw m3, m13 ; t29 + psubsw m13, m6, m8 ; t27a + paddsw m6, m8 ; t28a mova [tmp1q-32*2], m5 mova [tmp1q-32*1], m12 mova [tmp2q+32*0], m6 @@ -4123,12 +4124,12 @@ ALIGN function_align mova m6, [rsp+gprsize+32*1] ; t23 mova m3, [rsp+gprsize+32*2] ; t24a vpbroadcastd m8, [o(pw_2896x8)] - psubw m1, m14, m5 ; t22a - paddw m14, m5 ; t17a - psubw m5, m0, m6 ; t23 - paddw m0, m6 ; t16 - psubw m6, m4, m3 ; t24 - paddw m4, m3 ; t31 + psubsw m1, m14, m5 ; t22a + paddsw m14, m5 ; t17a + psubsw m5, m0, m6 ; t23 + paddsw m0, m6 ; t16 + psubsw m6, m4, m3 ; t24 + paddsw m4, m3 ; t31 mova [tmp1q-32*4], m0 mova [tmp1q-32*3], m14 mova [tmp2q+32*3], m4 @@ -4241,13 +4242,13 @@ ALIGN function_align ; Perform the final sumsub step and YMM lane shuffling %macro IDCT32_PASS1_END 4 ; row[1-2], tmp[1-2] mova m%3, [tmp2q+32*( 3-%1)] - psubw m%4, m%1, m%3 - paddw m%1, m%3 + psubsw m%4, m%1, m%3 + paddsw m%1, m%3 mova m%3, [tmp1q+32*(11-%2)] mova [tmp1q+32*(11-%2)+16], xm%4 vextracti128 [tmp2q+32*( 3-%1)+16], m%4, 1 - paddw m%4, m%2, m%3 - psubw m%2, m%3 + paddsw m%4, m%2, m%3 + psubsw m%2, m%3 mova [tmp1q+32*(11-%2)], xm%2 vextracti128 [tmp2q+32*( 3-%1)], m%2, 1 vperm2i128 m%2, m%1, m%4, 0x31 @@ -4708,12 +4709,12 @@ cglobal inv_txfm_add_identity_identity_32x32, 4, 6, 10, dst, stride, c, eob mova m%5, [tmp1q-32*(45-%1)] mova m%4, [tmp2q-32*(20+%1)] %endif - psubw m%6, m%5, m%4 ; idct32 out31-n - paddw m%5, m%4 ; idct32 out 0+n - psubw m%4, m%6, m%3 ; out32+n - paddw m%6, m%3 ; out31-n - psubw m%3, m%5, m%2 ; out63-n - paddw m%5, m%2 ; out 0+n + psubsw m%6, m%5, m%4 ; idct32 out31-n + paddsw m%5, m%4 ; idct32 out 0+n + psubsw m%4, m%6, m%3 ; out32+n + paddsw m%6, m%3 ; out31-n + psubsw m%3, 
m%5, m%2 ; out63-n + paddsw m%5, m%2 ; out 0+n %if %0 == 6 ; pass 1 %if %1 & 1 mova [tmp2q-32*(19-%1)], m%4 @@ -4948,25 +4949,25 @@ ALIGN function_align pmulhrsw m2, m13 ; t34a pmulhrsw m8, m3 ; t60a pmulhrsw m3, m12 ; t35a - psubw m12, m0, m1 ; t33 - paddw m0, m1 ; t32 - psubw m1, m3, m2 ; t34 - paddw m3, m2 ; t35 - psubw m2, m8, m9 ; t61 - paddw m8, m9 ; t60 - psubw m9, m11, m10 ; t62 - paddw m11, m10 ; t63 + psubsw m12, m0, m1 ; t33 + paddsw m0, m1 ; t32 + psubsw m1, m3, m2 ; t34 + paddsw m3, m2 ; t35 + psubsw m2, m8, m9 ; t61 + paddsw m8, m9 ; t60 + psubsw m9, m11, m10 ; t62 + paddsw m11, m10 ; t63 ITX_MULSUB_2W 2, 1, 10, 13, 15, m4076, 401 ; t34a, t61a vpbroadcastd m14, [o(pw_401_4076)] ITX_MULSUB_2W 9, 12, 10, 13, 15, 14, 13 ; t33a, t62a - psubw m10, m0, m3 ; t35a - paddw m0, m3 ; t32a - psubw m3, m11, m8 ; t60a - paddw m11, m8 ; t63a - psubw m8, m9, m2 ; t34 - paddw m9, m2 ; t33 - psubw m2, m12, m1 ; t61 - paddw m12, m1 ; t62 + psubsw m10, m0, m3 ; t35a + paddsw m0, m3 ; t32a + psubsw m3, m11, m8 ; t60a + paddsw m11, m8 ; t63a + psubsw m8, m9, m2 ; t34 + paddsw m9, m2 ; t33 + psubsw m2, m12, m1 ; t61 + paddsw m12, m1 ; t62 mova [tmp1q-32*4], m0 mova [tmp1q-32*3], m9 mova [tmp2q+32*2], m12 @@ -4995,25 +4996,25 @@ ALIGN function_align pmulhrsw m6, m9 ; t38a pmulhrsw m0, m7 ; t56a pmulhrsw m7, m8 ; t39a - psubw m8, m4, m5 ; t37 - paddw m4, m5 ; t36 - psubw m5, m7, m6 ; t38 - paddw m7, m6 ; t39 - psubw m6, m0, m1 ; t57 - paddw m0, m1 ; t56 - psubw m1, m3, m2 ; t58 - paddw m3, m2 ; t59 + psubsw m8, m4, m5 ; t37 + paddsw m4, m5 ; t36 + psubsw m5, m7, m6 ; t38 + paddsw m7, m6 ; t39 + psubsw m6, m0, m1 ; t57 + paddsw m0, m1 ; t56 + psubsw m1, m3, m2 ; t58 + paddsw m3, m2 ; t59 ITX_MULSUB_2W 6, 5, 2, 9, 15, m2598, 3166 ; t38a, t57a vpbroadcastd m10, [o(pw_3166_2598)] ITX_MULSUB_2W 1, 8, 2, 9, 15, 10, 9 ; t37a, t58a - psubw m2, m7, m4 ; t36a - paddw m7, m4 ; t39a - psubw m4, m0, m3 ; t59a - paddw m0, m3 ; t56a - psubw m3, m6, m1 ; t37 - paddw m6, m1 ; t38 - psubw m1, 
m5, m8 ; t58 - paddw m5, m8 ; t57 + psubsw m2, m7, m4 ; t36a + paddsw m7, m4 ; t39a + psubsw m4, m0, m3 ; t59a + paddsw m0, m3 ; t56a + psubsw m3, m6, m1 ; t37 + paddsw m6, m1 ; t38 + psubsw m1, m5, m8 ; t58 + paddsw m5, m8 ; t57 mova [tmp1q+32*2], m6 mova [tmp1q+32*3], m7 mova [tmp2q-32*4], m0 @@ -5055,24 +5056,24 @@ ALIGN function_align mova m3, [tmp2q-32* 4] ; t47a mova m6, [tmp1q+32*11] ; t56a mova m7, [tmp2q+32*12] ; t63a - psubw m8, m0, m1 ; t39 - paddw m0, m1 ; t32 - psubw m1, m3, m2 ; t40 - paddw m3, m2 ; t47 - psubw m2, m4, m5 ; t55 - paddw m4, m5 ; t48 - psubw m5, m7, m6 ; t56 - paddw m7, m6 ; t63 + psubsw m8, m0, m1 ; t39 + paddsw m0, m1 ; t32 + psubsw m1, m3, m2 ; t40 + paddsw m3, m2 ; t47 + psubsw m2, m4, m5 ; t55 + paddsw m4, m5 ; t48 + psubsw m5, m7, m6 ; t56 + paddsw m7, m6 ; t63 ITX_MULSUB_2W 5, 8, 6, 9, 15, 11, 12 ; t39a, t56a ITX_MULSUB_2W 2, 1, 6, 9, 15, 12, 13 ; t40a, t55a - psubw m6, m0, m3 ; t47a - paddw m0, m3 ; t32a - psubw m3, m7, m4 ; t48a - paddw m7, m4 ; t63a - psubw m4, m5, m2 ; t40 - paddw m5, m2 ; t39 - psubw m2, m8, m1 ; t55 - paddw m8, m1 ; t56 + psubsw m6, m0, m3 ; t47a + paddsw m0, m3 ; t32a + psubsw m3, m7, m4 ; t48a + paddsw m7, m4 ; t63a + psubsw m4, m5, m2 ; t40 + paddsw m5, m2 ; t39 + psubsw m2, m8, m1 ; t55 + paddsw m8, m1 ; t56 psubw m1, m2, m4 ; t40a paddw m2, m4 ; t55a psubw m4, m3, m6 ; t47 diff --git a/third_party/dav1d/src/x86/itx_init_tmpl.c b/third_party/dav1d/src/x86/itx_init_tmpl.c index 35795ff69b12..c4aa5bc9bcd1 100644 --- a/third_party/dav1d/src/x86/itx_init_tmpl.c +++ b/third_party/dav1d/src/x86/itx_init_tmpl.c @@ -78,6 +78,9 @@ decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x32_avx2); decl_itx_fn(dav1d_inv_txfm_add_dct_dct_64x64_avx2); decl_itx17_fns(4, 4, ssse3); +decl_itx16_fns(4, 8, ssse3); +decl_itx16_fns(8, 4, ssse3); +decl_itx16_fns(8, 8, ssse3); void bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) { #define assign_itx_fn(pfx, w, h, type, type_enum, ext) \ @@ -121,7 +124,10 @@ void 
bitfn(dav1d_itx_dsp_init_x86)(Dav1dInvTxfmDSPContext *const c) { if (!(flags & DAV1D_X86_CPU_FLAG_SSSE3)) return; #if BITDEPTH == 8 - assign_itx17_fn(, 4, 4, ssse3); + assign_itx17_fn(, 4, 4, ssse3); + assign_itx16_fn(R, 4, 8, ssse3); + assign_itx16_fn(R, 8, 4, ssse3); + assign_itx16_fn(, 8, 8, ssse3); #endif if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) return; diff --git a/third_party/dav1d/src/x86/itx_ssse3.asm b/third_party/dav1d/src/x86/itx_ssse3.asm index 2b253728e693..8e69a3b1928d 100644 --- a/third_party/dav1d/src/x86/itx_ssse3.asm +++ b/third_party/dav1d/src/x86/itx_ssse3.asm @@ -29,116 +29,154 @@ SECTION_RODATA 16 -deint_shuf: db 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 +deint_shuf: db 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 + +deint_shuf1: db 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15 +deint_shuf2: db 8, 9, 0, 1, 10, 11, 2, 3, 12, 13, 4, 5, 14, 15, 6, 7 + +%macro COEF_PAIR 2 +pw_%1_m%2: times 4 dw %1, -%2 +pw_%2_%1: times 4 dw %2, %1 +%endmacro + +;adst4 +pw_1321_3803: times 4 dw 1321, 3803 +pw_2482_m1321: times 4 dw 2482, -1321 +pw_3344_2482: times 4 dw 3344, 2482 +pw_3344_m3803: times 4 dw 3344, -3803 +pw_m6688_m3803: times 4 dw -6688, -3803 + +COEF_PAIR 1567, 3784 +COEF_PAIR 799, 4017 +COEF_PAIR 3406, 2276 +COEF_PAIR 401, 4076 +COEF_PAIR 1931, 3612 +COEF_PAIR 3166, 2598 +COEF_PAIR 3920, 1189 +COEF_PAIR 3784, 1567 + +pd_2048: times 4 dd 2048 +pw_2048: times 8 dw 2048 +pw_4096: times 8 dw 4096 +pw_16384: times 8 dw 16384 +pw_m16384: times 8 dw -16384 +pw_2896x8: times 8 dw 2896*8 +pw_3344x8: times 8 dw 3344*8 +pw_5793x4: times 8 dw 5793*4 -qw_2896x8: times 8 dw 2896*8 -qw_1567_m3784: times 4 dw 1567, -3784 -qw_3784_1567: times 4 dw 3784, 1567 - -qw_1321_3803: times 4 dw 1321, 3803 -qw_2482_m1321: times 4 dw 2482, -1321 -qw_3344_2482: times 4 dw 3344, 2482 -qw_3344_m3803: times 4 dw 3344, -3803 -qw_m6688_m3803: times 4 dw -6688, -3803 -qw_3344x8: times 8 dw 3344*8 -qw_5793x4: times 8 dw 5793*4 - -pd_2048: times 4 dd 2048 
-qw_2048: times 8 dw 2048 - iadst4_dconly1a: times 2 dw 10568, 19856, 26752, 30424 -iadst4_dconly1b: times 2 dw 30424, 26752, 19856, 10568 +iadst4_dconly1b: times 2 dw 30424, 26752, 19856, 10568 iadst4_dconly2a: dw 10568, 10568, 10568, 10568, 19856, 19856, 19856, 19856 -iadst4_dconly2b: dw 26752, 26752, 26752, 26752, 30424, 30424, 30424, 30424 - -SECTION .text - -%define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX) - -%macro ITX4_END 4-5 2048 ; row[1-4], rnd -%if %5 - mova m2, [qw_%5] - pmulhrsw m0, m2 - pmulhrsw m1, m2 -%endif - lea r2, [dstq+strideq*2] -%assign %%i 1 -%rep 4 - %if %1 & 2 - CAT_XDEFINE %%row_adr, %%i, r2 + strideq*(%1&1) - %else - CAT_XDEFINE %%row_adr, %%i, dstq + strideq*(%1&1) - %endif - %assign %%i %%i + 1 - %rotate 1 -%endrep - - movd m2, [%%row_adr1] ;dst0 - movd m4, [%%row_adr2] ;dst1 - punpckldq m2, m4 ;high: dst1 :low: dst0 - movd m3, [%%row_adr3] ;dst2 - movd m4, [%%row_adr4] ;dst3 - punpckldq m3, m4 ;high: dst3 :low: dst2 - - pxor m4, m4 - punpcklbw m2, m4 ;extend byte to word - punpcklbw m3, m4 ;extend byte to word - - paddw m0, m2 ;high: dst1 + out1 ;low: dst0 + out0 - paddw m1, m3 ;high: dst3 + out3 ;low: dst2 + out2 - - packuswb m0, m1 ;high->low: dst3 + out3, dst2 + out2, dst1 + out1, dst0 + out0 - - movd [%%row_adr1], m0 ;store dst0 + out0 - pshuflw m1, m0, q1032 - movd [%%row_adr2], m1 ;store dst1 + out1 - punpckhqdq m0, m0 - movd [%%row_adr3], m0 ;store dst2 + out2 - psrlq m0, 32 - movd [%%row_adr4], m0 ;store dst3 + out3 - - ret -%endmacro - - +iadst4_dconly2b: dw 26752, 26752, 26752, 26752, 30424, 30424, 30424, 30424 + +SECTION .text + +%macro REPX 2-* + %xdefine %%f(x) %1 +%rep %0 - 1 + %rotate 1 + %%f(%1) +%endrep +%endmacro + +%define m(x) mangle(private_prefix %+ _ %+ x %+ SUFFIX) + +%if ARCH_X86_64 +%define o(x) x +%else +%define o(x) r5-$$+x ; PIC +%endif + +%macro WRITE_4X4 9 ;src[1-2], tmp[1-3], row[1-4] + lea r2, [dstq+strideq*2] +%assign %%i 1 +%rotate 5 +%rep 4 + %if %1 & 2 + CAT_XDEFINE %%row_adr, %%i, r2 + 
strideq*(%1&1) + %else + CAT_XDEFINE %%row_adr, %%i, dstq + strideq*(%1&1) + %endif + %assign %%i %%i + 1 + %rotate 1 +%endrep + + movd m%3, [%%row_adr1] ;dst0 + movd m%5, [%%row_adr2] ;dst1 + punpckldq m%3, m%5 ;high: dst1 :low: dst0 + movd m%4, [%%row_adr3] ;dst2 + movd m%5, [%%row_adr4] ;dst3 + punpckldq m%4, m%5 ;high: dst3 :low: dst2 + + pxor m%5, m%5 + punpcklbw m%3, m%5 ;extend byte to word + punpcklbw m%4, m%5 ;extend byte to word + + paddw m%1, m%3 ;high: dst1 + out1 ;low: dst0 + out0 + paddw m%2, m%4 ;high: dst3 + out3 ;low: dst2 + out2 + + packuswb m%1, m%2 ;high->low: dst3 + out3, dst2 + out2, dst1 + out1, dst0 + out0 + + movd [%%row_adr1], m%1 ;store dst0 + out0 + pshuflw m%2, m%1, q1032 + movd [%%row_adr2], m%2 ;store dst1 + out1 + punpckhqdq m%1, m%1 + movd [%%row_adr3], m%1 ;store dst2 + out2 + psrlq m%1, 32 + movd [%%row_adr4], m%1 ;store dst3 + out3 +%endmacro + +%macro ITX4_END 4-5 2048 ; row[1-4], rnd +%if %5 + mova m2, [o(pw_%5)] + pmulhrsw m0, m2 + pmulhrsw m1, m2 +%endif + + WRITE_4X4 0, 1, 2, 3, 4, %1, %2, %3, %4 + ret +%endmacro + + ; flags: 1 = swap, 2: coef_regs %macro ITX_MUL2X_PACK 5-6 0 ; dst/src, tmp[1], rnd, coef[1-2], flags %if %6 & 2 pmaddwd m%2, m%4, m%1 pmaddwd m%1, m%5 %elif %6 & 1 - pmaddwd m%2, m%1, [qw_%5_%4] - pmaddwd m%1, [qw_%4_m%5] + pmaddwd m%2, m%1, [o(pw_%5_%4)] + pmaddwd m%1, [o(pw_%4_m%5)] %else - pmaddwd m%2, m%1, [qw_%4_m%5] - pmaddwd m%1, [qw_%5_%4] + pmaddwd m%2, m%1, [o(pw_%4_m%5)] + pmaddwd m%1, [o(pw_%5_%4)] %endif paddd m%2, m%3 paddd m%1, m%3 psrad m%2, 12 psrad m%1, 12 packssdw m%1, m%2 -%endmacro - -%macro IDCT4_1D_PACKED 0-1 ;qw_2896x8 - punpckhwd m2, m0, m1 ;unpacked in1 in3 - psubw m3, m0, m1 - paddw m0, m1 - punpcklqdq m0, m3 ;high: in0-in2 ;low: in0+in2 - - mova m3, [pd_2048] - ITX_MUL2X_PACK 2, 1, 3, 1567, 3784 - -%if %0 == 1 - pmulhrsw m0, m%1 -%else - pmulhrsw m0, [qw_2896x8] ;high: t1 ;low: t0 -%endif - - psubw m1, m0, m2 ;high: out2 ;low: out3 - paddw m0, m2 ;high: out1 ;low: out0 -%endmacro - 
+%endmacro + +%macro IDCT4_1D_PACKED 0-1 ;pw_2896x8 + punpckhwd m2, m0, m1 ;unpacked in1 in3 + psubw m3, m0, m1 + paddw m0, m1 + punpcklqdq m0, m3 ;high: in0-in2 ;low: in0+in2 + + mova m3, [o(pd_2048)] + ITX_MUL2X_PACK 2, 1, 3, 1567, 3784 + +%if %0 == 1 + pmulhrsw m0, m%1 +%else + pmulhrsw m0, [o(pw_2896x8)] ;high: t1 ;low: t0 +%endif + + psubsw m1, m0, m2 ;high: out2 ;low: out3 + paddsw m0, m2 ;high: out1 ;low: out0 +%endmacro + + %macro IADST4_1D_PACKED 0 punpcklwd m2, m0, m1 ;unpacked in0 in2 punpckhwd m3, m0, m1 ;unpacked in1 in3 @@ -146,15 +184,14 @@ SECTION .text punpckhqdq m1, m1 ; paddw m1, m0 ;low: in0 - in2 + in3 - pmaddwd m0, m2, [qw_1321_3803] ;1321 * in0 + 3803 * in2 - pmaddwd m2, [qw_2482_m1321] ;2482 * in0 - 1321 * in2 - pmaddwd m4, m3, [qw_3344_2482] ;3344 * in1 + 2482 * in3 - pmaddwd m5, m3, [qw_3344_m3803] ;3344 * in1 - 3803 * in3 + pmaddwd m0, m2, [o(pw_1321_3803)] ;1321 * in0 + 3803 * in2 + pmaddwd m2, [o(pw_2482_m1321)] ;2482 * in0 - 1321 * in2 + pmaddwd m4, m3, [o(pw_3344_2482)] ;3344 * in1 + 2482 * in3 + pmaddwd m5, m3, [o(pw_3344_m3803)];3344 * in1 - 3803 * in3 paddd m4, m0 ;t0 + t3 - - pmaddwd m3, [qw_m6688_m3803] ;-2 * 3344 * in1 - 3803 * in3 - pmulhrsw m1, [qw_3344x8] ;low: out2 - mova m0, [pd_2048] + pmaddwd m3, [o(pw_m6688_m3803)] ;-2 * 3344 * in1 - 3803 * in3 + pmulhrsw m1, [o(pw_3344x8)] ;low: out2 + mova m0, [o(pd_2048)] paddd m2, m0 paddd m0, m4 ;t0 + t3 + 2048 paddd m5, m2 ;t1 + t3 + 2048 @@ -166,37 +203,54 @@ SECTION .text psrad m2, 12 ;out3 packssdw m0, m5 ;high: out1 ;low: out0 packssdw m2, m2 ;high: out3 ;low: out3 -%endmacro - -%macro INV_TXFM_FN 4 ; type1, type2, fast_thresh, size -cglobal inv_txfm_add_%1_%2_%4, 4, 5, 0, dst, stride, coeff, eob, tx2 - %undef cmp - lea tx2q, [m(i%2_%4_internal).pass2] -%if %3 > 0 - cmp eobd, %3 - jle %%end -%elif %3 == 0 - test eobd, eobd - jz %%end -%endif - call i%1_%4_internal - RET -ALIGN function_align -%%end: -%endmacro - -%macro INV_TXFM_4X4_FN 2-3 -1 ; type1, type2, fast_thresh - 
INV_TXFM_FN %1, %2, %3, 4x4 +%endmacro + +%macro INV_TXFM_FN 5+ ; type1, type2, fast_thresh, size, xmm/stack +cglobal inv_txfm_add_%1_%2_%4, 4, 6, %5, dst, stride, coeff, eob, tx2 + %undef cmp + %define %%p1 m(i%1_%4_internal) +%if ARCH_X86_32 + LEA r5, $$ +%endif +%if has_epilogue +%if %3 > 0 + cmp eobd, %3 + jle %%end +%elif %3 == 0 + test eobd, eobd + jz %%end +%endif + lea tx2q, [o(m(i%2_%4_internal).pass2)] + call %%p1 + RET +%%end: +%else + lea tx2q, [o(m(i%2_%4_internal).pass2)] +%if %3 > 0 + cmp eobd, %3 + jg %%p1 +%elif %3 == 0 + test eobd, eobd + jnz %%p1 +%else + times ((%%end - %%p1) >> 31) & 1 jmp %%p1 +ALIGN function_align +%%end: +%endif +%endif +%endmacro + +%macro INV_TXFM_4X4_FN 2-3 -1 ; type1, type2, fast_thresh + INV_TXFM_FN %1, %2, %3, 4x4, 6 %ifidn %1_%2, dct_identity - mova m0, [qw_2896x8] + mova m0, [o(pw_2896x8)] pmulhrsw m0, [coeffq] paddw m0, m0 - pmulhrsw m0, [qw_5793x4] + pmulhrsw m0, [o(pw_5793x4)] punpcklwd m0, m0 punpckhdq m1, m0, m0 punpckldq m0, m0 - call m(iadst_4x4_internal).end - RET + TAIL_CALL m(iadst_4x4_internal).end %elifidn %1_%2, identity_dct mova m1, [coeffq+16*0] mova m2, [coeffq+16*1] @@ -205,69 +259,73 @@ ALIGN function_align punpcklwd m0, m1 punpcklqdq m0, m0 paddw m0, m0 - pmulhrsw m0, [qw_5793x4] - pmulhrsw m0, [qw_2896x8] + pmulhrsw m0, [o(pw_5793x4)] + pmulhrsw m0, [o(pw_2896x8)] mova m1, m0 - call m(iadst_4x4_internal).end - RET + TAIL_CALL m(iadst_4x4_internal).end %elif %3 >= 0 pshuflw m0, [coeffq], q0000 punpcklqdq m0, m0 %ifidn %1, dct - mova m1, [qw_2896x8] + mova m1, [o(pw_2896x8)] pmulhrsw m0, m1 %elifidn %1, adst - pmulhrsw m0, [iadst4_dconly1a] + pmulhrsw m0, [o(iadst4_dconly1a)] %elifidn %1, flipadst - pmulhrsw m0, [iadst4_dconly1b] + pmulhrsw m0, [o(iadst4_dconly1b)] %endif mov [coeffq], eobd ;0 %ifidn %2, dct %ifnidn %1, dct - pmulhrsw m0, [qw_2896x8] + pmulhrsw m0, [o(pw_2896x8)] %else pmulhrsw m0, m1 %endif mova m1, m0 - call m(iadst_4x4_internal).end2 - RET + TAIL_CALL m(iadst_4x4_internal).end2 
%else ; adst / flipadst - pmulhrsw m1, m0, [iadst4_dconly2b] - pmulhrsw m0, [iadst4_dconly2a] - call m(i%2_4x4_internal).end2 - RET + pmulhrsw m1, m0, [o(iadst4_dconly2b)] + pmulhrsw m0, [o(iadst4_dconly2a)] + TAIL_CALL m(i%2_4x4_internal).end2 %endif %endif -%endmacro - - -INIT_XMM ssse3 - -cglobal idct_4x4_internal, 0, 0, 4, dst, stride, coeff, eob, tx2 - mova m0, [coeffq+16*0] ;high: in1 ;low: in0 - mova m1, [coeffq+16*1] ;high: in3 ;low in2 - - IDCT4_1D_PACKED - - mova m2, [deint_shuf] - shufps m3, m0, m1, q1331 - shufps m0, m1, q0220 - pshufb m0, m2 ;high: in1 ;low: in0 - pshufb m1, m3, m2 ;high: in3 ;low :in2 +%endmacro + +INIT_XMM ssse3 + +INV_TXFM_4X4_FN dct, dct, 0 +INV_TXFM_4X4_FN dct, adst, 0 +INV_TXFM_4X4_FN dct, flipadst, 0 +INV_TXFM_4X4_FN dct, identity, 3 + +cglobal idct_4x4_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + mova m0, [coeffq+16*0] ;high: in1 ;low: in0 + mova m1, [coeffq+16*1] ;high: in3 ;low in2 + + IDCT4_1D_PACKED + + mova m2, [o(deint_shuf)] + shufps m3, m0, m1, q1331 + shufps m0, m1, q0220 + pshufb m0, m2 ;high: in1 ;low: in0 + pshufb m1, m3, m2 ;high: in3 ;low :in2 jmp tx2q -.pass2: - IDCT4_1D_PACKED - - pxor m2, m2 - mova [coeffq+16*0], m2 - mova [coeffq+16*1], m2 ;memset(coeff, 0, sizeof(*coeff) * sh * sw); - - ITX4_END 0, 1, 3, 2 - -INV_TXFM_4X4_FN dct, dct, 0 - -cglobal iadst_4x4_internal, 0, 0, 6, dst, stride, coeff, eob, tx2 +.pass2: + IDCT4_1D_PACKED + + pxor m2, m2 + mova [coeffq+16*0], m2 + mova [coeffq+16*1], m2 ;memset(coeff, 0, sizeof(*coeff) * sh * sw); + + ITX4_END 0, 1, 3, 2 + +INV_TXFM_4X4_FN adst, dct, 0 +INV_TXFM_4X4_FN adst, adst, 0 +INV_TXFM_4X4_FN adst, flipadst, 0 +INV_TXFM_4X4_FN adst, identity + +cglobal iadst_4x4_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 mova m0, [coeffq+16*0] mova m1, [coeffq+16*1] call .main @@ -294,11 +352,12 @@ ALIGN function_align IADST4_1D_PACKED ret -INV_TXFM_4X4_FN adst, adst, 0 -INV_TXFM_4X4_FN dct, adst, 0 -INV_TXFM_4X4_FN adst, dct, 0 +INV_TXFM_4X4_FN flipadst, dct, 0 
+INV_TXFM_4X4_FN flipadst, adst, 0 +INV_TXFM_4X4_FN flipadst, flipadst, 0 +INV_TXFM_4X4_FN flipadst, identity -cglobal iflipadst_4x4_internal, 0, 0, 6, dst, stride, coeff, eob, tx2 +cglobal iflipadst_4x4_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 mova m0, [coeffq+16*0] mova m1, [coeffq+16*1] call m(iadst_4x4_internal).main @@ -321,16 +380,15 @@ cglobal iflipadst_4x4_internal, 0, 0, 6, dst, stride, coeff, eob, tx2 .end2: ITX4_END 3, 2, 1, 0 -INV_TXFM_4X4_FN flipadst, flipadst, 0 -INV_TXFM_4X4_FN flipadst, dct, 0 -INV_TXFM_4X4_FN flipadst, adst, 0 -INV_TXFM_4X4_FN dct, flipadst, 0 -INV_TXFM_4X4_FN adst, flipadst, 0 +INV_TXFM_4X4_FN identity, dct, 3 +INV_TXFM_4X4_FN identity, adst +INV_TXFM_4X4_FN identity, flipadst +INV_TXFM_4X4_FN identity, identity -cglobal iidentity_4x4_internal, 0, 0, 6, dst, stride, coeff, eob, tx2 +cglobal iidentity_4x4_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 mova m0, [coeffq+16*0] mova m1, [coeffq+16*1] - mova m2, [qw_5793x4] + mova m2, [o(pw_5793x4)] paddw m0, m0 paddw m1, m1 pmulhrsw m0, m2 @@ -343,21 +401,13 @@ cglobal iidentity_4x4_internal, 0, 0, 6, dst, stride, coeff, eob, tx2 jmp tx2q .pass2: - mova m2, [qw_5793x4] + mova m2, [o(pw_5793x4)] paddw m0, m0 paddw m1, m1 pmulhrsw m0, m2 pmulhrsw m1, m2 jmp m(iadst_4x4_internal).end -INV_TXFM_4X4_FN identity, identity -INV_TXFM_4X4_FN identity, dct, 3 -INV_TXFM_4X4_FN identity, adst -INV_TXFM_4X4_FN identity, flipadst -INV_TXFM_4X4_FN dct, identity, 3 -INV_TXFM_4X4_FN adst, identity -INV_TXFM_4X4_FN flipadst, identity - %macro IWHT4_1D_PACKED 0 punpckhqdq m3, m0, m1 ;low: in1 high: in3 punpcklqdq m0, m1 ;low: in0 high: in2 @@ -391,4 +441,1038 @@ cglobal inv_txfm_add_wht_wht_4x4, 3, 3, 4, dst, stride, coeff IWHT4_1D_PACKED shufpd m0, m2, 0x01 - ITX4_END 0, 3, 2, 1, 0 + ITX4_END 0, 3, 2, 1, 0 + + +%macro IDCT8_1D_PACKED 0 + mova m6, [o(pd_2048)] + punpckhwd m5, m0, m3 ;unpacked in1 in7 + punpckhwd m4, m2, m1 ;unpacked in5 in3 + punpcklwd m1, m3 ;unpacked in2 in6 + psubw m3, m0, m2 
+ paddw m0, m2 + punpcklqdq m0, m3 ;low: in0+in4 high: in0-in4 + ITX_MUL2X_PACK 5, 2, 6, 799, 4017, 1 ;low: t4a high: t7a + ITX_MUL2X_PACK 4, 2, 6, 3406, 2276, 1 ;low: t5a high: t6a + ITX_MUL2X_PACK 1, 2, 6, 1567, 3784 ;low: t3 high: t2 + mova m6, [o(pw_2896x8)] + psubsw m2, m5, m4 ;low: t5a high: t6a + paddsw m5, m4 ;low: t4 high: t7 + punpckhqdq m4, m2, m2 ;low: t6a high: t6a + psubw m3, m4, m2 ;low: t6a - t5a + paddw m4, m2 ;low: t6a + t5a + punpcklqdq m4, m3 ;low: t6a + t5a high: t6a - t5a + pmulhrsw m0, m6 ;low: t0 high: t1 + pmulhrsw m4, m6 ;low: t6 high: t5 + shufps m2, m5, m4, q1032 ;low: t7 high: t6 + shufps m5, m4, q3210 ;low: t4 high: t5 + psubsw m4, m0, m1 ;low: tmp3 high: tmp2 + paddsw m0, m1 ;low: tmp0 high: tmp1 + psubsw m3, m0, m2 ;low: out7 high: out6 + paddsw m0, m2 ;low: out0 high: out1 + psubsw m2, m4, m5 ;low: out4 high: out5 + paddsw m1, m4, m5 ;low: out3 high: out2 +%endmacro + +;dst1 = (src1 * coef1 - src2 * coef2 + rnd) >> 12 +;dst2 = (src1 * coef2 + src2 * coef1 + rnd) >> 12 +%macro ITX_MULSUB_2W 7 ; dst/src[1-2], tmp[1-2], rnd, coef[1-2] + punpckhwd m%3, m%1, m%2 + punpcklwd m%1, m%2 +%if %7 < 8 + pmaddwd m%2, m%7, m%1 + pmaddwd m%4, m%7, m%3 +%else + mova m%2, [o(pw_%7_%6)] + pmaddwd m%4, m%3, m%2 + pmaddwd m%2, m%1 +%endif + paddd m%4, m%5 + paddd m%2, m%5 + psrad m%4, 12 + psrad m%2, 12 + packssdw m%2, m%4 ;dst2 +%if %7 < 8 + pmaddwd m%3, m%6 + pmaddwd m%1, m%6 +%else + mova m%4, [o(pw_%6_m%7)] + pmaddwd m%3, m%4 + pmaddwd m%1, m%4 +%endif + paddd m%3, m%5 + paddd m%1, m%5 + psrad m%3, 12 + psrad m%1, 12 + packssdw m%1, m%3 ;dst1 +%endmacro + +%macro IDCT4_1D 7 ; src[1-4], tmp[1-2], pd_2048 + ITX_MULSUB_2W %2, %4, %5, %6, %7, 1567, 3784 ;t2, t3 + mova m%6, [o(pw_2896x8)] + paddw m%5, m%1, m%3 + psubw m%1, m%3 + pmulhrsw m%1, m%6 ;t1 + pmulhrsw m%5, m%6 ;t0 + psubsw m%3, m%1, m%2 ;out2 + paddsw m%2, m%1 ;out1 + paddsw m%1, m%5, m%4 ;out0 + psubsw m%5, m%4 ;out3 + mova m%4, m%5 +%endmacro + +%macro IADST4_1D 0 + mova m4, m2 + psubw m2, 
m0, m4 + paddw m2, m3 ;low: in0 - in2 + in3 + + punpckhwd m6, m0, m4 ;unpacked in0 in2 + punpckhwd m7, m1, m3 ;unpacked in1 in3 + punpcklwd m0, m4 ;unpacked in0 in2 + punpcklwd m1, m3 ;unpacked in1 in3 + + pmaddwd m4, m0, [o(pw_1321_3803)] ;1321 * in0 + 3803 * in2 + pmaddwd m0, [o(pw_2482_m1321)] ;2482 * in0 - 1321 * in2 + pmaddwd m3, m1, [o(pw_3344_2482)] ;3344 * in1 + 2482 * in3 + pmaddwd m5, m1, [o(pw_3344_m3803)] ;3344 * in1 - 3803 * in3 + paddd m3, m4 ;t0 + t3 + + pmaddwd m1, [o(pw_m6688_m3803)] ;-2 * 3344 * in1 - 3803 * in3 + pmulhrsw m2, [o(pw_3344x8)] ;out2 + mova m4, [o(pd_2048)] + paddd m0, m4 + paddd m4, m3 ;t0 + t3 + 2048 + paddd m5, m0 ;t1 + t3 + 2048 + paddd m3, m0 + paddd m3, m1 ;t0 + t1 - t3 + 2048 + + psrad m4, 12 ;out0 + psrad m5, 12 ;out1 + psrad m3, 12 ;out3 + packssdw m0, m4, m5 ;low: out0 high: out1 + + pmaddwd m4, m6, [o(pw_1321_3803)] ;1321 * in0 + 3803 * in2 + pmaddwd m6, [o(pw_2482_m1321)] ;2482 * in0 - 1321 * in2 + pmaddwd m1, m7, [o(pw_3344_2482)] ;3344 * in1 + 2482 * in3 + pmaddwd m5, m7, [o(pw_3344_m3803)] ;3344 * in1 - 3803 * in3 + paddd m1, m4 ;t0 + t3 + pmaddwd m7, [o(pw_m6688_m3803)] ;-2 * 3344 * in1 - 3803 * in3 + + mova m4, [o(pd_2048)] + paddd m6, m4 + paddd m4, m1 ;t0 + t3 + 2048 + paddd m5, m6 ;t1 + t3 + 2048 + paddd m1, m6 + paddd m1, m7 ;t0 + t1 - t3 + 2048 + + psrad m4, 12 ;out0 + psrad m5, 12 ;out1 + psrad m1, 12 ;out3 + packssdw m3, m1 ;out3 + packssdw m4, m5 ;low: out0 high: out1 + + punpckhqdq m1, m0, m4 ;out1 + punpcklqdq m0, m4 ;out0 +%endmacro + +%macro IADST8_1D_PACKED 0 + mova m6, [o(pd_2048)] + punpckhwd m4, m3, m0 ;unpacked in7 in0 + punpckhwd m5, m2, m1 ;unpacked in5 in2 + punpcklwd m1, m2 ;unpacked in3 in4 + punpcklwd m0, m3 ;unpacked in1 in6 + ITX_MUL2X_PACK 4, 2, 6, 401, 4076 ;low: t0a high: t1a + ITX_MUL2X_PACK 5, 2, 6, 1931, 3612 ;low: t2a high: t3a + ITX_MUL2X_PACK 1, 2, 6, 3166, 2598 ;low: t4a high: t5a + ITX_MUL2X_PACK 0, 2, 6, 3920, 1189 ;low: t6a high: t7a + + psubsw m3, m4, m1 ;low: t4 high: t5 + 
paddsw m4, m1 ;low: t0 high: t1 + psubsw m2, m5, m0 ;low: t6 high: t7 + paddsw m5, m0 ;low: t2 high: t3 + + shufps m1, m3, m2, q1032 + punpckhwd m2, m1 + punpcklwd m3, m1 + ITX_MUL2X_PACK 3, 0, 6, 1567, 3784, 1 ;low: t5a high: t4a + ITX_MUL2X_PACK 2, 0, 6, 3784, 1567 ;low: t7a high: t6a + + psubsw m1, m4, m5 ;low: t2 high: t3 + paddsw m4, m5 ;low: out0 high: -out7 + psubsw m5, m3, m2 ;low: t7 high: t6 + paddsw m3, m2 ;low: out6 high: -out1 + shufps m0, m4, m3, q3210 ;low: out0 high: -out1 + shufps m3, m4, q3210 ;low: out6 high: -out7 + + shufps m4, m1, m5, q1032 ;low: t3 high: t7 + shufps m1, m5, q3210 ;low: t2 high: t6 + mova m5, [o(pw_2896x8)] + psubw m2, m1, m4 ;low: t2-t3 high: t6-t7 + paddw m1, m4 ;low: t2+t3 high: t6+t7 + pmulhrsw m2, m5 ;low: out4 high: -out5 + shufps m1, m1, q1032 + pmulhrsw m1, m5 ;low: out2 high: -out3 +%endmacro + +%macro WRITE_4X8 4 ;row[1-4] + WRITE_4X4 0, 1, 4, 5, 6, %1, %2, %3, %4 + lea dstq, [dstq+strideq*4] + WRITE_4X4 2, 3, 4, 5, 6, %1, %2, %3, %4 +%endmacro + +%macro INV_4X8 0 + punpckhwd m4, m2, m3 + punpcklwd m2, m3 + punpckhwd m3, m0, m1 + punpcklwd m0, m1 + punpckhdq m1, m0, m2 ;low: in2 high: in3 + punpckldq m0, m2 ;low: in0 high: in1 + punpckldq m2, m3, m4 ;low: in4 high: in5 + punpckhdq m3, m4 ;low: in6 high: in7 +%endmacro + +%macro INV_TXFM_4X8_FN 2-3 -1 ; type1, type2, fast_thresh + INV_TXFM_FN %1, %2, %3, 4x8, 8 +%if %3 >= 0 +%ifidn %1_%2, dct_identity + mova m1, [o(pw_2896x8)] + pmulhrsw m0, m1, [coeffq] + pmulhrsw m0, m1 + pmulhrsw m0, [o(pw_4096)] + punpckhwd m2, m0, m0 + punpcklwd m0, m0 + punpckhdq m1, m0, m0 + punpckldq m0, m0 + punpckhdq m3, m2, m2 + punpckldq m2, m2 + TAIL_CALL m(iadst_4x8_internal).end3 +%elifidn %1_%2, identity_dct + movd m0, [coeffq+16*0] + punpcklwd m0, [coeffq+16*1] + movd m1, [coeffq+16*2] + punpcklwd m1, [coeffq+16*3] + mova m2, [o(pw_2896x8)] + punpckldq m0, m1 + pmulhrsw m0, m2 + paddw m0, m0 + pmulhrsw m0, [o(pw_5793x4)] + pmulhrsw m0, m2 + pmulhrsw m0, [o(pw_2048)] + punpcklqdq m0, 
m0 + mova m1, m0 + mova m2, m0 + mova m3, m0 + TAIL_CALL m(iadst_4x8_internal).end3 +%elifidn %1_%2, dct_dct + pshuflw m0, [coeffq], q0000 + punpcklqdq m0, m0 + mova m1, [o(pw_2896x8)] + pmulhrsw m0, m1 + mov [coeffq], eobd + pmulhrsw m0, m1 + pmulhrsw m0, m1 + pmulhrsw m0, [o(pw_2048)] + mova m1, m0 + mova m2, m0 + mova m3, m0 + TAIL_CALL m(iadst_4x8_internal).end4 +%else ; adst_dct / flipadst_dct + pshuflw m0, [coeffq], q0000 + punpcklqdq m0, m0 + mova m1, [o(pw_2896x8)] + pmulhrsw m0, m1 +%ifidn %1, adst + pmulhrsw m0, [o(iadst4_dconly1a)] +%else ; flipadst + pmulhrsw m0, [o(iadst4_dconly1b)] +%endif + mov [coeffq], eobd + pmulhrsw m0, m1 + pmulhrsw m0, [o(pw_2048)] + mova m1, m0 + mova m2, m0 + mova m3, m0 + TAIL_CALL m(iadst_4x8_internal).end4 +%endif +%endif +%endmacro + +INV_TXFM_4X8_FN dct, dct, 0 +INV_TXFM_4X8_FN dct, identity, 7 +INV_TXFM_4X8_FN dct, adst +INV_TXFM_4X8_FN dct, flipadst + +cglobal idct_4x8_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + mova m3, [o(pw_2896x8)] + pmulhrsw m0, m3, [coeffq+16*0] + pmulhrsw m1, m3, [coeffq+16*1] + pmulhrsw m2, m3, [coeffq+16*2] + pmulhrsw m3, [coeffq+16*3] + + call m(idct_8x4_internal).main + call m(iadst_4x8_internal).inversion + jmp tx2q + +.pass2: + call .main + shufps m1, m1, q1032 + shufps m3, m3, q1032 + mova m4, [o(pw_2048)] + jmp m(iadst_4x8_internal).end2 + +ALIGN function_align +.main: + IDCT8_1D_PACKED + ret + + +INV_TXFM_4X8_FN adst, dct, 0 +INV_TXFM_4X8_FN adst, adst +INV_TXFM_4X8_FN adst, flipadst +INV_TXFM_4X8_FN adst, identity + +cglobal iadst_4x8_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + mova m3, [o(pw_2896x8)] + pmulhrsw m0, m3, [coeffq+16*0] + pmulhrsw m1, m3, [coeffq+16*1] + pmulhrsw m2, m3, [coeffq+16*2] + pmulhrsw m3, [coeffq+16*3] + + call m(iadst_8x4_internal).main + call .inversion + jmp tx2q + +.pass2: + shufps m0, m0, q1032 + shufps m1, m1, q1032 + call .main + mova m4, [o(pw_2048)] + pxor m5, m5 + psubw m5, m4 + +.end: + punpcklqdq m4, m5 + +.end2: + pmulhrsw m0, m4 + 
pmulhrsw m1, m4 + pmulhrsw m2, m4 + pmulhrsw m3, m4 + +.end3: + pxor m5, m5 + mova [coeffq+16*0], m5 + mova [coeffq+16*1], m5 + mova [coeffq+16*2], m5 + mova [coeffq+16*3], m5 + +.end4: + WRITE_4X8 0, 1, 2, 3 + RET + +ALIGN function_align +.main: + IADST8_1D_PACKED + ret + +ALIGN function_align +.inversion: + INV_4X8 + ret + +INV_TXFM_4X8_FN flipadst, dct, 0 +INV_TXFM_4X8_FN flipadst, adst +INV_TXFM_4X8_FN flipadst, flipadst +INV_TXFM_4X8_FN flipadst, identity + +cglobal iflipadst_4x8_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + mova m3, [o(pw_2896x8)] + pmulhrsw m0, m3, [coeffq+16*0] + pmulhrsw m1, m3, [coeffq+16*1] + pmulhrsw m2, m3, [coeffq+16*2] + pmulhrsw m3, [coeffq+16*3] + + call m(iadst_8x4_internal).main + + punpcklwd m4, m3, m2 + punpckhwd m3, m2 + punpcklwd m5, m1, m0 + punpckhwd m1, m0 + punpckldq m2, m3, m1 ;low: in4 high: in5 + punpckhdq m3, m1 ;low: in6 high: in7 + punpckldq m0, m4, m5 ;low: in0 high: in1 + punpckhdq m1, m4, m5 ;low: in2 high: in3 + jmp tx2q + +.pass2: + shufps m0, m0, q1032 + shufps m1, m1, q1032 + call m(iadst_4x8_internal).main + + mova m4, m0 + mova m5, m1 + pshufd m0, m3, q1032 + pshufd m1, m2, q1032 + pshufd m2, m5, q1032 + pshufd m3, m4, q1032 + mova m5, [o(pw_2048)] + pxor m4, m4 + psubw m4, m5 + jmp m(iadst_4x8_internal).end + +INV_TXFM_4X8_FN identity, dct, 3 +INV_TXFM_4X8_FN identity, adst +INV_TXFM_4X8_FN identity, flipadst +INV_TXFM_4X8_FN identity, identity + +cglobal iidentity_4x8_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + mova m3, [o(pw_2896x8)] + pmulhrsw m0, m3, [coeffq+16*0] + pmulhrsw m1, m3, [coeffq+16*1] + pmulhrsw m2, m3, [coeffq+16*2] + pmulhrsw m3, [coeffq+16*3] + + mova m5, [o(pw_5793x4)] + paddw m0, m0 + paddw m1, m1 + paddw m2, m2 + paddw m3, m3 + pmulhrsw m0, m5 + pmulhrsw m1, m5 + pmulhrsw m2, m5 + pmulhrsw m3, m5 + + call m(iadst_4x8_internal).inversion + jmp tx2q + +.pass2: + mova m4, [o(pw_4096)] + jmp m(iadst_4x8_internal).end2 + + +%macro WRITE_8X2 5 ;coefs[1-2], tmp[1-3] + movq m%3, 
[dstq ] + movq m%4, [dstq+strideq] + pxor m%5, m%5 + punpcklbw m%3, m%5 ;extend byte to word + punpcklbw m%4, m%5 ;extend byte to word +%ifnum %1 + paddw m%3, m%1 +%else + paddw m%3, %1 +%endif +%ifnum %2 + paddw m%4, m%2 +%else + paddw m%4, %2 +%endif + packuswb m%3, m%4 + movq [dstq ], m%3 + punpckhqdq m%3, m%3 + movq [dstq+strideq], m%3 +%endmacro + +%macro WRITE_8X4 7 ;coefs[1-4], tmp[1-3] + WRITE_8X2 %1, %2, %5, %6, %7 + lea dstq, [dstq+strideq*2] + WRITE_8X2 %3, %4, %5, %6, %7 +%endmacro + +%macro INV_TXFM_8X4_FN 2-3 -1 ; type1, type2, fast_thresh + INV_TXFM_FN %1, %2, %3, 8x4, 8 +%if %3 >= 0 +%ifidn %1_%2, dct_identity + mova m0, [o(pw_2896x8)] + pmulhrsw m1, m0, [coeffq] + pmulhrsw m1, m0 + paddw m1, m1 + pmulhrsw m1, [o(pw_5793x4)] + pmulhrsw m1, [o(pw_2048)] + punpcklwd m1, m1 + punpckhdq m2, m1, m1 + punpckldq m1, m1 + punpckhdq m3, m2, m2 + punpckldq m2, m2 + punpckldq m0, m1, m1 + punpckhdq m1, m1 +%elifidn %1_%2, identity_dct + mova m0, [coeffq+16*0] + mova m1, [coeffq+16*1] + mova m2, [coeffq+16*2] + mova m3, [coeffq+16*3] + punpckhwd m4, m0, m1 + punpcklwd m0, m1 + punpckhwd m5, m2, m3 + punpcklwd m2, m3 + punpcklwd m0, m4 + punpcklwd m2, m5 + punpcklqdq m0, m2 + mova m4, [o(pw_2896x8)] + pmulhrsw m0, m4 + paddw m0, m0 + pmulhrsw m0, m4 + pmulhrsw m0, [o(pw_2048)] + mova m1, m0 + mova m2, m0 + mova m3, m0 +%else + pshuflw m0, [coeffq], q0000 + punpcklqdq m0, m0 + mova m1, [o(pw_2896x8)] + pmulhrsw m0, m1 + pmulhrsw m0, m1 +%ifidn %2, dct + mova m2, [o(pw_2048)] + pmulhrsw m0, m1 + pmulhrsw m0, m2 + mova m1, m0 + mova m2, m0 + mova m3, m0 +%else ; adst / flipadst + pmulhrsw m2, m0, [o(iadst4_dconly2b)] + pmulhrsw m0, [o(iadst4_dconly2a)] + mova m1, [o(pw_2048)] + pmulhrsw m0, m1 + pmulhrsw m2, m1 +%ifidn %2, adst + punpckhqdq m1, m0, m0 + punpcklqdq m0, m0 + punpckhqdq m3, m2, m2 + punpcklqdq m2, m2 +%else ; flipadst + mova m3, m0 + punpckhqdq m0, m2, m2 + punpcklqdq m1, m2, m2 + punpckhqdq m2, m3, m3 + punpcklqdq m3, m3 +%endif +%endif +%endif + 
TAIL_CALL m(iadst_8x4_internal).end2 +%endif +%endmacro + +INV_TXFM_8X4_FN dct, dct, 0 +INV_TXFM_8X4_FN dct, adst, 0 +INV_TXFM_8X4_FN dct, flipadst, 0 +INV_TXFM_8X4_FN dct, identity, 3 + +cglobal idct_8x4_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + mova m3, [o(pw_2896x8)] + pmulhrsw m0, m3, [coeffq+16*0] + pmulhrsw m1, m3, [coeffq+16*1] + pmulhrsw m2, m3, [coeffq+16*2] + pmulhrsw m3, [coeffq+16*3] + + call m(idct_4x8_internal).main + + mova m4, [o(deint_shuf1)] + mova m5, [o(deint_shuf2)] + pshufb m0, m4 + pshufb m1, m5 + pshufb m2, m4 + pshufb m3, m5 + punpckhdq m4, m0, m1 + punpckldq m0, m1 + punpckhdq m5, m2, m3 + punpckldq m2, m3 + punpckhqdq m1, m0, m2 ;in1 + punpcklqdq m0, m2 ;in0 + punpckhqdq m3, m4, m5 ;in3 + punpcklqdq m2 ,m4, m5 ;in2 + jmp tx2q + +.pass2: + call .main + jmp m(iadst_8x4_internal).end + +ALIGN function_align +.main: + mova m6, [o(pd_2048)] + IDCT4_1D 0, 1, 2, 3, 4, 5, 6 + ret + +INV_TXFM_8X4_FN adst, dct +INV_TXFM_8X4_FN adst, adst +INV_TXFM_8X4_FN adst, flipadst +INV_TXFM_8X4_FN adst, identity + +cglobal iadst_8x4_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + mova m3, [o(pw_2896x8)] + pmulhrsw m0, m3, [coeffq+16*0] + pmulhrsw m1, m3, [coeffq+16*1] + pmulhrsw m2, m3, [coeffq+16*2] + pmulhrsw m3, [coeffq+16*3] + + shufps m0, m0, q1032 + shufps m1, m1, q1032 + call m(iadst_4x8_internal).main + + punpckhwd m4, m0, m1 + punpcklwd m0, m1 + punpckhwd m1, m2, m3 + punpcklwd m2, m3 + pxor m5, m5 + psubw m3, m5, m1 + psubw m5, m4 + punpckhdq m4, m5, m3 + punpckldq m5, m3 + punpckhdq m3, m0, m2 + punpckldq m0, m2 + punpckhwd m1, m0, m5 ;in1 + punpcklwd m0, m5 ;in0 + punpcklwd m2, m3, m4 ;in2 + punpckhwd m3, m4 ;in3 + jmp tx2q + +.pass2: + call .main + +.end: + mova m4, [o(pw_2048)] + pmulhrsw m0, m4 + pmulhrsw m1, m4 + pmulhrsw m2, m4 + pmulhrsw m3, m4 + +.end2: + pxor m6, m6 + mova [coeffq+16*0], m6 + mova [coeffq+16*1], m6 + mova [coeffq+16*2], m6 + mova [coeffq+16*3], m6 +.end3: + WRITE_8X4 0, 1, 2, 3, 4, 5, 6 + RET + +ALIGN 
function_align +.main: + IADST4_1D + ret + +INV_TXFM_8X4_FN flipadst, dct +INV_TXFM_8X4_FN flipadst, adst +INV_TXFM_8X4_FN flipadst, flipadst +INV_TXFM_8X4_FN flipadst, identity + +cglobal iflipadst_8x4_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + mova m3, [o(pw_2896x8)] + pmulhrsw m0, m3, [coeffq+16*0] + pmulhrsw m1, m3, [coeffq+16*1] + pmulhrsw m2, m3, [coeffq+16*2] + pmulhrsw m3, [coeffq+16*3] + + shufps m0, m0, q1032 + shufps m1, m1, q1032 + call m(iadst_4x8_internal).main + + punpckhwd m5, m3, m2 + punpcklwd m3, m2 + punpckhwd m2, m1, m0 + punpcklwd m1, m0 + + pxor m0, m0 + psubw m4, m0, m2 + psubw m0, m5 + punpckhdq m2, m0, m4 + punpckldq m0, m4 + punpckhdq m4, m3, m1 + punpckldq m3, m1 + punpckhwd m1, m0, m3 ;in1 + punpcklwd m0, m3 ;in0 + punpckhwd m3, m2, m4 ;in3 + punpcklwd m2, m4 ;in2 + jmp tx2q + +.pass2: + call m(iadst_8x4_internal).main + mova m4, m0 + mova m5, m1 + mova m0, m3 + mova m1, m2 + mova m2, m5 + mova m3, m4 + jmp m(iadst_8x4_internal).end + +INV_TXFM_8X4_FN identity, dct, 7 +INV_TXFM_8X4_FN identity, adst +INV_TXFM_8X4_FN identity, flipadst +INV_TXFM_8X4_FN identity, identity + +cglobal iidentity_8x4_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + mova m3, [o(pw_2896x8)] + pmulhrsw m0, m3, [coeffq+16*0] + pmulhrsw m1, m3, [coeffq+16*1] + pmulhrsw m2, m3, [coeffq+16*2] + pmulhrsw m3, [coeffq+16*3] + paddw m0, m0 + paddw m1, m1 + paddw m2, m2 + paddw m3, m3 + + punpckhwd m4, m0, m1 + punpcklwd m0, m1 + punpckhwd m1, m2, m3 + punpcklwd m2, m3 + punpckhdq m5, m4, m1 + punpckldq m4, m1 + punpckhdq m3, m0, m2 + punpckldq m0, m2 + punpckhwd m1, m0, m4 ;in1 + punpcklwd m0, m4 ;in0 + punpcklwd m2, m3, m5 ;in2 + punpckhwd m3, m5 ;in3 + jmp tx2q + +.pass2: + mova m4, [o(pw_5793x4)] + paddw m0, m0 + paddw m1, m1 + paddw m2, m2 + paddw m3, m3 + pmulhrsw m0, m4 + pmulhrsw m1, m4 + pmulhrsw m2, m4 + pmulhrsw m3, m4 + jmp m(iadst_8x4_internal).end + +%macro INV_TXFM_8X8_FN 2-3 -1 ; type1, type2, fast_thresh + INV_TXFM_FN %1, %2, %3, 8x8, 8 +%ifidn 
%1_%2, dct_identity + mova m0, [o(pw_2896x8)] + pmulhrsw m0, [coeffq] + mova m1, [o(pw_16384)] + pmulhrsw m0, m1 + psrlw m1, 2 + pmulhrsw m0, m1 + punpckhwd m7, m0, m0 + punpcklwd m0, m0 + pshufd m3, m0, q3333 + pshufd m2, m0, q2222 + pshufd m1, m0, q1111 + pshufd m0, m0, q0000 + call m(iadst_8x4_internal).end2 + pshufd m3, m7, q3333 + pshufd m2, m7, q2222 + pshufd m1, m7, q1111 + pshufd m0, m7, q0000 + lea dstq, [dstq+strideq*2] + TAIL_CALL m(iadst_8x4_internal).end3 +%elif %3 >= 0 +%ifidn %1, dct + pshuflw m0, [coeffq], q0000 + punpcklwd m0, m0 + mova m1, [o(pw_2896x8)] + pmulhrsw m0, m1 + mova m2, [o(pw_16384)] + mov [coeffq], eobd + pmulhrsw m0, m2 + psrlw m2, 3 + pmulhrsw m0, m1 + pmulhrsw m0, m2 +.end: + mov r2d, 2 +.end2: + lea r3, [strideq*3] +.loop: + WRITE_8X4 0, 0, 0, 0, 1, 2, 3 + lea dstq, [dstq+strideq*2] + dec r2d + jg .loop + RET +%else ; identity + mova m0, [coeffq+16*0] + mova m1, [coeffq+16*1] + mova m2, [coeffq+16*2] + mova m3, [coeffq+16*3] + punpcklwd m0, [coeffq+16*4] + punpcklwd m1, [coeffq+16*5] + punpcklwd m2, [coeffq+16*6] + punpcklwd m3, [coeffq+16*7] + punpcklwd m0, m2 + punpcklwd m1, m3 + punpcklwd m0, m1 + pmulhrsw m0, [o(pw_2896x8)] + pmulhrsw m0, [o(pw_2048)] + pxor m4, m4 + REPX {mova [coeffq+16*x], m4}, 0, 1, 2, 3, 4, 5, 6, 7 + jmp m(inv_txfm_add_dct_dct_8x8).end +%endif +%endif +%endmacro + +%macro ITX_8X8_LOAD_COEFS 0 + mova m0, [coeffq+16*0] + mova m1, [coeffq+16*1] + mova m2, [coeffq+16*2] + mova m3, [coeffq+16*3] + mova m4, [coeffq+16*4] + mova m5, [coeffq+16*5] + mova m6, [coeffq+16*6] +%endmacro + +%macro IDCT8_1D_ODDHALF 7 ; src[1-4], tmp[1-2], pd_2048 + ITX_MULSUB_2W %1, %4, %5, %6, %7, 799, 4017 ;t4a, t7a + ITX_MULSUB_2W %3, %2, %5, %6, %7, 3406, 2276 ;t5a, t6a + psubsw m%5, m%1, m%3 ;t5a + paddsw m%1, m%3 ;t4 + psubsw m%6, m%4, m%2 ;t6a + paddsw m%4, m%2 ;t7 + mova m%3, [o(pw_2896x8)] + psubw m%2, m%6, m%5 ;t6a - t5a + paddw m%6, m%5 ;t6a + t5a + pmulhrsw m%2, m%3 ;t5 + pmulhrsw m%3, m%6 ;t6 +%endmacro + +INV_TXFM_8X8_FN 
dct, dct, 0 +INV_TXFM_8X8_FN dct, identity, 7 +INV_TXFM_8X8_FN dct, adst +INV_TXFM_8X8_FN dct, flipadst + +cglobal idct_8x8_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + ITX_8X8_LOAD_COEFS + call .main + +.pass1_end: + mova m7, [o(pw_16384)] + REPX {pmulhrsw x, m7}, m0, m2, m4, m6 + mova [coeffq+16*6], m6 + +.pass1_end2: + REPX {pmulhrsw x, m7}, m1, m3, m5 + pmulhrsw m7, [coeffq+16*7] + +.pass1_end3: + punpcklwd m6, m1, m5 ;10 50 11 51 12 52 13 53 + punpckhwd m1, m5 ;14 54 15 55 16 56 17 57 + punpckhwd m5, m0, m4 ;04 44 05 45 06 46 07 47 + punpcklwd m0, m4 ;00 40 01 41 02 42 03 43 + punpckhwd m4, m3, m7 ;34 74 35 75 36 76 37 77 + punpcklwd m3, m7 ;30 70 31 71 32 72 33 73 + punpckhwd m7, m1, m4 ;16 36 56 76 17 37 57 77 + punpcklwd m1, m4 ;14 34 54 74 15 35 55 75 + punpckhwd m4, m6, m3 ;12 32 52 72 13 33 53 73 + punpcklwd m6, m3 ;10 30 50 70 11 31 51 71 + mova [coeffq+16*5], m6 + mova m6, [coeffq+16*6] + punpckhwd m3, m2, m6 ;24 64 25 65 26 66 27 67 + punpcklwd m2, m6 ;20 60 21 61 22 62 23 63 + punpckhwd m6, m5, m3 ;06 26 46 66 07 27 47 67 + punpcklwd m5, m3 ;04 24 44 64 05 25 45 65 + punpckhwd m3, m0, m2 ;02 22 42 62 03 23 43 63 + punpcklwd m0, m2 ;00 20 40 60 01 21 41 61 + + punpckhwd m2, m6, m7 ;07 17 27 37 47 57 67 77 + punpcklwd m6, m7 ;06 16 26 36 46 56 66 76 + mova [coeffq+16*7], m2 + punpcklwd m2, m3, m4 ;02 12 22 32 42 52 62 72 + punpckhwd m3, m4 ;03 13 23 33 43 53 63 73 + punpcklwd m4, m5, m1 ;04 14 24 34 44 54 64 74 + punpckhwd m5, m1 ;05 15 25 35 45 55 65 75 + mova m7, [coeffq+16*5] + punpckhwd m1, m0, m7 ;01 11 21 31 41 51 61 71 + punpcklwd m0, m7 ;00 10 20 30 40 50 60 70 + jmp tx2q + +.pass2: + call .main + +.end: + mova m7, [o(pw_2048)] + REPX {pmulhrsw x, m7}, m0, m2, m4, m6 + mova [coeffq+16*6], m6 + +.end2: + REPX {pmulhrsw x, m7}, m1, m3, m5 + pmulhrsw m7, [coeffq+16*7] + mova [coeffq+16*5], m5 + mova [coeffq+16*7], m7 + +.end3: + WRITE_8X4 0, 1, 2, 3, 5, 6, 7 + lea dstq, [dstq+strideq*2] + WRITE_8X4 4, [coeffq+16*5], [coeffq+16*6], 
[coeffq+16*7], 5, 6, 7 + + pxor m7, m7 + REPX {mova [coeffq+16*x], m7}, 0, 1, 2, 3, 4, 5, 6, 7 + ret + +ALIGN function_align +.main: + mova [coeffq+16*6], m3 + mova [coeffq+16*5], m1 + mova m7, [o(pd_2048)] + IDCT4_1D 0, 2, 4, 6, 1, 3, 7 + mova m3, [coeffq+16*5] + mova [coeffq+16*5], m2 + mova m2, [coeffq+16*6] + mova [coeffq+16*6], m4 + mova m4, [coeffq+16*7] + mova [coeffq+16*7], m6 + IDCT8_1D_ODDHALF 3, 2, 5, 4, 1, 6, 7 + mova m6, [coeffq+16*7] + psubsw m7, m0, m4 ;out7 + paddsw m0, m4 ;out0 + mova [coeffq+16*7], m7 + mova m1, [coeffq+16*5] + psubsw m4, m6, m3 ;out4 + paddsw m3, m6 ;out3 + mova m7, [coeffq+16*6] + psubsw m6, m1, m5 ;out6 + paddsw m1, m5 ;out1 + psubsw m5, m7, m2 ;out5 + paddsw m2, m7 ;out2 + ret + + +INV_TXFM_8X8_FN adst, dct +INV_TXFM_8X8_FN adst, adst +INV_TXFM_8X8_FN adst, flipadst +INV_TXFM_8X8_FN adst, identity + +cglobal iadst_8x8_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + ITX_8X8_LOAD_COEFS + call .main + mova m7, [o(pw_16384)] + REPX {pmulhrsw x, m7}, m0, m2, m4, m6 + mova [coeffq+16*6], m6 + pxor m6, m6 + psubw m6, m7 + mova m7, m6 + jmp m(idct_8x8_internal).pass1_end2 + +ALIGN function_align +.pass2: + call .main + mova m7, [o(pw_2048)] + REPX {pmulhrsw x, m7}, m0, m2, m4, m6 + mova [coeffq+16*6], m6 + pxor m6, m6 + psubw m6, m7 + mova m7, m6 + jmp m(idct_8x8_internal).end2 + +ALIGN function_align +.main: + mova [coeffq+16*6], m3 + mova [coeffq+16*5], m4 + mova m7, [o(pd_2048)] + ITX_MULSUB_2W 5, 2, 3, 4, 7, 1931, 3612 ;t3a, t2a + ITX_MULSUB_2W 1, 6, 3, 4, 7, 3920, 1189 ;t7a, t6a + paddsw m3, m2, m6 ;t2 + psubsw m2, m6 ;t6 + paddsw m4, m5, m1 ;t3 + psubsw m5, m1 ;t7 + ITX_MULSUB_2W 5, 2, 1, 6, 7, 3784, 1567 ;t6a, t7a + + mova m6, [coeffq+16*5] + mova [coeffq+16*5], m5 + mova m1, [coeffq+16*6] + mova [coeffq+16*6], m2 + mova m5, [coeffq+16*7] + mova [coeffq+16*7], m3 + ITX_MULSUB_2W 5, 0, 2, 3, 7, 401, 4076 ;t1a, t0a + ITX_MULSUB_2W 1, 6, 2, 3, 7, 3166, 2598 ;t5a, t4a + psubsw m2, m0, m6 ;t4 + paddsw m0, m6 ;t0 + paddsw m3, m5, 
m1 ;t1 + psubsw m5, m1 ;t5 + ITX_MULSUB_2W 2, 5, 1, 6, 7, 1567, 3784 ;t5a, t4a + + mova m7, [coeffq+16*7] + paddsw m1, m3, m4 ;-out7 + psubsw m3, m4 ;t3 + mova [coeffq+16*7], m1 + psubsw m4, m0, m7 ;t2 + paddsw m0, m7 ;out0 + mova m6, [coeffq+16*5] + mova m7, [coeffq+16*6] + paddsw m1, m5, m6 ;-out1 + psubsw m5, m6 ;t6 + paddsw m6, m2, m7 ;out6 + psubsw m2, m7 ;t7 + paddw m7, m4, m3 ;t2 + t3 + psubw m4, m3 ;t2 - t3 + paddw m3, m5, m2 ;t6 + t7 + psubw m5, m2 ;t6 - t7 + mova m2, [o(pw_2896x8)] + pmulhrsw m4, m2 ;out4 + pmulhrsw m5, m2 ;-out5 + pmulhrsw m7, m2 ;-out3 + pmulhrsw m2, m3 ;out2 + mova m3, m7 + ret + +INV_TXFM_8X8_FN flipadst, dct +INV_TXFM_8X8_FN flipadst, adst +INV_TXFM_8X8_FN flipadst, flipadst +INV_TXFM_8X8_FN flipadst, identity + +cglobal iflipadst_8x8_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + ITX_8X8_LOAD_COEFS + call m(iadst_8x8_internal).main + mova m7, [o(pw_m16384)] + pmulhrsw m1, m7 + mova [coeffq+16*6], m1 + mova m1, m6 + mova m6, m2 + pmulhrsw m2, m5, m7 + mova m5, m6 + mova m6, m4 + pmulhrsw m4, m3, m7 + mova m3, m6 + mova m6, m0 + mova m0, m7 + pxor m7, m7 + psubw m7, m0 + pmulhrsw m0, [coeffq+16*7] + REPX {pmulhrsw x, m7}, m1, m3, m5 + pmulhrsw m7, m6 + jmp m(idct_8x8_internal).pass1_end3 + +ALIGN function_align +.pass2: + call m(iadst_8x8_internal).main + mova m7, [o(pw_2048)] + REPX {pmulhrsw x, m7}, m0, m2, m4, m6 + mova [coeffq+16*5], m2 + mova m2, m0 + pxor m0, m0 + psubw m0, m7 + mova m7, m2 + pmulhrsw m1, m0 + pmulhrsw m2, m5, m0 + mova [coeffq+16*6], m1 + mova m5, m4 + mova m1, m6 + pmulhrsw m4, m3, m0 + pmulhrsw m0, [coeffq+16*7] + mova m3, m5 + mova [coeffq+16*7], m7 + jmp m(idct_8x8_internal).end3 + +INV_TXFM_8X8_FN identity, dct, 7 +INV_TXFM_8X8_FN identity, adst +INV_TXFM_8X8_FN identity, flipadst +INV_TXFM_8X8_FN identity, identity + +cglobal iidentity_8x8_internal, 0, 0, 0, dst, stride, coeff, eob, tx2 + mova m0, [coeffq+16*0] + mova m1, [coeffq+16*1] + mova m2, [coeffq+16*2] + mova m3, [coeffq+16*3] + mova m4, 
[coeffq+16*4] + mova m5, [coeffq+16*5] + mova m7, [coeffq+16*7] + jmp m(idct_8x8_internal).pass1_end3 + +ALIGN function_align +.pass2: + mova m7, [o(pw_4096)] + REPX {pmulhrsw x, m7}, m0, m1, m2, m3, m4, m5, m6 + pmulhrsw m7, [coeffq+16*7] + mova [coeffq+16*5], m5 + mova [coeffq+16*6], m6 + mova [coeffq+16*7], m7 + jmp m(idct_8x8_internal).end3 diff --git a/third_party/dav1d/src/x86/mc_init_tmpl.c b/third_party/dav1d/src/x86/mc_init_tmpl.c index e8e1b0e65919..608e0c6da537 100644 --- a/third_party/dav1d/src/x86/mc_init_tmpl.c +++ b/third_party/dav1d/src/x86/mc_init_tmpl.c @@ -38,6 +38,7 @@ decl_mc_fn(dav1d_put_8tap_sharp_avx2); decl_mc_fn(dav1d_put_8tap_sharp_regular_avx2); decl_mc_fn(dav1d_put_8tap_sharp_smooth_avx2); decl_mc_fn(dav1d_put_bilin_avx2); +decl_mc_fn(dav1d_put_bilin_ssse3); decl_mct_fn(dav1d_prep_8tap_regular_avx2); decl_mct_fn(dav1d_prep_8tap_regular_smooth_avx2); @@ -69,6 +70,7 @@ decl_warp8x8_fn(dav1d_warp_affine_8x8_avx2); decl_warp8x8t_fn(dav1d_warp_affine_8x8t_avx2); decl_emu_edge_fn(dav1d_emu_edge_avx2); +decl_emu_edge_fn(dav1d_emu_edge_ssse3); void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) { #define init_mc_fn(type, name, suffix) \ @@ -82,6 +84,8 @@ void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) { return; #if BITDEPTH == 8 + init_mc_fn (FILTER_2D_BILINEAR, bilin, ssse3); + c->avg = dav1d_avg_ssse3; c->w_avg = dav1d_w_avg_ssse3; c->mask = dav1d_mask_ssse3; @@ -89,6 +93,7 @@ void bitfn(dav1d_mc_dsp_init_x86)(Dav1dMCDSPContext *const c) { c->blend = dav1d_blend_ssse3; c->blend_v = dav1d_blend_v_ssse3; c->blend_h = dav1d_blend_h_ssse3; + c->emu_edge = dav1d_emu_edge_ssse3; #endif if (!(flags & DAV1D_X86_CPU_FLAG_AVX2)) diff --git a/third_party/dav1d/src/x86/mc_ssse3.asm b/third_party/dav1d/src/x86/mc_ssse3.asm index afecb6803e87..e9eafc56d40c 100644 --- a/third_party/dav1d/src/x86/mc_ssse3.asm +++ b/third_party/dav1d/src/x86/mc_ssse3.asm @@ -45,7 +45,10 @@ obmc_masks: db 0, 0, 0, 0 db 45, 19, 47, 17, 48, 16, 50, 14, 
51, 13, 52, 12, 53, 11, 55, 9 db 56, 8, 57, 7, 58, 6, 59, 5, 60, 4, 60, 4, 61, 3, 62, 2 db 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0, 64, 0 -blend_shuf: db 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3 + +bilin_h_shuf4: db 1, 0, 2, 1, 3, 2, 4, 3, 9, 8, 10, 9, 11, 10, 12, 11 +bilin_h_shuf8: db 1, 0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7 +blend_shuf: db 0, 1, 0, 1, 0, 1, 0, 1, 2, 3, 2, 3, 2, 3, 2, 3 pb_64: times 16 db 64 pw_8: times 8 dw 8 @@ -76,10 +79,650 @@ BIDIR_JMP_TABLE blend_ssse3, 4, 8, 16, 32 BIDIR_JMP_TABLE blend_v_ssse3, 2, 4, 8, 16, 32 BIDIR_JMP_TABLE blend_h_ssse3, 2, 4, 8, 16, 16, 16, 16 +%macro BASE_JMP_TABLE 3-* + %xdefine %1_%2_table (%%table - %3) + %xdefine %%base %1_%2 + %%table: + %rep %0 - 2 + dw %%base %+ _w%3 - %%base + %rotate 1 + %endrep +%endmacro + +%xdefine put_ssse3 mangle(private_prefix %+ _put_bilin_ssse3.put) + +BASE_JMP_TABLE put, ssse3, 2, 4, 8, 16, 32, 64, 128 + +%macro HV_JMP_TABLE 5-* + %xdefine %%prefix mangle(private_prefix %+ _%1_%2_%3) + %xdefine %%base %1_%3 + %assign %%types %4 + %if %%types & 1 + %xdefine %1_%2_h_%3_table (%%h - %5) + %%h: + %rep %0 - 4 + dw %%prefix %+ .h_w%5 - %%base + %rotate 1 + %endrep + %rotate 4 + %endif + %if %%types & 2 + %xdefine %1_%2_v_%3_table (%%v - %5) + %%v: + %rep %0 - 4 + dw %%prefix %+ .v_w%5 - %%base + %rotate 1 + %endrep + %rotate 4 + %endif + %if %%types & 4 + %xdefine %1_%2_hv_%3_table (%%hv - %5) + %%hv: + %rep %0 - 4 + dw %%prefix %+ .hv_w%5 - %%base + %rotate 1 + %endrep + %endif +%endmacro + +HV_JMP_TABLE put, bilin, ssse3, 7, 2, 4, 8, 16, 32, 64, 128 + +%define table_offset(type, fn) type %+ fn %+ SUFFIX %+ _table - type %+ SUFFIX + SECTION .text INIT_XMM ssse3 +%if ARCH_X86_32 +DECLARE_REG_TMP 1 +cglobal put_bilin, 4, 8, 0, dst, ds, src, ss, w, h, mxy, bak +%define base t0-put_ssse3 +%else +DECLARE_REG_TMP 7 +%define base 0 +cglobal put_bilin, 4, 8, 0, dst, ds, src, ss, w, h, mxy +%endif +; +%macro RESTORE_DSQ_32 1 + %if ARCH_X86_32 + mov %1, dsm ; restore dsq + 
%endif +%endmacro +; + movifnidn mxyd, r6m ; mx + LEA t0, put_ssse3 + tzcnt wd, wm + mov hd, hm + test mxyd, mxyd + jnz .h + mov mxyd, r7m ; my + test mxyd, mxyd + jnz .v +.put: + movzx wd, word [t0+wq*2+table_offset(put,)] + add wq, t0 + lea r6, [ssq*3] + RESTORE_DSQ_32 t0 + jmp wq +.put_w2: + movzx r4d, word [srcq+ssq*0] + movzx r6d, word [srcq+ssq*1] + lea srcq, [srcq+ssq*2] + mov [dstq+dsq*0], r4w + mov [dstq+dsq*1], r6w + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .put_w2 + RET +.put_w4: + mov r4d, [srcq+ssq*0] + mov r6d, [srcq+ssq*1] + lea srcq, [srcq+ssq*2] + mov [dstq+dsq*0], r4d + mov [dstq+dsq*1], r6d + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .put_w4 + RET +.put_w8: + movq m0, [srcq+ssq*0] + movq m1, [srcq+ssq*1] + lea srcq, [srcq+ssq*2] + movq [dstq+dsq*0], m0 + movq [dstq+dsq*1], m1 + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .put_w8 + RET +.put_w16: + lea r4, [dsq*3] +.put_w16_in: + movu m0, [srcq+ssq*0] + movu m1, [srcq+ssq*1] + movu m2, [srcq+ssq*2] + movu m3, [srcq+r6 ] + lea srcq, [srcq+ssq*4] + mova [dstq+dsq*0], m0 + mova [dstq+dsq*1], m1 + mova [dstq+dsq*2], m2 + mova [dstq+r4 ], m3 + lea dstq, [dstq+dsq*4] + sub hd, 4 + jg .put_w16_in + RET +.put_w32: + movu m0, [srcq+ssq*0+16*0] + movu m1, [srcq+ssq*0+16*1] + movu m2, [srcq+ssq*1+16*0] + movu m3, [srcq+ssq*1+16*1] + lea srcq, [srcq+ssq*2] + mova [dstq+dsq*0+16*0], m0 + mova [dstq+dsq*0+16*1], m1 + mova [dstq+dsq*1+16*0], m2 + mova [dstq+dsq*1+16*1], m3 + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .put_w32 + RET +.put_w64: + movu m0, [srcq+16*0] + movu m1, [srcq+16*1] + movu m2, [srcq+16*2] + movu m3, [srcq+16*3] + add srcq, ssq + mova [dstq+16*0], m0 + mova [dstq+16*1], m1 + mova [dstq+16*2], m2 + mova [dstq+16*3], m3 + add dstq, dsq + dec hd + jg .put_w64 + RET +.put_w128: + movu m0, [srcq+16*0] + movu m1, [srcq+16*1] + movu m2, [srcq+16*2] + movu m3, [srcq+16*3] + mova [dstq+16*0], m0 + mova [dstq+16*1], m1 + mova [dstq+16*2], m2 + mova [dstq+16*3], m3 + movu m0, [srcq+16*4] + movu m1, [srcq+16*5] + 
movu m2, [srcq+16*6] + movu m3, [srcq+16*7] + mova [dstq+16*4], m0 + mova [dstq+16*5], m1 + mova [dstq+16*6], m2 + mova [dstq+16*7], m3 + add srcq, ssq + add dstq, dsq + dec hd + jg .put_w128 + RET +.h: + ; (16 * src[x] + (mx * (src[x + 1] - src[x])) + 8) >> 4 + ; = ((16 - mx) * src[x] + mx * src[x + 1] + 8) >> 4 + imul mxyd, 0xff01 + mova m4, [base+bilin_h_shuf8] + mova m0, [base+bilin_h_shuf4] + WIN64_SPILL_XMM 7 + add mxyd, 16 << 8 + movd m5, mxyd + mov mxyd, r7m ; my + pshuflw m5, m5, q0000 + punpcklqdq m5, m5 + test mxyd, mxyd + jnz .hv + movzx wd, word [t0+wq*2+table_offset(put, _bilin_h)] + mova m6, [base+pw_2048] + add wq, t0 + RESTORE_DSQ_32 t0 + jmp wq +.h_w2: + pshufd m4, m4, q3120 ; m4 = {1, 0, 2, 1, 5, 4, 6, 5} +.h_w2_loop: + movd m0, [srcq+ssq*0] + movd m1, [srcq+ssq*1] + lea srcq, [srcq+ssq*2] + punpckldq m0, m1 + pshufb m0, m4 + pmaddubsw m0, m5 + pmulhrsw m0, m6 + packuswb m0, m0 + movd r6d, m0 + mov [dstq+dsq*0], r6w + shr r6d, 16 + mov [dstq+dsq*1], r6w + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .h_w2_loop + RET +.h_w4: + movq m4, [srcq+ssq*0] + movhps m4, [srcq+ssq*1] + lea srcq, [srcq+ssq*2] + pshufb m4, m0 + pmaddubsw m4, m5 + pmulhrsw m4, m6 + packuswb m4, m4 + movd [dstq+dsq*0], m4 + pshufd m4, m4, q0101 + movd [dstq+dsq*1], m4 + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .h_w4 + RET +.h_w8: + movu m0, [srcq+ssq*0] + movu m1, [srcq+ssq*1] + lea srcq, [srcq+ssq*2] + pshufb m0, m4 + pshufb m1, m4 + pmaddubsw m0, m5 + pmaddubsw m1, m5 + pmulhrsw m0, m6 + pmulhrsw m1, m6 + packuswb m0, m1 + movq [dstq+dsq*0], m0 + movhps [dstq+dsq*1], m0 + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .h_w8 + RET +.h_w16: + movu m0, [srcq+8*0] + movu m1, [srcq+8*1] + add srcq, ssq + pshufb m0, m4 + pshufb m1, m4 + pmaddubsw m0, m5 + pmaddubsw m1, m5 + pmulhrsw m0, m6 + pmulhrsw m1, m6 + packuswb m0, m1 + mova [dstq], m0 + add dstq, dsq + dec hd + jg .h_w16 + RET +.h_w32: + movu m0, [srcq+mmsize*0+8*0] + movu m1, [srcq+mmsize*0+8*1] + movu m2, [srcq+mmsize*1+8*0] + movu 
m3, [srcq+mmsize*1+8*1] + add srcq, ssq + pshufb m0, m4 + pshufb m1, m4 + pshufb m2, m4 + pshufb m3, m4 + pmaddubsw m0, m5 + pmaddubsw m1, m5 + pmaddubsw m2, m5 + pmaddubsw m3, m5 + pmulhrsw m0, m6 + pmulhrsw m1, m6 + pmulhrsw m2, m6 + pmulhrsw m3, m6 + packuswb m0, m1 + packuswb m2, m3 + mova [dstq+16*0], m0 + mova [dstq+16*1], m2 + add dstq, dsq + dec hd + jg .h_w32 + RET +.h_w64: + mov r6, -16*3 +.h_w64_loop: + movu m0, [srcq+r6+16*3+8*0] + movu m1, [srcq+r6+16*3+8*1] + pshufb m0, m4 + pshufb m1, m4 + pmaddubsw m0, m5 + pmaddubsw m1, m5 + pmulhrsw m0, m6 + pmulhrsw m1, m6 + packuswb m0, m1 + mova [dstq+r6+16*3], m0 + add r6, 16 + jle .h_w64_loop + add srcq, ssq + add dstq, dsq + dec hd + jg .h_w64 + RET +.h_w128: + mov r6, -16*7 +.h_w128_loop: + movu m0, [srcq+r6+16*7+8*0] + movu m1, [srcq+r6+16*7+8*1] + pshufb m0, m4 + pshufb m1, m4 + pmaddubsw m0, m5 + pmaddubsw m1, m5 + pmulhrsw m0, m6 + pmulhrsw m1, m6 + packuswb m0, m1 + mova [dstq+r6+16*7], m0 + add r6, 16 + jle .h_w128_loop + add srcq, ssq + add dstq, dsq + dec hd + jg .h_w128 + RET +.v: + movzx wd, word [t0+wq*2+table_offset(put, _bilin_v)] + %assign stack_offset stack_offset - stack_size_padded + WIN64_SPILL_XMM 8 + imul mxyd, 0xff01 + mova m7, [base+pw_2048] + add mxyd, 16 << 8 + add wq, t0 + movd m6, mxyd + pshuflw m6, m6, q0000 + punpcklqdq m6, m6 + RESTORE_DSQ_32 t0 + jmp wq +.v_w2: + movd m0, [srcq+ssq*0] +.v_w2_loop: + pinsrw m0, [srcq+ssq*1], 1 ; 0 1 + lea srcq, [srcq+ssq*2] + pshuflw m2, m0, q2301 + pinsrw m0, [srcq+ssq*0], 0 ; 2 1 + punpcklbw m1, m0, m2 + pmaddubsw m1, m6 + pmulhrsw m1, m7 + packuswb m1, m1 + movd r6d, m1 + mov [dstq+dsq*1], r6w + shr r6d, 16 + mov [dstq+dsq*0], r6w + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .v_w2_loop + RET +.v_w4: + movd m0, [srcq+ssq*0] +.v_w4_loop: + movd m1, [srcq+ssq*1] + lea srcq, [srcq+ssq*2] + punpckldq m2, m0, m1 ; 0 1 + movd m0, [srcq+ssq*0] + punpckldq m1, m0 ; 1 2 + punpcklbw m1, m2 + pmaddubsw m1, m6 + pmulhrsw m1, m7 + packuswb m1, m1 + movd 
[dstq+dsq*0], m1 + psrlq m1, 32 + movd [dstq+dsq*1], m1 + ; + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .v_w4_loop + RET +.v_w8: + movq m0, [srcq+ssq*0] +.v_w8_loop: + movddup m2, [srcq+ssq*1] + lea srcq, [srcq+ssq*2] + punpcklqdq m3, m0, m2 ; 0 1 m2qh:m0ql + movddup m0, [srcq+ssq*0] + punpcklqdq m4, m2, m0 ; 1 2 m0qh:m2ql + punpcklbw m1, m4, m3 + punpckhbw m4, m3 + pmaddubsw m1, m6 + pmaddubsw m4, m6 + pmulhrsw m1, m7 + pmulhrsw m4, m7 + packuswb m1, m4 + movq [dstq+dsq*0], m1 + movhps [dstq+dsq*1], m1 + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .v_w8_loop + RET + ; +%macro PUT_BILIN_V_W16 0 + movu m0, [srcq+ssq*0] +%%loop: + movu m4, [srcq+ssq*1] + lea srcq, [srcq+ssq*2] + punpcklbw m1, m4, m0 + punpckhbw m3, m4, m0 + movu m0, [srcq+ssq*0] + punpcklbw m2, m0, m4 + pmaddubsw m1, m6 + pmaddubsw m3, m6 + pmulhrsw m1, m7 + pmulhrsw m3, m7 + packuswb m1, m3 + mova [dstq+dsq*0], m1 + punpckhbw m3, m0, m4 + pmaddubsw m2, m6 + pmaddubsw m3, m6 + pmulhrsw m2, m7 + pmulhrsw m3, m7 + packuswb m2, m3 + mova [dstq+dsq*1], m2 + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg %%loop +%endmacro + ; +.v_w16: + PUT_BILIN_V_W16 + RET +.v_w16gt: + mov r4, dstq + mov r6, srcq +.v_w16gt_loop: +%if ARCH_X86_32 + mov bakm, t0q + RESTORE_DSQ_32 t0 + PUT_BILIN_V_W16 + mov t0q, bakm +%else + PUT_BILIN_V_W16 +%endif + mov hw, t0w + add r4, mmsize + add r6, mmsize + mov dstq, r4 + mov srcq, r6 + sub t0d, 1<<16 + jg .v_w16gt + RET +.v_w32: + lea t0d, [hq+(1<<16)] + jmp .v_w16gt +.v_w64: + lea t0d, [hq+(3<<16)] + jmp .v_w16gt +.v_w128: + lea t0d, [hq+(7<<16)] + jmp .v_w16gt +.hv: + ; (16 * src[x] + (my * (src[x + src_stride] - src[x])) + 128) >> 8 + ; = (src[x] + ((my * (src[x + src_stride] - src[x])) >> 4) + 8) >> 4 + movzx wd, word [t0+wq*2+table_offset(put, _bilin_hv)] + %assign stack_offset stack_offset - stack_size_padded + WIN64_SPILL_XMM 8 + shl mxyd, 11 ; can't shift by 12 due to signed overflow + mova m7, [base+pw_2048] + movd m6, mxyd + add wq, t0 + pshuflw m6, m6, q0000 + punpcklqdq m6, m6 + 
jmp wq +.hv_w2: + RESTORE_DSQ_32 t0 + movd m0, [srcq+ssq*0] + pshufd m0, m0, q0000 ; src[x - src_stride] + pshufb m0, m4 + pmaddubsw m0, m5 +.hv_w2_loop: + movd m1, [srcq+ssq*1] ; src[x] + lea srcq, [srcq+ssq*2] + movhps m1, [srcq+ssq*0] ; src[x + src_stride] + pshufd m1, m1, q3120 + pshufb m1, m4 + pmaddubsw m1, m5 ; 1 _ 2 _ + shufps m2, m0, m1, q1032 ; 0 _ 1 _ + mova m0, m1 + psubw m1, m2 ; src[x + src_stride] - src[x] + paddw m1, m1 + pmulhw m1, m6 ; (my * (src[x + src_stride] - src[x]) + paddw m1, m2 ; src[x] + (my * (src[x + src_stride] - src[x]) + pmulhrsw m1, m7 + packuswb m1, m1 + pshuflw m1, m1, q2020 + movd r6d, m1 + mov [dstq+dsq*0], r6w + shr r6d, 16 + mov [dstq+dsq*1], r6w + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .hv_w2_loop + RET +.hv_w4: + mova m4, [base+bilin_h_shuf4] + RESTORE_DSQ_32 t0 + movddup xm0, [srcq+ssq*0] + pshufb m0, m4 + pmaddubsw m0, m5 +.hv_w4_loop: + movq m1, [srcq+ssq*1] + lea srcq, [srcq+ssq*2] + movhps m1, [srcq+ssq*0] + pshufb m1, m4 + pmaddubsw m1, m5 ; 1 2 + shufps m2, m0, m1, q1032 ; 0 1 + mova m0, m1 + psubw m1, m2 + paddw m1, m1 + pmulhw m1, m6 + paddw m1, m2 + pmulhrsw m1, m7 + packuswb m1, m1 + movd [dstq+dsq*0], m1 + psrlq m1, 32 + movd [dstq+dsq*1], m1 + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .hv_w4_loop + RET +.hv_w8: + RESTORE_DSQ_32 t0 + movu m0, [srcq+ssq*0+8*0] + pshufb m0, m4 + pmaddubsw m0, m5 +.hv_w8_loop: + movu m2, [srcq+ssq*1+8*0] + lea srcq, [srcq+ssq*2] + movu m3, [srcq+ssq*0+8*0] + pshufb m2, m4 + pshufb m3, m4 + pmaddubsw m2, m5 + psubw m1, m2, m0 + paddw m1, m1 + pmulhw m1, m6 + paddw m1, m0 + pmaddubsw m0, m3, m5 + psubw m3, m0, m2 + paddw m3, m3 + pmulhw m3, m6 + paddw m3, m2 + pmulhrsw m1, m7 + pmulhrsw m3, m7 + packuswb m1, m3 + movq [dstq+dsq*0], m1 + movhps [dstq+dsq*1], m1 + lea dstq, [dstq+dsq*2] + sub hd, 2 + jg .hv_w8_loop + RET + ; + ; 32bit has ssq, dsq free +%macro PUT_BILIN_HV_W16 0 + movu m0, [srcq+8*0] + movu m1, [srcq+8*1] + pshufb m0, m4 + pshufb m1, m4 + pmaddubsw m0, m5 + pmaddubsw 
m1, m5 + %if WIN64 + movaps r4m, xmm8 + %endif +%%loop: +%if ARCH_X86_32 + %define m3back [dstq] + %define dsqval dsm +%else + %define m3back m8 + %define dsqval dsq +%endif + add srcq, ssq + movu m2, [srcq+8*1] + pshufb m2, m4 + pmaddubsw m2, m5 + psubw m3, m2, m1 + paddw m3, m3 + pmulhw m3, m6 + paddw m3, m1 + mova m1, m2 + pmulhrsw m3, m7 + mova m3back, m3 + movu m2, [srcq+8*0] + pshufb m2, m4 + pmaddubsw m2, m5 + psubw m3, m2, m0 + paddw m3, m3 + pmulhw m3, m6 + paddw m3, m0 + mova m0, m2 + pmulhrsw m3, m7 + packuswb m3, m3back + mova [dstq], m3 + add dstq, dsqval + dec hd + jg %%loop + %if WIN64 + movaps xmm8, r4m + %endif + %undef m3back + %undef dsqval +%endmacro + ; +.hv_w16: + PUT_BILIN_HV_W16 + RET +.hv_w16gt: + mov r4, dstq + mov r6, srcq +.hv_w16gt_loop: + PUT_BILIN_HV_W16 + mov hw, t0w + add r4, mmsize + add r6, mmsize + mov dstq, r4 + mov srcq, r6 + sub t0d, 1<<16 + jg .hv_w16gt_loop + RET +.hv_w32: + lea t0d, [hq+(1<<16)] + jmp .hv_w16gt +.hv_w64: + lea t0d, [hq+(3<<16)] + jmp .hv_w16gt +.hv_w128: + lea t0d, [hq+(7<<16)] + jmp .hv_w16gt + %if WIN64 DECLARE_REG_TMP 6, 4 %else @@ -186,7 +829,7 @@ DECLARE_REG_TMP 6, 7 %endmacro cglobal avg, 4, 7, 3, dst, stride, tmp1, tmp2, w, h, stride3 - lea r6, [avg_ssse3_table] + LEA r6, avg_ssse3_table tzcnt wd, wm ; leading zeros movifnidn hd, hm ; move h(stack) to h(register) if not already that register movsxd wq, dword [r6+wq*4] ; push table entry matching the tile width (tzcnt) in widen reg @@ -216,7 +859,7 @@ cglobal avg, 4, 7, 3, dst, stride, tmp1, tmp2, w, h, stride3 %define W_AVG_INC_PTR AVG_INC_PTR cglobal w_avg, 4, 7, 6, dst, stride, tmp1, tmp2, w, h, stride3 - lea r6, [w_avg_ssse3_table] + LEA r6, w_avg_ssse3_table tzcnt wd, wm movifnidn hd, hm movd m0, r6m @@ -269,11 +912,12 @@ cglobal mask, 4, 8, 7, dst, stride, tmp1, tmp2, w, h, mask, stride3 cglobal mask, 4, 7, 7, dst, stride, tmp1, tmp2, w, mask, stride3 %define hd dword r5m %endif - lea r6, [mask_ssse3_table] +%define base r6-mask_ssse3_table + 
LEA r6, mask_ssse3_table tzcnt wd, wm movsxd wq, dword [r6+wq*4] pxor m4, m4 - mova m5, [pw_2048+r6-mask_ssse3_table] + mova m5, [base+pw_2048] add wq, r6 mov maskq, r6m BIDIR_FN MASK @@ -284,9 +928,9 @@ cglobal mask, 4, 7, 7, dst, stride, tmp1, tmp2, w, mask, stride3 %define reg_pw_27 m9 %define reg_pw_2048 m10 %else - %define reg_pw_8 [pw_8] - %define reg_pw_27 [pw_26] ; 64 - 38 - %define reg_pw_2048 [pw_2048] + %define reg_pw_8 [base+pw_8] + %define reg_pw_27 [base+pw_26] ; 64 - 38 + %define reg_pw_2048 [base+pw_2048] %endif %macro W_MASK_420_B 2 ; src_offset in bytes, mask_out @@ -323,63 +967,60 @@ cglobal mask, 4, 7, 7, dst, stride, tmp1, tmp2, w, mask, stride3 W_MASK_420_B (%1*16), %2 %endmacro +%define base r6-w_mask_420_ssse3_table %if ARCH_X86_64 ; args: dst, stride, tmp1, tmp2, w, h, mask, sign -cglobal w_mask_420, 4, 9, 11, dst, stride, tmp1, tmp2, w, h, mask, stride3 - lea r7, [w_mask_420_ssse3_table] +cglobal w_mask_420, 4, 8, 11, dst, stride, tmp1, tmp2, w, h, mask + lea r6, [w_mask_420_ssse3_table] mov wd, wm - tzcnt r8d, wd + tzcnt r7d, wd movifnidn hd, hm - mov maskq, maskmp movd m0, r7m pshuflw m0, m0, q0000 ; sign punpcklqdq m0, m0 - movsxd r8, dword [r7+r8*4] - mova reg_pw_8, [pw_8] - mova reg_pw_27, [pw_26] ; 64 - 38 - mova reg_pw_2048, [pw_2048] - mova m6, [pw_258] ; 64 * 4 + 2 + movsxd r7, [r6+r7*4] + mova reg_pw_8, [base+pw_8] + mova reg_pw_27, [base+pw_26] ; 64 - 38 + mova reg_pw_2048, [base+pw_2048] + mova m6, [base+pw_258] ; 64 * 4 + 2 + add r7, r6 + mov maskq, maskmp psubw m6, m0 - add r8, r7 W_MASK_420 0, 4 - lea stride3q, [strideq*3] - jmp r8 - %define dst_bak r8 - %define loop_w r7 - %define orig_w wq + jmp r7 + %define loop_w r7d %else -cglobal w_mask_420, 4, 7, 8, dst, stride, tmp1, tmp2, w, mask, stride3 - tzcnt r6d, r4m - mov wd, w_mask_420_ssse3_table - add wd, [wq+r6*4] +cglobal w_mask_420, 4, 7, 8, dst, stride, tmp1, tmp2, w, mask + tzcnt wd, wm + LEA r6, w_mask_420_ssse3_table + mov wd, [r6+wq*4] mov maskq, r6mp movd m0, r7m 
pshuflw m0, m0, q0000 ; sign punpcklqdq m0, m0 - mova m6, [pw_258] ; 64 * 4 + 2 + mova m6, [base+pw_258] ; 64 * 4 + 2 + add wq, r6 psubw m6, m0 W_MASK_420 0, 4 - lea stride3q, [strideq*3] jmp wd - %define dst_bak r0m - %define loop_w r6q - %define orig_w r4m - %define hd dword r5m + %define loop_w dword r0m + %define hd dword r5m %endif .w4_loop: add tmp1q, 2*16 add tmp2q, 2*16 W_MASK_420 0, 4 - lea dstq, [dstq+strideq*4] + lea dstq, [dstq+strideq*2] add maskq, 4 .w4: movd [dstq ], m0 ; copy m0[0] pshuflw m1, m0, q1032 movd [dstq+strideq*1], m1 ; copy m0[1] + lea dstq, [dstq+strideq*2] punpckhqdq m0, m0 - movd [dstq+strideq*2], m0 ; copy m0[2] + movd [dstq+strideq*0], m0 ; copy m0[2] psrlq m0, 32 - movd [dstq+stride3q ], m0 ; copy m0[3] + movd [dstq+strideq*1], m0 ; copy m0[3] pshufd m5, m4, q3131; DBDB even lines repeated pshufd m4, m4, q2020; CACA odd lines repeated psubw m1, m6, m4 ; m9 == 64 * 4 + 2 @@ -409,20 +1050,19 @@ cglobal w_mask_420, 4, 7, 8, dst, stride, tmp1, tmp2, w, mask, stride3 jg .w8_loop RET .w16: ; w32/64/128 - mov dst_bak, dstq - mov loop_w, orig_w ; use width as counter %if ARCH_X86_32 - mov wq, orig_w ; because we altered it in 32bit setup + mov wd, wm ; because we altered it in 32bit setup %endif + mov loop_w, wd ; use width as counter jmp .w16ge_inner_loop_first .w16ge_loop: lea tmp1q, [tmp1q+wq*2] ; skip even line pixels lea tmp2q, [tmp2q+wq*2] ; skip even line pixels + sub dstq, wq + mov loop_w, wd lea dstq, [dstq+strideq*2] - mov dst_bak, dstq - mov loop_w, orig_w .w16ge_inner_loop: - W_MASK_420_B 0, 4 + W_MASK_420_B 0, 4 .w16ge_inner_loop_first: mova [dstq ], m0 W_MASK_420_B wq*2, 5 ; load matching even line (offset = widthpx * (16+16)) @@ -438,7 +1078,6 @@ cglobal w_mask_420, 4, 7, 8, dst, stride, tmp1, tmp2, w, mask, stride3 add dstq, 16 sub loop_w, 16 jg .w16ge_inner_loop - mov dstq, dst_bak sub hd, 2 jg .w16ge_loop RET @@ -470,7 +1109,7 @@ cglobal w_mask_420, 4, 7, 8, dst, stride, tmp1, tmp2, w, mask, stride3 cglobal blend, 3, 7, 
7, dst, ds, tmp, w, h, mask %define base r6-blend_ssse3_table - lea r6, [blend_ssse3_table] + LEA r6, blend_ssse3_table tzcnt wd, wm movifnidn hd, hm movifnidn maskq, maskmp @@ -546,7 +1185,7 @@ cglobal blend, 3, 7, 7, dst, ds, tmp, w, h, mask cglobal blend_v, 3, 6, 8, dst, ds, tmp, w, h, mask %define base r5-blend_v_ssse3_table - lea r5, [blend_v_ssse3_table] + LEA r5, blend_v_ssse3_table tzcnt wd, wm movifnidn hd, hm movsxd wq, dword [r5+wq*4] @@ -646,15 +1285,21 @@ cglobal blend_v, 3, 6, 8, dst, ds, tmp, w, h, mask jg .w32_loop RET -cglobal blend_h, 4, 7, 6, dst, ds, tmp, w, h, mask -%define base r5-blend_h_ssse3_table - lea r5, [blend_h_ssse3_table] +cglobal blend_h, 3, 7, 6, dst, ds, tmp, w, h, mask +%define base t0-blend_h_ssse3_table +%if ARCH_X86_32 + ; We need to keep the PIC pointer for w4, reload wd from stack instead + DECLARE_REG_TMP 6 +%else + DECLARE_REG_TMP 5 mov r6d, wd - tzcnt wd, wd +%endif + LEA t0, blend_h_ssse3_table + tzcnt wd, wm mov hd, hm - movsxd wq, dword [r5+wq*4] + movsxd wq, dword [t0+wq*4] mova m5, [base+pw_512] - add wq, r5 + add wq, t0 lea maskq, [base+obmc_masks+hq*4] neg hq jmp wq @@ -678,7 +1323,11 @@ cglobal blend_h, 4, 7, 6, dst, ds, tmp, w, h, mask jl .w2 RET .w4: +%if ARCH_X86_32 + mova m3, [base+blend_shuf] +%else mova m3, [blend_shuf] +%endif .w4_loop: movd m0, [dstq+dsq*0] movd m2, [dstq+dsq*1] @@ -716,6 +1365,9 @@ cglobal blend_h, 4, 7, 6, dst, ds, tmp, w, h, mask RET ; w16/w32/w64/w128 .w16: +%if ARCH_X86_32 + mov r6d, wm +%endif sub dsq, r6 .w16_loop0: movd m3, [maskq+hq*2] @@ -735,3 +1387,373 @@ cglobal blend_h, 4, 7, 6, dst, ds, tmp, w, h, mask inc hq jl .w16_loop0 RET + +; emu_edge args: +; const intptr_t bw, const intptr_t bh, const intptr_t iw, const intptr_t ih, +; const intptr_t x, const intptr_t y, pixel *dst, const ptrdiff_t dst_stride, +; const pixel *ref, const ptrdiff_t ref_stride +; +; bw, bh total filled size +; iw, ih, copied block -> fill bottom, right +; x, y, offset in bw/bh -> fill top, left +cglobal 
emu_edge, 10, 13, 2, bw, bh, iw, ih, x, \ + y, dst, dstride, src, sstride, \ + bottomext, rightext, blk + ; we assume that the buffer (stride) is larger than width, so we can + ; safely overwrite by a few bytes + pxor m1, m1 + +%if ARCH_X86_64 + %define reg_zero r12q + %define reg_tmp r10 + %define reg_src srcq + %define reg_bottomext bottomextq + %define reg_rightext rightextq + %define reg_blkm r9m +%else + %define reg_zero r6 + %define reg_tmp r0 + %define reg_src r1 + %define reg_bottomext r0 + %define reg_rightext r1 + %define reg_blkm r2m +%endif + ; + ; ref += iclip(y, 0, ih - 1) * PXSTRIDE(ref_stride) + xor reg_zero, reg_zero + lea reg_tmp, [ihq-1] + cmp yq, ihq + cmovl reg_tmp, yq + test yq, yq + cmovl reg_tmp, reg_zero +%if ARCH_X86_64 + imul reg_tmp, sstrideq + add srcq, reg_tmp +%else + imul reg_tmp, sstridem + mov reg_src, srcm + add reg_src, reg_tmp +%endif + ; + ; ref += iclip(x, 0, iw - 1) + lea reg_tmp, [iwq-1] + cmp xq, iwq + cmovl reg_tmp, xq + test xq, xq + cmovl reg_tmp, reg_zero + add reg_src, reg_tmp +%if ARCH_X86_32 + mov srcm, reg_src +%endif + ; + ; bottom_ext = iclip(y + bh - ih, 0, bh - 1) +%if ARCH_X86_32 + mov r1, r1m ; restore bh +%endif + lea reg_bottomext, [yq+bhq] + sub reg_bottomext, ihq + lea r3, [bhq-1] + cmovl reg_bottomext, reg_zero + ; + + DEFINE_ARGS bw, bh, iw, ih, x, \ + topext, dst, dstride, src, sstride, \ + bottomext, rightext, blk + + ; top_ext = iclip(-y, 0, bh - 1) + neg topextq + cmovl topextq, reg_zero + cmp reg_bottomext, bhq + cmovge reg_bottomext, r3 + cmp topextq, bhq + cmovg topextq, r3 + %if ARCH_X86_32 + mov r4m, reg_bottomext + ; + ; right_ext = iclip(x + bw - iw, 0, bw - 1) + mov r0, r0m ; restore bw + %endif + lea reg_rightext, [xq+bwq] + sub reg_rightext, iwq + lea r2, [bwq-1] + cmovl reg_rightext, reg_zero + + DEFINE_ARGS bw, bh, iw, ih, leftext, \ + topext, dst, dstride, src, sstride, \ + bottomext, rightext, blk + + ; left_ext = iclip(-x, 0, bw - 1) + neg leftextq + cmovl leftextq, reg_zero + cmp 
reg_rightext, bwq + cmovge reg_rightext, r2 + %if ARCH_X86_32 + mov r3m, r1 + %endif + cmp leftextq, bwq + cmovge leftextq, r2 + +%undef reg_zero +%undef reg_tmp +%undef reg_src +%undef reg_bottomext +%undef reg_rightext + + DEFINE_ARGS bw, centerh, centerw, dummy, leftext, \ + topext, dst, dstride, src, sstride, \ + bottomext, rightext, blk + + ; center_h = bh - top_ext - bottom_ext +%if ARCH_X86_64 + lea r3, [bottomextq+topextq] + sub centerhq, r3 +%else + mov r1, centerhm ; restore r1 + sub centerhq, topextq + sub centerhq, r4m + mov r1m, centerhq +%endif + ; + ; blk += top_ext * PXSTRIDE(dst_stride) + mov r2, topextq +%if ARCH_X86_64 + imul r2, dstrideq +%else + mov r6, r6m ; restore dstq + imul r2, dstridem +%endif + add dstq, r2 + mov reg_blkm, dstq ; save pointer for ext + ; + ; center_w = bw - left_ext - right_ext + mov centerwq, bwq +%if ARCH_X86_64 + lea r3, [rightextq+leftextq] + sub centerwq, r3 +%else + sub centerwq, r3m + sub centerwq, leftextq +%endif + +; vloop Macro +%macro v_loop 3 ; need_left_ext, need_right_ext, suffix + %if ARCH_X86_64 + %define reg_tmp r12 + %else + %define reg_tmp r0 + %endif +.v_loop_%3: + %if ARCH_X86_32 + mov r0, r0m + mov r1, r1m + %endif +%if %1 + test leftextq, leftextq + jz .body_%3 + ; left extension + %if ARCH_X86_64 + movd m0, [srcq] + %else + mov r3, srcm + movd m0, [r3] + %endif + pshufb m0, m1 + xor r3, r3 +.left_loop_%3: + mova [dstq+r3], m0 + add r3, mmsize + cmp r3, leftextq + jl .left_loop_%3 + ; body +.body_%3: + lea reg_tmp, [dstq+leftextq] +%endif + xor r3, r3 +.body_loop_%3: + %if ARCH_X86_64 + movu m0, [srcq+r3] + %else + mov r1, srcm + movu m0, [r1+r3] + %endif +%if %1 + movu [reg_tmp+r3], m0 +%else + movu [dstq+r3], m0 +%endif + add r3, mmsize + cmp r3, centerwq + jl .body_loop_%3 +%if %2 + ; right extension + %if ARCH_X86_64 + test rightextq, rightextq + %else + mov r1, r3m + test r1, r1 + %endif + jz .body_loop_end_%3 +%if %1 + add reg_tmp, centerwq +%else + lea reg_tmp, [dstq+centerwq] +%endif + %if 
ARCH_X86_64 + movd m0, [srcq+centerwq-1] + %else + mov r3, srcm + movd m0, [r3+centerwq-1] + %endif + pshufb m0, m1 + xor r3, r3 +.right_loop_%3: + movu [reg_tmp+r3], m0 + add r3, mmsize + %if ARCH_X86_64 + cmp r3, rightextq + %else + cmp r3, r3m + %endif + jl .right_loop_%3 +.body_loop_end_%3: +%endif + %if ARCH_X86_64 + add dstq, dstrideq + add srcq, sstrideq + dec centerhq + jg .v_loop_%3 + %else + add dstq, dstridem + mov r0, sstridem + add srcm, r0 + sub dword centerhm, 1 + jg .v_loop_%3 + mov r0, r0m ; restore r0 + %endif +%endmacro ; vloop MACRO + + test leftextq, leftextq + jnz .need_left_ext + %if ARCH_X86_64 + test rightextq, rightextq + jnz .need_right_ext + %else + cmp leftextq, r3m ; leftextq == 0 + jne .need_right_ext + %endif + v_loop 0, 0, 0 + jmp .body_done + + ;left right extensions +.need_left_ext: + %if ARCH_X86_64 + test rightextq, rightextq + %else + mov r3, r3m + test r3, r3 + %endif + jnz .need_left_right_ext + v_loop 1, 0, 1 + jmp .body_done + +.need_left_right_ext: + v_loop 1, 1, 2 + jmp .body_done + +.need_right_ext: + v_loop 0, 1, 3 + +.body_done: +; r0 ; bw +; r1 ;; x loop +; r4 ;; y loop +; r5 ; topextq +; r6 ;dstq +; r7 ;dstrideq +; r8 ; srcq +%if ARCH_X86_64 + %define reg_dstride dstrideq +%else + %define reg_dstride r2 +%endif + ; + ; bottom edge extension + %if ARCH_X86_64 + test bottomextq, bottomextq + jz .top + %else + xor r1, r1 + cmp r1, r4m + je .top + %endif + ; + %if ARCH_X86_64 + mov srcq, dstq + sub srcq, dstrideq + xor r1, r1 + %else + mov r3, dstq + mov reg_dstride, dstridem + sub r3, reg_dstride + mov srcm, r3 + %endif + ; +.bottom_x_loop: + %if ARCH_X86_64 + mova m0, [srcq+r1] + lea r3, [dstq+r1] + mov r4, bottomextq + %else + mov r3, srcm + mova m0, [r3+r1] + lea r3, [dstq+r1] + mov r4, r4m + %endif + ; +.bottom_y_loop: + mova [r3], m0 + add r3, reg_dstride + dec r4 + jg .bottom_y_loop + add r1, mmsize + cmp r1, bwq + jl .bottom_x_loop + +.top: + ; top edge extension + test topextq, topextq + jz .end +%if ARCH_X86_64 
+ mov srcq, reg_blkm +%else + mov r3, reg_blkm + mov reg_dstride, dstridem +%endif + mov dstq, dstm + xor r1, r1 + ; +.top_x_loop: +%if ARCH_X86_64 + mova m0, [srcq+r1] +%else + mov r3, reg_blkm + mova m0, [r3+r1] +%endif + lea r3, [dstq+r1] + mov r4, topextq + ; +.top_y_loop: + mova [r3], m0 + add r3, reg_dstride + dec r4 + jg .top_y_loop + add r1, mmsize + cmp r1, bwq + jl .top_x_loop + +.end: + RET + +%undef reg_dstride +%undef reg_blkm +%undef reg_tmp diff --git a/third_party/dav1d/tests/checkasm/x86/checkasm.asm b/third_party/dav1d/tests/checkasm/x86/checkasm.asm index 02468729cee0..ae8d7700e47d 100644 --- a/third_party/dav1d/tests/checkasm/x86/checkasm.asm +++ b/third_party/dav1d/tests/checkasm/x86/checkasm.asm @@ -200,7 +200,7 @@ cglobal checked_call, 1,7 jz .ok mov r3, eax mov r4, edx - lea r0, [error_message] + LEA r0, error_message mov [esp], r0 call fail_func mov edx, r4 diff --git a/toolkit/components/reputationservice/ApplicationReputation.cpp b/toolkit/components/reputationservice/ApplicationReputation.cpp index 641fc1acb707..904b90e0058f 100644 --- a/toolkit/components/reputationservice/ApplicationReputation.cpp +++ b/toolkit/components/reputationservice/ApplicationReputation.cpp @@ -483,7 +483,8 @@ static const char* const kBinaryFileExtensions[] = { ".com", // Windows executable ".command", // Mac script ".cpgz", // Mac archive - ".cpi", // Control Panel Item. Executable used for adding icons to Control Panel + ".cpi", // Control Panel Item. Executable used for adding icons + // to Control Panel //".cpio", ".cpl", // Windows executable ".crt", // Windows signed certificate diff --git a/toolkit/components/telemetry/Scalars.yaml b/toolkit/components/telemetry/Scalars.yaml index f0cfeab88d5b..7b595283d3c9 100644 --- a/toolkit/components/telemetry/Scalars.yaml +++ b/toolkit/components/telemetry/Scalars.yaml @@ -2029,6 +2029,22 @@ idb.type: - 'main' - 'content' +# The following section is for tracking the number of failure for indexedDB. 
+idb.failure: + fileinfo_error: + bug_numbers: + - 1432133 + description: > + Tracking the number of failure due to missing fileInfo while doing + structured clone. + expires: "70" + kind: uint + notification_emails: + - ttung@mozilla.com + release_channel_collection: opt-out + record_in_processes: + - 'main' + # The following section contains probes that record timestamps. timestamps: first_paint: diff --git a/toolkit/themes/osx/global/shared.inc b/toolkit/themes/osx/global/shared.inc index 8e8df1a26411..952717851531 100644 --- a/toolkit/themes/osx/global/shared.inc +++ b/toolkit/themes/osx/global/shared.inc @@ -16,6 +16,3 @@ %define scopeBarSeparatorBorder 1px solid #888 %define scopeBarTitleColor #6D6D6D -%define toolbarbuttonBackground linear-gradient(#FFF, #ADADAD) repeat-x -%define toolbarbuttonPressedInnerShadow inset rgba(0, 0, 0, 0.3) 0 -6px 10px, inset #000 0 1px 3px, inset rgba(0, 0, 0, 0.2) 0 1px 3px -%define toolbarbuttonInactiveBorderColor rgba(146, 146, 146, 0.84) diff --git a/toolkit/themes/shared/in-content/common.inc.css b/toolkit/themes/shared/in-content/common.inc.css index 03a2be087e19..fa551908f490 100644 --- a/toolkit/themes/shared/in-content/common.inc.css +++ b/toolkit/themes/shared/in-content/common.inc.css @@ -27,7 +27,6 @@ --in-content-border-invalid-shadow: var(--red-50-a30); --in-content-border-color: #d7d7db; --in-content-category-outline-focus: 1px dotted #0a84ff; - --in-content-category-text: rgba(12,12,13); --in-content-category-text-selected: #0a84ff; --in-content-category-text-selected-active: #0060df; --in-content-category-background-hover: rgba(12,12,13,0.1); @@ -591,7 +590,6 @@ xul|*.radio-label-box { *|*#categories > *|*.category { min-height: 48px; -moz-appearance: none; - color: var(--in-content-category-text); margin-inline-start: 34px; padding-inline-end: 10px; padding-inline-start: 10px; diff --git a/widget/gtk/IMContextWrapper.cpp b/widget/gtk/IMContextWrapper.cpp index cc842c2c4ca6..66f1bfd91890 100644 --- 
a/widget/gtk/IMContextWrapper.cpp +++ b/widget/gtk/IMContextWrapper.cpp @@ -279,7 +279,6 @@ class SelectionStyleProvider final { // formatting of floating-point values. style.AppendFloat(alpha); style.AppendPrintf(");"); - } nscolor selectionBackgroundColor; if (NS_SUCCEEDED( diff --git a/widget/windows/nsSound.cpp b/widget/windows/nsSound.cpp index 3c1fdeee2917..5f0b35abca9d 100644 --- a/widget/windows/nsSound.cpp +++ b/widget/windows/nsSound.cpp @@ -115,13 +115,16 @@ void nsSound::PurgeLastSound() { // Halt any currently playing sound. if (mSoundPlayer) { if (mPlayerThread) { - mPlayerThread->Dispatch(NS_NewRunnableFunction( - "nsSound::PurgeLastSound", [player = std::move(mSoundPlayer)]() { - // Capture move mSoundPlayer to lambda then - // PlaySoundW(nullptr, nullptr, SND_PURGE) will be called before - // freeing the nsSoundPlayer. - ::PlaySoundW(nullptr, nullptr, SND_PURGE); - }), NS_DISPATCH_NORMAL); + mPlayerThread->Dispatch( + NS_NewRunnableFunction("nsSound::PurgeLastSound", + [player = std::move(mSoundPlayer)]() { + // Capture move mSoundPlayer to lambda then + // PlaySoundW(nullptr, nullptr, SND_PURGE) + // will be called before freeing the + // nsSoundPlayer. 
+ ::PlaySoundW(nullptr, nullptr, SND_PURGE); + }), + NS_DISPATCH_NORMAL); } } } diff --git a/xpcom/build/nsXULAppAPI.h b/xpcom/build/nsXULAppAPI.h index 8860e51ee75a..8fd7b3c70cec 100644 --- a/xpcom/build/nsXULAppAPI.h +++ b/xpcom/build/nsXULAppAPI.h @@ -374,8 +374,7 @@ enum GeckoProcessType { }; static const char* const kGeckoProcessTypeString[] = { -#define GECKO_PROCESS_TYPE(enum_name, string_name, xre_name) \ - string_name, +#define GECKO_PROCESS_TYPE(enum_name, string_name, xre_name) string_name, #include "mozilla/GeckoProcessTypes.h" #undef GECKO_PROCESS_TYPE }; diff --git a/xpcom/threads/TimerThread.cpp b/xpcom/threads/TimerThread.cpp index dfc955400f38..8d15b168c0ce 100644 --- a/xpcom/threads/TimerThread.cpp +++ b/xpcom/threads/TimerThread.cpp @@ -729,7 +729,8 @@ already_AddRefed TimerThread::PostTimerEvent( if (!p) { return timer.forget(); } - RefPtr event = ::new (KnownNotNull, p) nsTimerEvent(timer.forget()); + RefPtr event = + ::new (KnownNotNull, p) nsTimerEvent(timer.forget()); nsresult rv; {