Bug 1382332: Rm duplicate www. hosts from Highlights. r=liuche

MozReview-Commit-ID: L1F6ZuEsjwk

--HG--
extra : rebase_source : be57f0b3618d33ec34e23e3b5c7e343aca65425e
This commit is contained in:
Michael Comella 2017-08-10 17:12:15 -07:00
Родитель 39ca30c0aa
Коммит 6d4834baf0
2 изменённых файлов: 103 добавлений и 4 удалений

Просмотреть файл

@ -6,16 +6,23 @@
package org.mozilla.gecko.activitystream.ranking;
import android.database.Cursor;
import android.net.Uri;
import android.support.annotation.VisibleForTesting;
import android.text.TextUtils;
import android.util.Log;
import android.util.SparseArray;
import org.mozilla.gecko.activitystream.homepanel.model.Highlight;
import org.mozilla.gecko.util.MapUtils;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static android.R.attr.filter;
import static java.util.Collections.sort;
import static org.mozilla.gecko.activitystream.ranking.HighlightCandidate.FEATURE_AGE_IN_DAYS;
import static org.mozilla.gecko.activitystream.ranking.HighlightCandidate.FEATURE_BOOKMARK_AGE_IN_MILLISECONDS;
@ -47,6 +54,8 @@ import static org.mozilla.gecko.activitystream.ranking.RankingUtils.mapWithLimit
public class HighlightsRanking {
// Tag naming this class; presumably used for log output (its usage is outside this view).
private static final String LOG_TAG = "HighlightsRanking";
// Host prefix that dedupeSites() looks for, and strips, when comparing "www." hosts with their bare-host twins.
private static final String WWW = "www.";
/** An array of all the features that are weighted while scoring. */
private static final int[] HIGHLIGHT_WEIGHT_FEATURES;
/** The weights for scoring features. */
@ -240,17 +249,54 @@ public class HighlightsRanking {
}
/**
 * Remove candidates that are pointing to the same host, with special restrictions for "www." hosts.
 *
 * The list is modified in place in two passes:
 * <ol>
 *   <li>Candidates whose host starts with "www." are pulled out of the list and set aside. Of the
 *       remaining candidates, only the first one seen for each host is kept.</li>
 *   <li>Each "www." candidate that was set aside is appended back to the list when
 *       {@link #shouldKeepWwwCandidate(HighlightCandidate, Map)} accepts it.</li>
 * </ol>
 *
 * NOTE(review): "www." candidates are not deduplicated against each other, only against their
 * non-"www." twin - confirm this is intended.
 *
 * @param candidates the mutable list of candidates to deduplicate in place.
 */
@VisibleForTesting static void dedupeSites(List<HighlightCandidate> candidates) {
    final Map<String, HighlightCandidate> knownHostToHighlightCandidate = new HashMap<>();
    final List<HighlightCandidate> wwwHighlightCandidates = new ArrayList<>();

    // filter(...) is expected to drop every candidate for which the callback returns false
    // (the unit tests for this method rely on that) - its definition is outside this block.
    filter(candidates, new Func1<HighlightCandidate, Boolean>() {
        @Override
        public Boolean call(HighlightCandidate candidate) {
            final String host = candidate.getHost();
            if (!TextUtils.isEmpty(host) && host.startsWith(WWW)) {
                // Process "www." hosts later, once every non-"www." host is known.
                wwwHighlightCandidates.add(candidate);
                return false;
            }

            // Keep only the first candidate seen for each host.
            return MapUtils.putIfAbsent(knownHostToHighlightCandidate, host, candidate) == null;
        }
    });

    // Every set-aside candidate must be examined: bailing out of the method after the first
    // re-added candidate would silently drop all the "www." candidates that follow it.
    for (final HighlightCandidate wwwCandidate : wwwHighlightCandidates) {
        if (shouldKeepWwwCandidate(wwwCandidate, knownHostToHighlightCandidate)) {
            candidates.add(wwwCandidate);
        }
    }
}

/**
 * Decides whether a "www." candidate survives deduplication.
 *
 * @param wwwCandidate a candidate whose host is non-empty and starts with "www.".
 * @param knownHostToHighlightCandidate the kept non-"www." candidates, keyed by host.
 * @return true if no candidate exists for the same host without "www.", if either URL is
 *         missing (benefit of the doubt), or if both paths are available and differ; false otherwise.
 */
private static boolean shouldKeepWwwCandidate(final HighlightCandidate wwwCandidate,
        final Map<String, HighlightCandidate> knownHostToHighlightCandidate) {
    // The host is non-null here: only hosts that passed the "www." prefix check are set aside.
    final String wwwCandidateHostNoWWW = wwwCandidate.getHost().substring(WWW.length());
    final HighlightCandidate knownCandidate = knownHostToHighlightCandidate.get(wwwCandidateHostNoWWW);

    // The same host without "www." does not exist.
    if (knownCandidate == null) {
        return true;
    }

    // Or if the same host exists, the paths differ.
    final String wwwCandidateURLStr = wwwCandidate.getUrl();
    final String correspondingCandidateURLStr = knownCandidate.getUrl();
    if (wwwCandidateURLStr == null || correspondingCandidateURLStr == null) { // cannot be passed to Uri.parse.
        // Error: could not create Uri. Let's keep the url to give the address the benefit of the doubt.
        return true;
    }

    final String wwwCandidatePath = Uri.parse(wwwCandidateURLStr).getPath();
    final String correspondingCandidatePath = Uri.parse(correspondingCandidateURLStr).getPath();
    return wwwCandidatePath != null && correspondingCandidatePath != null &&
            !wwwCandidatePath.equals(correspondingCandidatePath);
}
/**

Просмотреть файл

@ -3,15 +3,19 @@
package org.mozilla.gecko.activitystream.ranking;
import android.net.Uri;
import junit.framework.Assert;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mozilla.gecko.background.testhelpers.TestRunner;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
@RunWith(TestRunner.class)
public class TestHighlightsRanking {
@Test
@ -82,4 +86,53 @@ public class TestHighlightsRanking {
candidate.updateScore(score);
return candidate;
}
/**
 * A "www." candidate is dropped when a candidate for the same host without "www." exists and
 * neither URL has a path.
 */
@Test
public void testDedupeSitesRemoveWWWDupeSiteWithNoPath() {
    // The bare-host candidate is the one expected to survive.
    final HighlightCandidate bareHostCandidate = mockCandidate("http://feedly.com");

    final List<HighlightCandidate> input = new ArrayList<>();
    input.add(mockCandidate("http://www.feedly.com"));
    input.add(bareHostCandidate);

    HighlightsRanking.dedupeSites(input);

    Assert.assertEquals("Expected www. site to be removed", 1, input.size());
    Assert.assertEquals("Expected remaining candidate to be non-www candidate.", bareHostCandidate, input.get(0));
}
/**
 * A "www." candidate is dropped when a candidate for the same host without "www." exists and
 * both URLs share the same path.
 */
@Test
public void testDedupeSitesRemoveWWWDupeSiteWithSamePath() {
    // The bare-host candidate is the one expected to survive.
    final HighlightCandidate bareHostCandidate = mockCandidate("http://feedly.com/feed/whatever");

    final List<HighlightCandidate> input = new ArrayList<>();
    input.add(mockCandidate("http://www.feedly.com/feed/whatever"));
    input.add(bareHostCandidate);

    HighlightsRanking.dedupeSites(input);

    Assert.assertEquals("Expected www. site to be removed", 1, input.size());
    Assert.assertEquals("Expected remaining candidate to be non-www candidate.", bareHostCandidate, input.get(0));
}
/**
 * A "www." candidate is kept alongside its bare-host twin when the two URLs have different paths.
 */
@Test
public void testDedupeSitesKeepWWWDupeSiteWithDifferentPath() {
    final List<HighlightCandidate> input = new ArrayList<>();
    input.add(mockCandidate("http://www.feedly.com/home"));
    input.add(mockCandidate("http://feedly.com/feed"));

    HighlightsRanking.dedupeSites(input);

    final int remaining = input.size();
    Assert.assertEquals("Expected no candidates to be removed", 2, remaining);
}
/**
 * Builds a mocked HighlightCandidate that reports the given URL and the host parsed from it.
 *
 * @param urlStr the URL the mocked candidate should return from getUrl().
 * @return the stubbed candidate.
 */
private HighlightCandidate mockCandidate(final String urlStr) {
    final Uri parsedUrl = Uri.parse(urlStr);
    final String parsedHost = parsedUrl.getHost();

    final HighlightCandidate candidate = mock(HighlightCandidate.class);
    when(candidate.getHost()).thenReturn(parsedHost);
    when(candidate.getUrl()).thenReturn(urlStr);
    return candidate;
}
}