generate android pages for sitemap too (#17078)

* generate android pages for sitemap too

* drop list of category pages (list of categories; NOT addons in the categories)

* fix tests (&cron)
This commit is contained in:
Andrew Williamson 2021-05-10 20:06:19 +01:00 коммит произвёл GitHub
Родитель 1a7a7653fa
Коммит 1be720e62c
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
12 изменённых файлов: 172 добавлений и 94 удалений

Просмотреть файл

@ -1,6 +1,5 @@
from datetime import datetime, timedelta from datetime import datetime, timedelta
from django.core.files.storage import default_storage as storage from django.core.files.storage import default_storage as storage
from django.http import HttpRequest
import olympia.core.logger import olympia.core.logger
@ -65,22 +64,12 @@ def gc(test_result=True):
ScannerResult.objects.filter(upload=None, version=None).delete() ScannerResult.objects.filter(upload=None, version=None).delete()
def _setup_default_prefixer():
"""`reverse` depends on a prefixer being set for an app and/or locale in the url,
and for non-requests (i.e. cron) this isn't set up."""
request = HttpRequest()
request.META['SCRIPT_NAME'] = ''
prefixer = amo.urlresolvers.Prefixer(request)
amo.reverse.set_url_prefix(prefixer)
def write_sitemaps(): def write_sitemaps():
_setup_default_prefixer() index_url = get_sitemap_path(None, None)
index_url = get_sitemap_path()
with storage.open(index_url, 'w') as index_file: with storage.open(index_url, 'w') as index_file:
index_file.write(build_sitemap()) index_file.write(build_sitemap(None, None))
for section, page in get_sitemap_section_pages(): for section, app_name, page in get_sitemap_section_pages():
filename = get_sitemap_path(section, page) filename = get_sitemap_path(section, app_name, page)
with storage.open(filename, 'w') as sitemap_file: with storage.open(filename, 'w') as sitemap_file:
content = build_sitemap(section, page) content = build_sitemap(section, app_name, page)
sitemap_file.write(content) sitemap_file.write(content)

Просмотреть файл

@ -1,6 +1,8 @@
from contextlib import contextmanager
from threading import local from threading import local
from django import urls from django import urls
from django.http import HttpRequest
# Get a pointer to Django's reverse and resolve because we're going to hijack # Get a pointer to Django's reverse and resolve because we're going to hijack
@ -59,3 +61,22 @@ def resolve(path, urlconf=None):
# Replace Django's resolve with our own. # Replace Django's resolve with our own.
urls.resolve = resolve urls.resolve = resolve
@contextmanager
def override_url_prefix(*, app_name=None, locale=None):
from olympia.amo.urlresolvers import Prefixer
old_prefixer = get_url_prefix()
request = HttpRequest()
request.META['SCRIPT_NAME'] = ''
new_prefixer = Prefixer(request)
if app_name:
new_prefixer.app = app_name
if locale:
new_prefixer.locale = locale
try:
set_url_prefix(new_prefixer)
yield
finally:
set_url_prefix(old_prefixer)

Просмотреть файл

@ -12,7 +12,7 @@ from django.urls import reverse
from olympia import amo from olympia import amo
from olympia.addons.models import Addon, AddonCategory from olympia.addons.models import Addon, AddonCategory
from olympia.amo.reverse import get_url_prefix from olympia.amo.reverse import get_url_prefix, override_url_prefix
from olympia.amo.templatetags.jinja_helpers import absolutify from olympia.amo.templatetags.jinja_helpers import absolutify
from olympia.constants.categories import CATEGORIES from olympia.constants.categories import CATEGORIES
from olympia.bandwagon.models import Collection from olympia.bandwagon.models import Collection
@ -28,6 +28,7 @@ THEMES_BY_AUTHORS_PAGE_SIZE = 12
class Sitemap(DjangoSitemap): class Sitemap(DjangoSitemap):
limit = 1000 limit = 1000
apps = amo.APP_USAGE
class AddonSitemap(Sitemap): class AddonSitemap(Sitemap):
@ -120,27 +121,32 @@ class AMOSitemap(Sitemap):
# i18n = True # TODO: support all localized urls # i18n = True # TODO: support all localized urls
changefreq = 'always' changefreq = 'always'
lastmod = datetime.datetime.now() lastmod = datetime.datetime.now()
apps = None # because some urls are app-less, we specify per item
def items(self): def items(self):
return [ return [
# frontend pages # frontend pages
'home', ('home', amo.FIREFOX),
'pages.about', ('home', amo.ANDROID),
'pages.review_guide', ('pages.about', None),
'browse.extensions', ('pages.review_guide', None),
'browse.extensions.categories', ('browse.extensions', amo.FIREFOX),
'browse.themes', # TODO: when we add /android, .themes are /firefox only ('browse.themes', amo.FIREFOX),
'browse.themes.categories', ('browse.language-tools', amo.FIREFOX),
'browse.language-tools', # TODO: when we add /android this is /firefox only
# server pages # server pages
'devhub.index', ('devhub.index', None),
'contribute.json', ('contribute.json', None),
'apps.appversions', ('apps.appversions', amo.FIREFOX),
'apps.appversions.rss', ('apps.appversions', amo.ANDROID),
] ]
def location(self, item): def location(self, item):
return reverse(item) urlname, app = item
if app:
with override_url_prefix(app_name=app.short):
return reverse(urlname)
else:
return reverse(urlname)
class CategoriesSitemap(Sitemap): class CategoriesSitemap(Sitemap):
@ -148,6 +154,7 @@ class CategoriesSitemap(Sitemap):
# i18n = True # TODO: support all localized urls # i18n = True # TODO: support all localized urls
changefreq = 'always' changefreq = 'always'
lastmod = datetime.datetime.now() lastmod = datetime.datetime.now()
apps = (amo.FIREFOX,) # category pages aren't supported on android
def items(self): def items(self):
def additems(type): def additems(type):
@ -280,20 +287,27 @@ sitemaps = {
def get_sitemap_section_pages(): def get_sitemap_section_pages():
pages = [] pages = []
for section, site in sitemaps.items(): for section, site in sitemaps.items():
pages.append((section, 1)) if not site.apps:
pages.extend((section, None, page) for page in site.paginator.page_range)
continue
for app in site.apps:
with override_url_prefix(app_name=app.short):
# Add all pages of the sitemap section. # Add all pages of the sitemap section.
for page in range(2, site.paginator.num_pages + 1): pages.extend(
pages.append((section, page)) (section, app.short, page) for page in site.paginator.page_range
)
return pages return pages
def build_sitemap(section=None, page=1): def build_sitemap(section, app_name, page=1):
if not section: if not section:
# its the index # its the index
sitemap_url = reverse('amo.sitemap') sitemap_url = reverse('amo.sitemap')
urls = ( urls = (
f'{sitemap_url}?section={section}' + ('' if page == 1 else f'&p={page}') f'{sitemap_url}?section={section}'
for section, page in get_sitemap_section_pages() + (f'&app_name={app_name}' if app_name else '')
+ (f'&p={page}' if page != 1 else '')
for section, app_name, page in get_sitemap_section_pages()
) )
return loader.render_to_string( return loader.render_to_string(
@ -305,6 +319,7 @@ def build_sitemap(section=None, page=1):
site_url = urlparse(settings.EXTERNAL_SITE_URL) site_url = urlparse(settings.EXTERNAL_SITE_URL)
# Sitemap.get_urls wants a Site instance to get the domain, so just fake it. # Sitemap.get_urls wants a Site instance to get the domain, so just fake it.
site = namedtuple('FakeSite', 'domain')(site_url.netloc) site = namedtuple('FakeSite', 'domain')(site_url.netloc)
with override_url_prefix(app_name=app_name):
xml = loader.render_to_string( xml = loader.render_to_string(
'sitemap.xml', 'sitemap.xml',
{ {
@ -322,11 +337,12 @@ def build_sitemap(section=None, page=1):
) )
def get_sitemap_path(section=None, page=1): def get_sitemap_path(section, app, page=1):
return os.path.join( return os.path.join(
settings.SITEMAP_STORAGE_PATH, settings.SITEMAP_STORAGE_PATH,
'sitemap' 'sitemap'
+ (f'-{section}' if section else '') + (f'-{section}' if section else '')
+ ('' if page == 1 else f'-{page}') + (f'-{app}' if app else '')
+ (f'-{page}' if page != 1 else '')
+ '.xml', + '.xml',
) )

Просмотреть файл

@ -0,0 +1,4 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" xmlns:xhtml="http://www.w3.org/1999/xhtml">
<url><loc>http://testserver/en-US/android/addon/delicious-pierogi/</loc><lastmod>2020-10-02</lastmod><changefreq>daily</changefreq><priority>1</priority></url><url><loc>http://testserver/en-US/android/addon/swanky-curry/</loc><lastmod>2020-10-01</lastmod><changefreq>daily</changefreq><priority>1</priority></url><url><loc>http://testserver/en-US/android/addon/spicy-pierogi/</loc><lastmod>2020-09-30</lastmod><changefreq>daily</changefreq><priority>1</priority></url>
</urlset>

Просмотреть файл

@ -1,3 +1,3 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"> <sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
<sitemap><loc>http://testserver/sitemap.xml?section=amo</loc></sitemap><sitemap><loc>http://testserver/sitemap.xml?section=addons</loc></sitemap><sitemap><loc>http://testserver/sitemap.xml?section=addons&amp;p=2</loc></sitemap></sitemapindex> <sitemap><loc>http://testserver/sitemap.xml?section=amo</loc></sitemap><sitemap><loc>http://testserver/sitemap.xml?section=addons&amp;app_name=firefox</loc></sitemap><sitemap><loc>http://testserver/sitemap.xml?section=addons&amp;app_name=firefox&amp;p=2</loc></sitemap><sitemap><loc>http://testserver/sitemap.xml?section=addons&amp;app_name=android</loc></sitemap><sitemap><loc>http://testserver/sitemap.xml?section=addons&amp;app_name=android&amp;p=2</loc></sitemap></sitemapindex>

Просмотреть файл

@ -117,25 +117,39 @@ def test_write_sitemaps():
sitemaps_dir = settings.SITEMAP_STORAGE_PATH sitemaps_dir = settings.SITEMAP_STORAGE_PATH
assert len(os.listdir(sitemaps_dir)) == 0 assert len(os.listdir(sitemaps_dir)) == 0
write_sitemaps() write_sitemaps()
assert len(os.listdir(sitemaps_dir)) == len(sitemaps) + 1 # 1 is the index assert len(os.listdir(sitemaps_dir)) == (
sum(len(sitemap.apps or ('',)) for sitemap in sitemaps.values())
+ 1 # 1 is the index
)
with open(os.path.join(sitemaps_dir, 'sitemap.xml')) as sitemap: with open(os.path.join(sitemaps_dir, 'sitemap.xml')) as sitemap:
contents = sitemap.read() contents = sitemap.read()
for section in sitemaps: entry = '<sitemap><loc>http://testserver/sitemap.xml?{params}</loc></sitemap>'
for section, sitemap in sitemaps.items():
if not sitemap.apps:
assert entry.format(params=f'section={section}') in contents
else:
for app in sitemap.apps:
assert ( assert (
f'<sitemap><loc>http://testserver/sitemap.xml?section={section}</loc>' entry.format(
in (contents) params=f'section={section}&amp;app_name={app.short}'
)
in contents
) )
with open(os.path.join(sitemaps_dir, 'sitemap-amo.xml')) as sitemap: with open(os.path.join(sitemaps_dir, 'sitemap-amo.xml')) as sitemap:
contents = sitemap.read() contents = sitemap.read()
assert '<url><loc>http://testserver/en-US/about</loc>' in contents assert '<url><loc>http://testserver/en-US/about</loc>' in contents
with open(os.path.join(sitemaps_dir, 'sitemap-addons.xml')) as sitemap: with open(os.path.join(sitemaps_dir, 'sitemap-addons-firefox.xml')) as sitemap:
contents = sitemap.read() contents = sitemap.read()
assert '<url><loc>http://testserver/en-US/firefox/' in contents assert '<url><loc>http://testserver/en-US/firefox/' in contents
with open(os.path.join(sitemaps_dir, 'sitemap-collections.xml')) as sitemap: with open(os.path.join(sitemaps_dir, 'sitemap-addons-android.xml')) as sitemap:
contents = sitemap.read()
assert '<url><loc>http://testserver/en-US/android/' in contents
with open(os.path.join(sitemaps_dir, 'sitemap-collections-firefox.xml')) as sitemap:
contents = sitemap.read() contents = sitemap.read()
assert ( assert (
'<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" ' '<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9" '

Просмотреть файл

@ -120,7 +120,12 @@ def test_addon_sitemap():
def test_amo_sitemap(): def test_amo_sitemap():
sitemap = AMOSitemap() sitemap = AMOSitemap()
for item in sitemap.items(): for item in sitemap.items():
assert sitemap.location(item) == reverse(item) urlname, app = item
assert sitemap.location(item).endswith(reverse(urlname, add_prefix=False))
if app:
assert sitemap.location(item).endswith(
f'/{app.short}{reverse(urlname, add_prefix=False)}'
)
def test_categories_sitemap(): def test_categories_sitemap():
@ -290,21 +295,28 @@ def test_get_sitemap_section_pages():
pages = get_sitemap_section_pages() pages = get_sitemap_section_pages()
assert pages == [ assert pages == [
('amo', 1), ('amo', None, 1),
('addons', 1), ('addons', 'firefox', 1),
('categories', 1), ('addons', 'android', 1),
('collections', 1), ('categories', 'firefox', 1),
('users', 1), ('collections', 'firefox', 1),
('collections', 'android', 1),
('users', 'firefox', 1),
('users', 'android', 1),
] ]
with mock.patch.object(AddonSitemap, 'limit', 5): with mock.patch.object(AddonSitemap, 'limit', 5):
pages = get_sitemap_section_pages() pages = get_sitemap_section_pages()
assert pages == [ assert pages == [
('amo', 1), ('amo', None, 1),
('addons', 1), ('addons', 'firefox', 1),
('addons', 2), ('addons', 'firefox', 2),
('categories', 1), ('addons', 'android', 1),
('collections', 1), ('addons', 'android', 2),
('users', 1), ('categories', 'firefox', 1),
('collections', 'firefox', 1),
('collections', 'android', 1),
('users', 'firefox', 1),
('users', 'android', 1),
] ]
# test the default pagination limit # test the default pagination limit
@ -318,13 +330,18 @@ def test_get_sitemap_section_pages():
with mock.patch.object(AccountSitemap, 'items', items_mock): with mock.patch.object(AccountSitemap, 'items', items_mock):
pages = get_sitemap_section_pages() pages = get_sitemap_section_pages()
assert pages == [ assert pages == [
('amo', 1), ('amo', None, 1),
('addons', 1), ('addons', 'firefox', 1),
('categories', 1), ('addons', 'android', 1),
('collections', 1), ('categories', 'firefox', 1),
('users', 1), ('collections', 'firefox', 1),
('users', 2), ('collections', 'android', 1),
('users', 3), ('users', 'firefox', 1),
('users', 'firefox', 2),
('users', 'firefox', 3),
('users', 'android', 1),
('users', 'android', 2),
('users', 'android', 3),
] ]
@ -332,11 +349,13 @@ def test_build_sitemap():
# test the index sitemap build first # test the index sitemap build first
with mock.patch('olympia.amo.sitemap.get_sitemap_section_pages') as pages_mock: with mock.patch('olympia.amo.sitemap.get_sitemap_section_pages') as pages_mock:
pages_mock.return_value = [ pages_mock.return_value = [
('amo', 1), ('amo', None, 1),
('addons', 1), ('addons', 'firefox', 1),
('addons', 2), ('addons', 'firefox', 2),
('addons', 'android', 1),
('addons', 'android', 2),
] ]
built = build_sitemap() built = build_sitemap(section=None, app_name=None)
with open(os.path.join(TEST_SITEMAPS_DIR, 'sitemap.xml')) as sitemap: with open(os.path.join(TEST_SITEMAPS_DIR, 'sitemap.xml')) as sitemap:
assert built == sitemap.read() assert built == sitemap.read()
@ -356,15 +375,24 @@ def test_build_sitemap():
] ]
with mock.patch.object(AddonSitemap, 'items', items_mock): with mock.patch.object(AddonSitemap, 'items', items_mock):
built = build_sitemap('addons') firefox_built = build_sitemap('addons', 'firefox')
with open(os.path.join(TEST_SITEMAPS_DIR, 'sitemap-addons-2.xml')) as sitemap: firefox_file = os.path.join(TEST_SITEMAPS_DIR, 'sitemap-addons-firefox-2.xml')
assert built == sitemap.read() with open(firefox_file) as sitemap:
assert firefox_built == sitemap.read()
android_built = build_sitemap('addons', 'android')
android_file = os.path.join(TEST_SITEMAPS_DIR, 'sitemap-addons-android.xml')
with open(android_file) as sitemap:
assert android_built == sitemap.read()
def test_get_sitemap_path(): def test_get_sitemap_path():
path = settings.SITEMAP_STORAGE_PATH path = settings.SITEMAP_STORAGE_PATH
assert get_sitemap_path() == f'{path}/sitemap.xml' assert get_sitemap_path(None, None) == f'{path}/sitemap.xml'
assert get_sitemap_path('foo') == f'{path}/sitemap-foo.xml' assert get_sitemap_path('foo', None) == f'{path}/sitemap-foo.xml'
assert get_sitemap_path('foo', 1) == f'{path}/sitemap-foo.xml' assert get_sitemap_path('foo', 'bar') == f'{path}/sitemap-foo-bar.xml'
assert get_sitemap_path('foo', 2) == f'{path}/sitemap-foo-2.xml' assert get_sitemap_path('foo', None, 1) == f'{path}/sitemap-foo.xml'
assert get_sitemap_path('foo', None, 2) == f'{path}/sitemap-foo-2.xml'
assert get_sitemap_path('foo', 'bar', 1) == f'{path}/sitemap-foo-bar.xml'
assert get_sitemap_path('foo', 'bar', 2) == f'{path}/sitemap-foo-bar-2.xml'

Просмотреть файл

@ -551,7 +551,7 @@ class TestSitemap(TestCase):
) )
# a section with more than one page # a section with more than one page
result = self.client.get('/sitemap.xml?section=addons&p=2') result = self.client.get('/sitemap.xml?section=addons&app_name=firefox&p=2')
assert result.status_code == 200 assert result.status_code == 200
assert result.get('Content-Type') == 'application/xml' assert result.get('Content-Type') == 'application/xml'
assert ( assert (
@ -559,6 +559,15 @@ class TestSitemap(TestCase):
in result.getvalue() in result.getvalue()
) )
# and for android
result = self.client.get('/sitemap.xml?section=addons&app_name=android')
assert result.status_code == 200
assert result.get('Content-Type') == 'application/xml'
assert (
b'<loc>http://testserver/en-US/android/addon/delicious-pierogi/</loc>'
in result.getvalue()
)
@override_settings(SITEMAP_STORAGE_PATH=TEST_SITEMAPS_DIR) @override_settings(SITEMAP_STORAGE_PATH=TEST_SITEMAPS_DIR)
def test_headers(self): def test_headers(self):
file_timestamp = round(datetime.datetime.now().timestamp() - (60 * 60)) file_timestamp = round(datetime.datetime.now().timestamp() - (60 * 60))
@ -603,7 +612,7 @@ class TestSitemap(TestCase):
) )
# a section # a section
result = self.client.get('/sitemap.xml?section=addons&debug') result = self.client.get('/sitemap.xml?section=addons&app_name=firefox&debug')
assert result.status_code == 200 assert result.status_code == 200
assert result.get('Content-Type') == 'application/xml' assert result.get('Content-Type') == 'application/xml'
# there aren't any addons so no content # there aren't any addons so no content

Просмотреть файл

@ -193,12 +193,13 @@ def sitemap(request):
modified_timestamp = None modified_timestamp = None
section = request.GET.get('section') # no section means the index page section = request.GET.get('section') # no section means the index page
app = request.GET.get('app_name')
page = request.GET.get('p', 1) page = request.GET.get('p', 1)
if 'debug' in request.GET and settings.SITEMAP_DEBUG_AVAILABLE: if 'debug' in request.GET and settings.SITEMAP_DEBUG_AVAILABLE:
content = build_sitemap(section, page) content = build_sitemap(section, app, page)
response = HttpResponse(content, content_type='application/xml') response = HttpResponse(content, content_type='application/xml')
else: else:
path = get_sitemap_path(section, page) path = get_sitemap_path(section, app, page)
try: try:
content = storage.open(path) # FileResponse closes files after consuming content = storage.open(path) # FileResponse closes files after consuming
modified_timestamp = os_stat(path).st_mtime modified_timestamp = os_stat(path).st_mtime

Просмотреть файл

@ -19,10 +19,6 @@ urlpatterns = [
frontend_view, frontend_view,
name='browse.extensions', name='browse.extensions',
), ),
re_path(r'^themes/categories/$', frontend_view, name='browse.themes.categories'),
re_path(
r'^extensions/categories/$', frontend_view, name='browse.extensions.categories'
),
re_path( re_path(
r'^search-tools/(?P<category>[^/]+)?$', r'^search-tools/(?P<category>[^/]+)?$',
frontend_view, frontend_view,