Remove properties from entries, and do not require entries to calculate file-level properties (#14043)

This commit is contained in:
Bob Silverberg 2020-04-28 11:42:27 -04:00 коммит произвёл GitHub
Родитель e077b8bea7
Коммит bfd3b046af
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
4 изменённых файлов: 114 добавлений и 119 удалений

Просмотреть файл

@ -172,16 +172,14 @@ This endpoint allows you to browse through the contents of an Add-on version.
:>json string file.mimetype: The determined mimetype of the selected file or ``application/octet-stream`` if none could be determined.
:>json string file.sha256: SHA256 hash of the selected file.
:>json int file.size: The size of the selected file in bytes.
:>json string file.filename: The filename of the file.
:>json string file.mime_category: The mime type category of this file. Can be ``image``, ``directory``, ``text`` or ``binary``.
:>json boolean uses_unknown_minified_code: Indicates that the selected file could be using minified code.
:>json array file.entries[]: The complete file-tree of the extracted XPI.
:>json int file.entries[].depth: Level of folder-tree depth, starting with 0.
:>json string file.entries[].filename: The filename of the file.
:>json string file.entries[].path: The absolute path (from the root of the XPI) of the file.
:>json string|null file.entries[].sha256: SHA256 hash. This is only set for the currently selected file.
:>json string file.entries[].mimetype: The determined mimetype of the file or ``application/octet-stream`` if none could be determined.
:>json string file.entries[].mime_category: The mime type category of this file. Can be ``image``, ``directory``, ``text`` or ``binary``.
:>json int file.entries[].size: The size in bytes.
:>json string file.entries[].modified: The exact time of the commit, should be equivalent with ``created``.
-------
@ -212,11 +210,7 @@ This endpoint allows you to compare two Add-on versions with each other.
:>json int|null file.entries[].depth: Level of folder-tree depth, starting with 0.
:>json string file.entries[].filename: The filename of the file.
:>json string file.entries[].path: The absolute path (from the root of the XPI) of the file.
:>json string|null file.entries[].sha256: SHA256 hash. This is only set for the currently selected file. It may also be `null` for deleted files.
:>json string|null file.entries[].mimetype: The determined mimetype of the file or ``application/octet-stream`` if none could be determined. Can be ``null`` in case of a deleted file.
:>json string|null file.entries[].mime_category: The mime type category of this file. Can be ``image``, ``directory``, ``text`` or ``binary``.
:>json int|null file.entries[].size: The size in bytes.
:>json string|null file.entries[].modified: The exact time of the commit, should be equivalent with ``created``.
:>json object|null diff: See the following output with inline comments for a complete description.
:>json object base_file: The file attached to the base version you're comparing against.
:>json object base_file.id: The id of the base file.

Просмотреть файл

@ -4,7 +4,6 @@ import mimetypes
import pathlib
import json
from collections import OrderedDict
from datetime import datetime, timezone, timedelta
import pygit2
@ -48,6 +47,8 @@ class AddonReviewerFlagsSerializer(serializers.ModelSerializer):
)
# NOTE: Because of caching, this serializer cannot be reused and must be
# created for each file. It cannot be used with DRF's many=True option.
class FileEntriesSerializer(FileSerializer):
content = serializers.SerializerMethodField()
uses_unknown_minified_code = serializers.SerializerMethodField()
@ -57,11 +58,14 @@ class FileEntriesSerializer(FileSerializer):
mimetype = serializers.SerializerMethodField()
sha256 = serializers.SerializerMethodField()
size = serializers.SerializerMethodField()
mime_category = serializers.SerializerMethodField()
filename = serializers.SerializerMethodField()
class Meta:
fields = FileSerializer.Meta.fields + (
'content', 'entries', 'selected_file', 'download_url',
'uses_unknown_minified_code', 'mimetype', 'sha256', 'size'
'uses_unknown_minified_code', 'mimetype', 'sha256', 'size',
'mime_category', 'filename'
)
model = File
@ -80,14 +84,37 @@ class FileEntriesSerializer(FileSerializer):
return self.parent.instance.current_file
return self.instance
def _get_commit(self, file_obj):
@cached_property
def commit(self):
"""Return the pygit2 repository instance, preselect correct channel."""
# Caching the commit to avoid calling revparse_single many times.
try:
return self.git_repo.revparse_single(file_obj.version.git_hash)
return self.git_repo.revparse_single(
self.get_instance().version.git_hash)
except pygit2.InvalidSpecError:
raise NotFound(
'Couldn\'t find the requested version in git-repository')
@cached_property
def tree(self):
# Caching the tree to avoid calling get_root_tree many times.
return self.repo.get_root_tree(self.commit)
def _get_blob_for_selected_file(self, obj):
"""Returns the blob and filename for the selected file.
Returns (None, None) if the selected file is not a blob.
"""
tree = self.tree
selected_file = self.get_selected_file(obj)
if selected_file in tree:
blob_or_tree = tree[selected_file]
if blob_or_tree.type == pygit2.GIT_OBJ_BLOB:
return (self.git_repo[blob_or_tree.oid], blob_or_tree.name)
return (None, None)
def _get_hash_for_selected_file(self, obj):
selected_file = self.get_selected_file(obj)
@ -98,8 +125,8 @@ class FileEntriesSerializer(FileSerializer):
if _entries and _entries[selected_file]['sha256']:
return _entries[selected_file]['sha256']
commit = self._get_commit(obj)
tree = self.repo.get_root_tree(commit)
commit = self.commit
blob, name = self._get_blob_for_selected_file(obj)
# Normalize the key as we want to avoid that we exceed max
# key lengh because of selected_file.
@ -110,20 +137,14 @@ class FileEntriesSerializer(FileSerializer):
normalize=True)
def _calculate_hash():
try:
blob_or_tree = tree[selected_file]
except KeyError:
if blob is None:
return None
if blob_or_tree.type == pygit2.GIT_OBJ_TREE:
return None
blob = self.git_repo[blob_or_tree.oid]
return get_sha256(io.BytesIO(memoryview(blob)))
return cache.get_or_set(cache_key, _calculate_hash, 60 * 60 * 24)
def get_entries(self, obj):
def _get_entries(self, obj):
# Given that this is a very expensive operation we have a two-fold
# cache, one that is stored on this instance for very-fast retrieval
# to support other method calls on this serializer
@ -131,23 +152,17 @@ class FileEntriesSerializer(FileSerializer):
if hasattr(self, '_entries'):
return self._entries
commit = self._get_commit(obj)
commit = self.commit
result = OrderedDict()
def _fetch_entries():
tree = self.repo.get_root_tree(commit)
tree = self.tree
for entry_wrapper in self.repo.iter_tree(tree):
entry = entry_wrapper.tree_entry
path = force_text(entry_wrapper.path)
blob = entry_wrapper.blob
commit_tzinfo = timezone(timedelta(
minutes=commit.commit_time_offset))
commit_time = datetime.fromtimestamp(
float(commit.commit_time),
commit_tzinfo)
mimetype, entry_mime_category = get_mime_type_for_blob(
tree_or_blob=entry.type, name=entry.name, blob=blob)
@ -159,7 +174,6 @@ class FileEntriesSerializer(FileSerializer):
'mimetype': mimetype,
'path': path,
'size': blob.size if blob is not None else None,
'modified': commit_time,
}
return result
@ -177,20 +191,43 @@ class FileEntriesSerializer(FileSerializer):
return self._entries
def get_entries(self, obj):
entries = self._get_entries(obj)
return self._trim_entries(entries)
def _trim_entries(self, entries):
result = OrderedDict()
for value in entries.values():
result[value['path']] = self._trim_entry(value)
return result
def _trim_entry(self, entry):
return {key: entry[key] for key in (
'depth', 'filename', 'mime_category', 'path', 'status'
) if key in entry}
def get_mimetype(self, obj):
entries = self.get_entries(obj)
entries = self._get_entries(obj)
return entries[self.get_selected_file(obj)]['mimetype']
def get_sha256(self, obj):
return self._get_hash_for_selected_file(obj)
def get_size(self, obj):
entries = self.get_entries(obj)
entries = self._get_entries(obj)
return entries[self.get_selected_file(obj)]['size']
def get_filename(self, obj):
entries = self._get_entries(obj)
return entries[self.get_selected_file(obj)]['filename']
def get_mime_category(self, obj):
entries = self._get_entries(obj)
return entries[self.get_selected_file(obj)]['mime_category']
def get_selected_file(self, obj):
requested_file = self.context.get('file', None)
files = self.get_entries(obj)
files = self._get_entries(obj)
if requested_file is None:
default_files = ('manifest.json', 'install.rdf', 'package.json')
@ -209,15 +246,10 @@ class FileEntriesSerializer(FileSerializer):
return requested_file
def get_content(self, obj):
commit = self._get_commit(obj)
tree = self.repo.get_root_tree(commit)
selected_file = self.get_selected_file(obj)
blob_or_tree = tree[selected_file]
if blob_or_tree.type == pygit2.GIT_OBJ_BLOB:
blob = self.git_repo[blob_or_tree.oid]
blob, name = self._get_blob_for_selected_file(obj)
if blob is not None:
mimetype, mime_category = get_mime_type_for_blob(
tree_or_blob='blob', name=blob_or_tree.name, blob=blob)
tree_or_blob='blob', name=name, blob=blob)
# Only return the raw data if we detect a file that contains text
# data that actually can be rendered.
@ -245,26 +277,18 @@ class FileEntriesSerializer(FileSerializer):
return self.get_selected_file(obj) in minified_files
def get_download_url(self, obj):
commit = self._get_commit(obj)
tree = self.repo.get_root_tree(commit)
selected_file = self.get_selected_file(obj)
blob, name = self._get_blob_for_selected_file(obj)
if blob is not None:
return absolutify(reverse(
'reviewers.download_git_file',
kwargs={
'version_id': self.get_instance().version.pk,
'filename': selected_file
}
))
try:
blob_or_tree = tree[selected_file]
except KeyError:
# This can happen when the file has been deleted.
return None
if blob_or_tree.type == pygit2.GIT_OBJ_TREE:
return None
return absolutify(reverse(
'reviewers.download_git_file',
kwargs={
'version_id': self.get_instance().version.pk,
'filename': selected_file
}
))
return None
class MinimalVersionSerializerWithChannel(MinimalVersionSerializer):
@ -329,7 +353,7 @@ class FileEntriesDiffSerializer(FileEntriesSerializer):
fields = FileSerializer.Meta.fields + (
'diff', 'entries', 'selected_file', 'download_url',
'uses_unknown_minified_code', 'base_file',
'sha256', 'size', 'mimetype'
'sha256', 'size', 'mimetype', 'mime_category', 'filename'
)
model = File
@ -351,8 +375,8 @@ class FileEntriesDiffSerializer(FileEntriesSerializer):
# See: https://github.com/mozilla/addons-server/issues/11392
return next(iter(diff), None)
def get_entries(self, obj):
"""Overwrite `FileEntriesSerializer.get_entries to inject
def _get_entries(self, obj):
"""Overwrite `FileEntriesSerializer._get_entries to inject
added/removed/changed information.
"""
@ -367,7 +391,7 @@ class FileEntriesDiffSerializer(FileEntriesSerializer):
parent=parent,
pathspec=None)
entries = super().get_entries(obj)
entries = super()._get_entries(obj)
# All files have a "unmodified" status by default
for path, value in entries.items():
@ -395,7 +419,6 @@ class FileEntriesDiffSerializer(FileEntriesSerializer):
'mimetype': mime,
'path': path,
'size': None,
'modified': None,
}
# Now we can set the git-status.
@ -420,7 +443,6 @@ class FileEntriesDiffSerializer(FileEntriesSerializer):
'mimetype': 'application/octet-stream',
'path': parent,
'size': None,
'modified': None,
}
return entries

Просмотреть файл

@ -1,8 +1,6 @@
# -*- coding: utf-8 -*-
import json
from datetime import datetime
from django.core.cache import cache
from rest_framework.exceptions import NotFound
@ -49,6 +47,8 @@ class TestFileEntriesSerializer(TestCase):
return self.get_serializer(obj, **extra_context).data
def test_basic(self):
expected_file_type = 'text'
expected_filename = 'manifest.json'
expected_mimetype = 'application/json'
expected_sha256 = (
'71d4122c0f2f78e089136602f88dbf590f2fa04bb5bc417454bf21446d6cb4f0'
@ -81,6 +81,8 @@ class TestFileEntriesSerializer(TestCase):
assert data['mimetype'] == expected_mimetype
assert data['sha256'] == expected_sha256
assert data['size'] == expected_size
assert data['mime_category'] == expected_file_type
assert data['filename'] == expected_filename
assert set(data['entries'].keys()) == {
'README.md',
@ -95,25 +97,16 @@ class TestFileEntriesSerializer(TestCase):
manifest_data = data['entries']['manifest.json']
assert manifest_data['depth'] == 0
assert manifest_data['filename'] == u'manifest.json'
assert manifest_data['sha256'] == expected_sha256
assert manifest_data['mimetype'] == expected_mimetype
assert manifest_data['mime_category'] == 'text'
assert manifest_data['filename'] == expected_filename
assert manifest_data['mime_category'] == expected_file_type
assert manifest_data['path'] == u'manifest.json'
assert manifest_data['size'] == expected_size
assert isinstance(manifest_data['modified'], datetime)
ja_locale_data = data['entries']['_locales/ja']
assert ja_locale_data['depth'] == 1
assert ja_locale_data['mime_category'] == 'directory'
assert ja_locale_data['filename'] == 'ja'
assert ja_locale_data['sha256'] is None
assert ja_locale_data['mimetype'] == 'application/octet-stream'
assert ja_locale_data['path'] == u'_locales/ja'
assert ja_locale_data['size'] is None
assert isinstance(ja_locale_data['modified'], datetime)
assert '"manifest_version": 2' in data['content']
assert 'id": "notify-link-clicks-i18n@notzilla.org' in data['content']
@ -152,6 +145,8 @@ class TestFileEntriesSerializer(TestCase):
'b48e66c02fe62dd47521def7c5ea11b86af91b94c23cfdf67592e1053952ed55'
)
assert data['size'] == 136
assert data['mime_category'] == 'text'
assert data['filename'] == 'LICENSE'
def test_requested_file_with_non_existent_file(self):
file = self.addon.current_version.current_file
@ -185,9 +180,9 @@ class TestFileEntriesSerializer(TestCase):
# start serialization
data = serializer.data
commit = serializer._get_commit(file)
commit = serializer.commit
assert serializer._entries == data['entries']
assert serializer._trim_entries(serializer._entries) == data['entries']
key = 'reviewers:fileentriesserializer:entries:{}'.format(commit.hex)
cached_data = cache.get(key)
@ -207,7 +202,8 @@ class TestFileEntriesSerializer(TestCase):
'content-script.js', 'icons', 'icons/LICENSE', 'icons/link-48.png'}
for key in expected_keys:
assert cached_data[key] == data['entries'][key]
assert serializer._trim_entry(
cached_data[key]) == data['entries'][key]
def test_dont_render_content_binary_file(self):
file = self.addon.current_version.current_file
@ -216,17 +212,18 @@ class TestFileEntriesSerializer(TestCase):
def test_sha256_only_calculated_or_fetched_for_selected_file(self):
file = self.addon.current_version.current_file
serializer = self.get_serializer(file, file='icons/LICENSE')
serializer.data
data = self.serialize(file, file='icons/LICENSE')
assert data['entries']['manifest.json']['sha256'] is None
assert data['entries']['icons/LICENSE']['sha256'] == (
assert serializer._entries['manifest.json']['sha256'] is None
assert serializer._entries['icons/LICENSE']['sha256'] == (
'b48e66c02fe62dd47521def7c5ea11b86af91b94c23cfdf67592e1053952ed55')
data = self.serialize(file, file='manifest.json')
assert data['entries']['manifest.json']['sha256'] == (
serializer = self.get_serializer(file, file='manifest.json')
serializer.data
assert serializer._entries['manifest.json']['sha256'] == (
'71d4122c0f2f78e089136602f88dbf590f2fa04bb5bc417454bf21446d6cb4f0')
assert data['entries']['icons/LICENSE']['sha256'] is None
assert serializer._entries['icons/LICENSE']['sha256'] is None
def test_uses_unknown_minified_code(self):
validation_data = {
@ -293,6 +290,8 @@ class TestFileEntriesDiffSerializer(TestCase):
return self.get_serializer(obj, **extra_context).data
def test_basic(self):
expected_file_type = 'text'
expected_filename = 'manifest.json'
expected_mimetype = 'application/json'
expected_sha256 = (
'bf9b0744c0011cad5caa55236951eda523f17676e91353a64a32353eac798631'
@ -342,6 +341,8 @@ class TestFileEntriesDiffSerializer(TestCase):
assert data['mimetype'] == expected_mimetype
assert data['sha256'] == expected_sha256
assert data['size'] == expected_size
assert data['mime_category'] == expected_file_type
assert data['filename'] == expected_filename
assert set(data['entries'].keys()) == {
'manifest.json', 'README.md', 'test.txt'}
@ -354,24 +355,17 @@ class TestFileEntriesDiffSerializer(TestCase):
# Unmodified file
manifest_data = data['entries']['manifest.json']
assert manifest_data['depth'] == 0
assert manifest_data['filename'] == u'manifest.json'
assert manifest_data['sha256'] == expected_sha256
assert manifest_data['mimetype'] == expected_mimetype
assert manifest_data['mime_category'] == 'text'
assert manifest_data['filename'] == expected_filename
assert manifest_data['mime_category'] == expected_file_type
assert manifest_data['path'] == u'manifest.json'
assert manifest_data['size'] == expected_size
assert manifest_data['status'] == ''
assert isinstance(manifest_data['modified'], datetime)
# Added a new file
test_txt_data = data['entries']['test.txt']
assert test_txt_data['depth'] == 0
assert test_txt_data['filename'] == u'test.txt'
assert test_txt_data['sha256'] is None
assert test_txt_data['mimetype'] == 'text/plain'
assert test_txt_data['mime_category'] == 'text'
assert test_txt_data['path'] == u'test.txt'
assert test_txt_data['size'] == 18
assert test_txt_data['status'] == 'A'
# Deleted file
@ -379,16 +373,14 @@ class TestFileEntriesDiffSerializer(TestCase):
assert readme_data['status'] == 'D'
assert readme_data['depth'] == 0
assert readme_data['filename'] == 'README.md'
assert readme_data['sha256'] is None
# Not testing mimetype as text/markdown is missing in travis mimetypes
# database. But it doesn't matter much here since we're primarily
# after the git status.
assert readme_data['mime_category'] is None
assert readme_data['path'] == u'README.md'
assert readme_data['size'] is None
assert readme_data['modified'] is None
def test_serialize_deleted_file(self):
expected_filename = 'manifest.json'
parent_version = self.addon.current_version
new_version = version_factory(
addon=self.addon, file_kw={
@ -398,7 +390,7 @@ class TestFileEntriesDiffSerializer(TestCase):
)
repo = AddonGitRepository.extract_and_commit_from_version(new_version)
apply_changes(repo, new_version, '', 'manifest.json', delete=True)
apply_changes(repo, new_version, '', expected_filename, delete=True)
file = self.addon.current_version.current_file
data = self.serialize(file, parent_version=parent_version)
@ -410,6 +402,8 @@ class TestFileEntriesDiffSerializer(TestCase):
assert data['mimetype'] == 'application/json'
assert data['sha256'] is None
assert data['size'] is None
assert data['mime_category'] is None
assert data['filename'] == expected_filename
def test_recreate_parent_dir_of_deleted_file(self):
addon, repo, parent_version, new_version = \
@ -431,12 +425,8 @@ class TestFileEntriesDiffSerializer(TestCase):
parent = entries_by_file[parent_dir]
assert parent['depth'] == 0
assert parent['filename'] == parent_dir
assert parent['sha256'] is None
assert parent['mime_category'] == 'directory'
assert parent['mimetype'] == 'application/octet-stream'
assert parent['path'] == parent_dir
assert parent['size'] is None
assert parent['modified'] is None
def test_recreate_nested_parent_dir_of_deleted_file(self):
addon, repo, parent_version, new_version = \
@ -517,11 +507,7 @@ class TestFileEntriesDiffSerializer(TestCase):
parent = entries_by_file[parent_dir]
assert parent['mime_category'] == 'directory'
assert parent['mimetype'] == 'application/octet-stream'
assert parent['path'] == parent_dir
# Since the directory is returned from git, it will have a real
# modified timestamp.
assert parent['modified'] is not None
def test_expose_grandparent_dir_deleted_subfolders(self):
addon, repo, parent_version, new_version = \
@ -547,7 +533,6 @@ class TestFileEntriesDiffSerializer(TestCase):
parent = entries_by_file[grandparent_dir]
assert parent['mime_category'] == 'directory'
assert parent['mimetype'] == 'application/octet-stream'
assert parent['path'] == grandparent_dir
assert parent['depth'] == 0
@ -572,14 +557,9 @@ class TestFileEntriesDiffSerializer(TestCase):
manifest_data = data['entries']['manifest.json']
assert manifest_data['depth'] == 0
assert manifest_data['filename'] == u'manifest.json'
assert manifest_data['sha256'] == (
'bf9b0744c0011cad5caa55236951eda523f17676e91353a64a32353eac798631')
assert manifest_data['mimetype'] == 'application/json'
assert manifest_data['mime_category'] == 'text'
assert manifest_data['path'] == u'manifest.json'
assert manifest_data['size'] == 621
assert manifest_data['status'] == ''
assert isinstance(manifest_data['modified'], datetime)
assert data['diff'] is not None

Просмотреть файл

@ -1160,8 +1160,7 @@ def download_git_stored_file(request, version_id, filename):
}
)
commit = serializer._get_commit(file)
tree = serializer.repo.get_root_tree(commit)
tree = serializer.tree
try:
blob_or_tree = tree[serializer.get_selected_file(file)]
@ -1176,7 +1175,7 @@ def download_git_stored_file(request, version_id, filename):
response = http.HttpResponse(
content=actual_blob.data,
content_type=selected_file['mimetype'])
content_type=serializer.get_mimetype(file))
# Backported from Django 2.1 to handle unicode filenames properly
selected_filename = selected_file['filename']