483 строки
17 KiB
Python
483 строки
17 KiB
Python
import codecs
|
|
import json
|
|
import mimetypes
|
|
import os
|
|
import shutil
|
|
import stat
|
|
from functools import partial
|
|
|
|
from django.conf import settings
|
|
from django.core.files.storage import default_storage as storage
|
|
from django.utils.datastructures import SortedDict
|
|
from django.utils.encoding import smart_unicode
|
|
from django.template.defaultfilters import filesizeformat
|
|
|
|
import jinja2
|
|
import commonware.log
|
|
from jingo import register, env
|
|
from tower import ugettext as _
|
|
|
|
import amo
|
|
from amo.utils import memoize, Message, rm_local_tmp_dir
|
|
from amo.urlresolvers import reverse
|
|
from files.utils import extract_xpi, get_md5
|
|
from validator.testcases.packagelayout import (blacklisted_extensions,
|
|
blacklisted_magic_numbers)
|
|
|
|
# Allow files with a shebang through.
|
|
blacklisted_magic_numbers = [b for b in list(blacklisted_magic_numbers)
|
|
if b != (0x23, 0x21)]
|
|
blacklisted_extensions = [b for b in list(blacklisted_extensions)
|
|
if b != 'sh']
|
|
task_log = commonware.log.getLogger('z.task')
|
|
|
|
|
|
@register.function
|
|
def file_viewer_class(value, key):
|
|
result = []
|
|
if value['directory']:
|
|
result.append('directory closed')
|
|
else:
|
|
result.append('file')
|
|
if value['short'] == key:
|
|
result.append('selected')
|
|
if value.get('diff'):
|
|
result.append('diff')
|
|
return ' '.join(result)
|
|
|
|
|
|
@register.function
|
|
def file_tree(files, selected):
|
|
depth = 0
|
|
output = ['<ul class="root">']
|
|
t = env.get_template('files/node.html')
|
|
for k, v in files.items():
|
|
if v['depth'] > depth:
|
|
output.append('<ul class="js-hidden">')
|
|
elif v['depth'] < depth:
|
|
output.extend(['</ul>' for x in range(v['depth'], depth)])
|
|
output.append(t.render(value=v, selected=selected))
|
|
depth = v['depth']
|
|
output.extend(['</ul>' for x in range(depth, -1, -1)])
|
|
return jinja2.Markup('\n'.join(output))
|
|
|
|
|
|
class FileViewer(object):
|
|
"""
|
|
Provide access to a storage-managed file by copying it locally and
|
|
extracting info from it. `src` is a storage-managed path and `dest` is a
|
|
local temp path.
|
|
"""
|
|
|
|
def __init__(self, file_obj, is_webapp=False):
|
|
self.file = file_obj
|
|
self.is_webapp = is_webapp
|
|
self.src = file_obj.file_path
|
|
self.dest = os.path.join(settings.TMP_PATH, 'file_viewer',
|
|
str(file_obj.pk))
|
|
self._files, self.selected = None, None
|
|
|
|
def __str__(self):
|
|
return str(self.file.id)
|
|
|
|
def _extraction_cache_key(self):
|
|
return ('%s:file-viewer:extraction-in-progress:%s' %
|
|
(settings.CACHE_PREFIX, self.file.id))
|
|
|
|
def extract(self):
|
|
"""
|
|
Will make all the directories and expand the files.
|
|
Raises error on nasty files.
|
|
"""
|
|
try:
|
|
os.makedirs(os.path.dirname(self.dest))
|
|
except OSError, err:
|
|
pass
|
|
|
|
if self.is_search_engine() and self.src.endswith('.xml'):
|
|
try:
|
|
os.makedirs(self.dest)
|
|
except OSError, err:
|
|
pass
|
|
copyfileobj(storage.open(self.src),
|
|
open(os.path.join(self.dest,
|
|
self.file.filename), 'w'))
|
|
else:
|
|
try:
|
|
extract_xpi(self.src, self.dest, expand=True)
|
|
except Exception, err:
|
|
task_log.error('Error (%s) extracting %s' % (err, self.src))
|
|
raise
|
|
|
|
def cleanup(self):
|
|
if os.path.exists(self.dest):
|
|
rm_local_tmp_dir(self.dest)
|
|
|
|
def is_search_engine(self):
|
|
"""Is our file for a search engine?"""
|
|
return self.file.version.addon.type == amo.ADDON_SEARCH
|
|
|
|
def is_extracted(self):
|
|
"""If the file has been extracted or not."""
|
|
return (os.path.exists(self.dest) and not
|
|
Message(self._extraction_cache_key()).get())
|
|
|
|
def _is_binary(self, mimetype, path):
|
|
"""Uses the filename to see if the file can be shown in HTML or not."""
|
|
# Re-use the blacklisted data from amo-validator to spot binaries.
|
|
ext = os.path.splitext(path)[1][1:]
|
|
if ext in blacklisted_extensions:
|
|
return True
|
|
|
|
if os.path.exists(path) and not os.path.isdir(path):
|
|
with storage.open(path, 'r') as rfile:
|
|
bytes = tuple(map(ord, rfile.read(4)))
|
|
if any(bytes[:len(x)] == x for x in blacklisted_magic_numbers):
|
|
return True
|
|
|
|
if mimetype:
|
|
major, minor = mimetype.split('/')
|
|
if major == 'image':
|
|
return 'image' # Mark that the file is binary, but an image.
|
|
|
|
return False
|
|
|
|
def read_file(self, allow_empty=False):
|
|
"""
|
|
Reads the file. Imposes a file limit and tries to cope with
|
|
UTF-8 and UTF-16 files appropriately. Return file contents and
|
|
a list of error messages.
|
|
"""
|
|
try:
|
|
file_data = self._read_file(allow_empty)
|
|
|
|
# If this is a webapp manifest, we should try to pretty print it.
|
|
if (self.selected and
|
|
self.selected.get('filename') == 'manifest.webapp'):
|
|
file_data = self._process_manifest(file_data)
|
|
|
|
return file_data
|
|
except (IOError, OSError):
|
|
self.selected['msg'] = _('That file no longer exists.')
|
|
return ''
|
|
|
|
def _read_file(self, allow_empty=False):
|
|
if not self.selected and allow_empty:
|
|
return ''
|
|
assert self.selected, 'Please select a file'
|
|
if self.selected['size'] > settings.FILE_VIEWER_SIZE_LIMIT:
|
|
# L10n: {0} is the file size limit of the file viewer.
|
|
msg = _('File size is over the limit of {0}.').format(
|
|
filesizeformat(settings.FILE_VIEWER_SIZE_LIMIT))
|
|
self.selected['msg'] = msg
|
|
return ''
|
|
|
|
with storage.open(self.selected['full'], 'r') as opened:
|
|
cont = opened.read()
|
|
codec = 'utf-16' if cont.startswith(codecs.BOM_UTF16) else 'utf-8'
|
|
try:
|
|
return cont.decode(codec)
|
|
except UnicodeDecodeError:
|
|
cont = cont.decode(codec, 'ignore')
|
|
#L10n: {0} is the filename.
|
|
self.selected['msg'] = (
|
|
_('Problems decoding {0}.').format(codec))
|
|
return cont
|
|
|
|
def _process_manifest(self, data):
|
|
"""
|
|
If we're dealing with a webapp manifest, this will format it nicely for
|
|
maximum diff-ability.
|
|
"""
|
|
|
|
# If this isn't a webapp, don't reformat it.
|
|
if not self.is_webapp:
|
|
return data
|
|
|
|
try:
|
|
json_data = json.loads(data)
|
|
except Exception:
|
|
# If there are any JSON decode problems, just return the raw file.
|
|
return data
|
|
|
|
def format_dict(data):
|
|
def do_format(value):
|
|
if isinstance(value, dict):
|
|
return format_dict(value)
|
|
else:
|
|
return value
|
|
|
|
# We want everything sorted, but we always want these few nodes
|
|
# right at the top.
|
|
prefix_nodes = ["name", "description", "version"]
|
|
prefix_nodes = [(k, data.pop(k)) for k in prefix_nodes if
|
|
k in data]
|
|
|
|
processed_nodes = [(k, do_format(v)) for k, v in data.items()]
|
|
return SortedDict(prefix_nodes + sorted(processed_nodes))
|
|
|
|
return json.dumps(format_dict(json_data), indent=2)
|
|
|
|
def select(self, file_):
|
|
self.selected = self.get_files().get(file_)
|
|
|
|
def is_binary(self):
|
|
if self.selected:
|
|
binary = self.selected['binary']
|
|
if binary and (binary != 'image' or not self.is_webapp):
|
|
self.selected['msg'] = _('This file is not viewable online. '
|
|
'Please download the file to view '
|
|
'the contents.')
|
|
return binary
|
|
|
|
def is_directory(self):
|
|
if self.selected:
|
|
if self.selected['directory']:
|
|
self.selected['msg'] = _('This file is a directory.')
|
|
return self.selected['directory']
|
|
|
|
def get_default(self, key=None):
|
|
"""Gets the default file and copes with search engines."""
|
|
if self.is_search_engine() and not key:
|
|
files = self.get_files()
|
|
return files.keys()[0] if files else None
|
|
|
|
if key:
|
|
return key
|
|
|
|
return 'manifest.webapp' if self.is_webapp else 'install.rdf'
|
|
|
|
def get_files(self):
|
|
"""
|
|
Returns a SortedDict, ordered by the filename of all the files in the
|
|
addon-file. Full of all the useful information you'll need to serve
|
|
this file, build templates etc.
|
|
"""
|
|
if self._files:
|
|
return self._files
|
|
|
|
if not self.is_extracted():
|
|
return {}
|
|
# In case a cron job comes along and deletes the files
|
|
# mid tree building.
|
|
try:
|
|
self._files = self._get_files()
|
|
return self._files
|
|
except (OSError, IOError):
|
|
return {}
|
|
|
|
def truncate(self, filename, pre_length=15,
|
|
post_length=10, ellipsis=u'..'):
|
|
"""
|
|
Truncates a filename so that
|
|
somelongfilename.htm
|
|
becomes:
|
|
some...htm
|
|
as it truncates around the extension.
|
|
"""
|
|
root, ext = os.path.splitext(filename)
|
|
if len(root) > pre_length:
|
|
root = root[:pre_length] + ellipsis
|
|
if len(ext) > post_length:
|
|
ext = ext[:post_length] + ellipsis
|
|
return root + ext
|
|
|
|
def get_syntax(self, filename):
|
|
"""
|
|
Converts a filename into a syntax for the syntax highlighter, with
|
|
some modifications for specific common mozilla files.
|
|
The list of syntaxes is from:
|
|
http://alexgorbatchev.com/SyntaxHighlighter/manual/brushes/
|
|
"""
|
|
if filename:
|
|
short = os.path.splitext(filename)[1][1:]
|
|
syntax_map = {'xul': 'xml', 'rdf': 'xml', 'jsm': 'js', 'json': 'js'}
|
|
short = syntax_map.get(short, short)
|
|
if short in ['actionscript3', 'as3', 'bash', 'shell', 'cpp', 'c',
|
|
'c#', 'c-sharp', 'csharp', 'css', 'diff', 'html',
|
|
'java', 'javascript', 'js', 'jscript', 'patch',
|
|
'pas', 'php', 'plain', 'py', 'python', 'sass',
|
|
'scss', 'text', 'sql', 'vb', 'vbnet', 'xml', 'xhtml',
|
|
'xslt']:
|
|
return short
|
|
return 'plain'
|
|
|
|
@memoize(prefix='file-viewer', time=60 * 60)
|
|
def _get_files(self):
|
|
all_files, res = [], SortedDict()
|
|
# Not using os.path.walk so we get just the right order.
|
|
|
|
def iterate(path):
|
|
path_dirs, path_files = storage.listdir(path)
|
|
for dirname in sorted(path_dirs):
|
|
full = os.path.join(path, dirname)
|
|
all_files.append(full)
|
|
iterate(full)
|
|
|
|
for filename in sorted(path_files):
|
|
full = os.path.join(path, filename)
|
|
all_files.append(full)
|
|
|
|
iterate(self.dest)
|
|
|
|
url_prefix = 'mkt.%s' if self.is_webapp else '%s'
|
|
for path in all_files:
|
|
filename = smart_unicode(os.path.basename(path), errors='replace')
|
|
short = smart_unicode(path[len(self.dest) + 1:], errors='replace')
|
|
mime, encoding = mimetypes.guess_type(filename)
|
|
if not mime and filename == 'manifest.webapp':
|
|
mime = 'application/x-web-app-manifest+json'
|
|
directory = os.path.isdir(path)
|
|
|
|
|
|
res[short] = {
|
|
'binary': self._is_binary(mime, path),
|
|
'depth': short.count(os.sep),
|
|
'directory': directory,
|
|
'filename': filename,
|
|
'full': path,
|
|
'md5': get_md5(path) if not directory else '',
|
|
'mimetype': mime or 'application/octet-stream',
|
|
'syntax': self.get_syntax(filename),
|
|
'modified': os.stat(path)[stat.ST_MTIME],
|
|
'short': short,
|
|
'size': os.stat(path)[stat.ST_SIZE],
|
|
'truncated': self.truncate(filename),
|
|
'url': reverse(url_prefix % 'files.list',
|
|
args=[self.file.id, 'file', short]),
|
|
'url_serve': reverse(url_prefix % 'files.redirect',
|
|
args=[self.file.id, short]),
|
|
'version': self.file.version.version,
|
|
}
|
|
|
|
return res
|
|
|
|
|
|
class DiffHelper(object):
|
|
|
|
def __init__(self, left, right, is_webapp=False):
|
|
self.left = FileViewer(left, is_webapp=is_webapp)
|
|
self.right = FileViewer(right, is_webapp=is_webapp)
|
|
self.key = None
|
|
|
|
self.is_webapp = is_webapp
|
|
|
|
def __str__(self):
|
|
return '%s:%s' % (self.left, self.right)
|
|
|
|
def extract(self):
|
|
self.left.extract(), self.right.extract()
|
|
|
|
def cleanup(self):
|
|
self.left.cleanup(), self.right.cleanup()
|
|
|
|
def is_extracted(self):
|
|
return self.left.is_extracted() and self.right.is_extracted()
|
|
|
|
def get_url(self, short):
|
|
url_name = 'mkt.files.compare' if self.is_webapp else 'files.compare'
|
|
return reverse(url_name,
|
|
args=[self.left.file.id, self.right.file.id,
|
|
'file', short])
|
|
|
|
#@memoize(prefix='file-viewer-get-files', time=60 * 60)
|
|
def get_files(self):
|
|
"""
|
|
Get the files from the primary and:
|
|
- remap any diffable ones to the compare url as opposed to the other
|
|
- highlight any diffs
|
|
"""
|
|
left_files = self.left.get_files()
|
|
right_files = self.right.get_files()
|
|
different = []
|
|
for key, file in left_files.items():
|
|
file['url'] = self.get_url(file['short'])
|
|
diff = file['md5'] != right_files.get(key, {}).get('md5')
|
|
file['diff'] = diff
|
|
if diff:
|
|
different.append(file)
|
|
|
|
# Now mark every directory above each different file as different.
|
|
for diff in different:
|
|
for depth in range(diff['depth']):
|
|
key = '/'.join(diff['short'].split('/')[:depth + 1])
|
|
if key in left_files:
|
|
left_files[key]['diff'] = True
|
|
|
|
return left_files
|
|
|
|
#@memoize(prefix='file-viewer-get-deleted-files', time=60 * 60)
|
|
def get_deleted_files(self):
|
|
"""
|
|
Get files that exist in right, but not in left. These
|
|
are files that have been deleted between the two versions.
|
|
Every element will be marked as a diff.
|
|
"""
|
|
different = SortedDict()
|
|
if self.right.is_search_engine():
|
|
return different
|
|
|
|
left_files = self.left.get_files()
|
|
right_files = self.right.get_files()
|
|
for key, file in right_files.items():
|
|
if key not in left_files:
|
|
copy = right_files[key]
|
|
copy.update({'url': self.get_url(file['short']), 'diff': True})
|
|
different[key] = copy
|
|
|
|
return different
|
|
|
|
def read_file(self):
|
|
"""Reads both selected files."""
|
|
return [self.left.read_file(allow_empty=True),
|
|
self.right.read_file(allow_empty=True)]
|
|
|
|
def select(self, key):
|
|
"""
|
|
Select a file and adds the file object to self.one and self.two
|
|
for later fetching. Does special work for search engines.
|
|
"""
|
|
self.key = key
|
|
self.left.select(key)
|
|
if key and self.right.is_search_engine():
|
|
# There's only one file in a search engine.
|
|
key = self.right.get_default()
|
|
|
|
self.right.select(key)
|
|
return self.left.selected and self.right.selected
|
|
|
|
def is_binary(self):
|
|
"""Tells you if both selected files are binary."""
|
|
return (self.left.is_binary() or
|
|
self.right.is_binary())
|
|
|
|
def is_diffable(self):
|
|
"""Tells you if the selected files are diffable."""
|
|
if not self.left.selected and not self.right.selected:
|
|
return False
|
|
|
|
for obj in [self.left, self.right]:
|
|
if obj.is_binary():
|
|
return False
|
|
if obj.is_directory():
|
|
return False
|
|
return True
|
|
|
|
|
|
def copyfileobj(fsrc, fdst, length=64*1024):
|
|
"""copy data from file-like object fsrc to file-like object fdst"""
|
|
while 1:
|
|
buf = fsrc.read(length)
|
|
if not buf:
|
|
break
|
|
fdst.write(buf)
|
|
|
|
|
|
def rmtree(prefix):
|
|
dirs, files = storage.listdir(prefix)
|
|
for fname in files:
|
|
storage.delete(os.path.join(prefix, fname))
|
|
for d in dirs:
|
|
rmtree(os.path.join(prefix, d))
|
|
storage.delete(prefix)
|