483 строки
17 KiB
483 строки
17 KiB
import codecs
import json
import mimetypes
import os
import shutil
import stat
from functools import partial
from django.conf import settings
from django.core.files.storage import default_storage as storage
from django.utils.datastructures import SortedDict
from django.utils.encoding import smart_unicode
from django.template.defaultfilters import filesizeformat
import jinja2
import commonware.log
from jingo import register, env
from tower import ugettext as _
import amo
from amo.utils import memoize, Message, rm_local_tmp_dir
from amo.urlresolvers import reverse
from files.utils import extract_xpi, get_md5
from validator.testcases.packagelayout import (blacklisted_extensions,
# Let shell scripts through the viewer: drop the shebang magic number
# ("#!", bytes 0x23 0x21) and the "sh" extension from the imported blacklists.
blacklisted_magic_numbers = [
    b for b in blacklisted_magic_numbers if b != (0x23, 0x21)
]
blacklisted_extensions = [
    b for b in blacklisted_extensions if b != 'sh'
]
# Logger used when reporting extraction failures.
task_log = commonware.log.getLogger('z.task')
# Build the space-separated class string for one file entry.
# `value` is a file-info mapping (its 'directory', 'short' and 'diff' entries
# are read); `key` is compared against value['short'].
def file_viewer_class(value, key):
result = []
if value['directory']:
result.append('directory closed')
# NOTE(review): the two conditions below have no bodies in this copy of the
# file -- presumably each appended a class name to `result`; confirm against
# the upstream source before relying on this block.
if value['short'] == key:
if value.get('diff'):
return ' '.join(result)
def file_tree(files, selected):
    """
    Render the file listing as nested ``<ul>`` markup.

    Every value of `files` (a mapping whose values carry a 'depth' entry) is
    rendered with the ``files/node.html`` template, in the mapping's own
    iteration order.  One nested list is opened when the depth increases and
    one list is closed per level when it decreases.  The joined markup is
    returned as ``jinja2.Markup``.
    """
    node_template = env.get_template('files/node.html')
    chunks = ['<ul class="root">']
    current = 0
    for _name, node in files.items():
        level = node['depth']
        if level > current:
            chunks.append('<ul class="js-hidden">')
        elif level < current:
            # One closing tag per level we are climbing back out of.
            chunks.extend(['</ul>'] * (current - level))
        chunks.append(node_template.render(value=node, selected=selected))
        current = level
    # Close the lists that are still open, plus the root list itself.
    chunks.extend(['</ul>'] * (current + 1))
    return jinja2.Markup('\n'.join(chunks))
class FileViewer(object):
    """
    Provide access to a storage-managed file by copying it locally and
    extracting info from it. `src` is a storage-managed path and `dest` is a
    local temp path.
    """
# Remember the file object and where it lives: `src` comes from
# file_obj.file_path and `dest` is built under settings.TMP_PATH/file_viewer.
def __init__(self, file_obj, is_webapp=False):
self.file = file_obj
self.is_webapp = is_webapp
self.src = file_obj.file_path
# NOTE(review): the os.path.join() call below is cut off in this copy -- its
# final argument(s) and closing parenthesis are missing; restore from upstream.
self.dest = os.path.join(settings.TMP_PATH, 'file_viewer',
# Filled in later: the cached file listing (get_files) and the currently
# selected entry (select).
self._files, self.selected = None, None
def __str__(self):
return str(self.file.id)
def _extraction_cache_key(self):
    """Build the cache key marking an extraction in progress for this file."""
    key_parts = (settings.CACHE_PREFIX, self.file.id)
    return '%s:file-viewer:extraction-in-progress:%s' % key_parts
# Expand the source file into `self.dest`.
# NOTE(review): this copy of the method has lost several lines -- the
# docstring delimiters, the `try:` statements that belong to the `except`
# clauses, the bodies of the first two `except` clauses, and the start of the
# call that ends on the "self.file.filename), 'w'))" line.  Restore from
# upstream before editing.
def extract(self):
Will make all the directories and expand the files.
Raises error on nasty files.
except OSError, err:
# Search-engine files whose source path ends in '.xml' take this branch.
if self.is_search_engine() and self.src.endswith('.xml'):
except OSError, err:
self.file.filename), 'w'))
# presumably unpacks the package at self.src into self.dest -- TODO confirm
# against files.utils.extract_xpi.
extract_xpi(self.src, self.dest, expand=True)
except Exception, err:
# Log the failure together with the source path.
task_log.error('Error (%s) extracting %s' % (err, self.src))
# Clean-up hook; it only acts when self.dest exists on disk.
# NOTE(review): the body of the `if` is missing in this copy -- presumably it
# removes self.dest; confirm against upstream.
def cleanup(self):
if os.path.exists(self.dest):
def is_search_engine(self):
    """Return True when the file's add-on has the search-engine type."""
    addon_type = self.file.version.addon.type
    return addon_type == amo.ADDON_SEARCH
def is_extracted(self):
"""If the file has been extracted or not."""
# NOTE(review): the expression is cut off after `and not` in this copy; only
# the "self.dest exists" half of the condition is visible here.
return (os.path.exists(self.dest) and not
def _is_binary(self, mimetype, path):
    """
    Decide whether the file at `path` can be shown as text in the viewer.

    Returns True when the extension or the leading bytes match the
    blacklists re-used from amo-validator, the string 'image' when the
    major part of `mimetype` is "image" (binary, but displayable as an
    image), and False otherwise.
    """
    # Re-use the blacklisted data from amo-validator to spot binaries.
    extension = os.path.splitext(path)[1][1:]
    if extension in blacklisted_extensions:
        return True

    if os.path.exists(path) and not os.path.isdir(path):
        # Magic numbers are raw bytes, so read in binary mode; bytearray
        # yields integer byte values for both str and bytes input.
        with storage.open(path, 'rb') as rfile:
            header = tuple(bytearray(rfile.read(4)))
        if any(header[:len(magic)] == magic
               for magic in blacklisted_magic_numbers):
            return True

    if mimetype:
        # Only the major type matters; a value without exactly one '/'
        # must not raise here.
        major = mimetype.split('/', 1)[0]
        if major == 'image':
            return 'image'  # Mark that the file is binary, but an image.

    return False
def read_file(self, allow_empty=False):
    """
    Read the selected file and return its contents.

    Delegates to `_read_file` and, when the selected entry is named
    'manifest.webapp', passes the result through `_process_manifest`.
    If reading raises IOError/OSError, a message is stored in
    ``self.selected['msg']`` and an empty string is returned.
    """
    try:
        contents = self._read_file(allow_empty)

        # If this is a webapp manifest, we should try to pretty print it.
        selected = self.selected
        if selected and selected.get('filename') == 'manifest.webapp':
            contents = self._process_manifest(contents)

        return contents
    except (IOError, OSError):
        self.selected['msg'] = _('That file no longer exists.')
        return ''
# Read the selected file from storage and decode it to text.
# Returns '' when nothing is selected (and allow_empty is set) or when the
# file is larger than settings.FILE_VIEWER_SIZE_LIMIT.
# NOTE(review): this copy has lost the end of the `.format(` call that builds
# the size-limit message and the `try:` that pairs with
# `except UnicodeDecodeError`; restore from upstream before editing.
def _read_file(self, allow_empty=False):
if not self.selected and allow_empty:
return ''
assert self.selected, 'Please select a file'
if self.selected['size'] > settings.FILE_VIEWER_SIZE_LIMIT:
# L10n: {0} is the file size limit of the file viewer.
msg = _('File size is over the limit of {0}.').format(
self.selected['msg'] = msg
return ''
with storage.open(self.selected['full'], 'r') as opened:
cont = opened.read()
# codecs.BOM_UTF16 is the BOM for the platform's native byte order only, so a
# UTF-16 file written with the other byte order is treated as UTF-8 here.
codec = 'utf-16' if cont.startswith(codecs.BOM_UTF16) else 'utf-8'
return cont.decode(codec)
except UnicodeDecodeError:
# Fall back to a lossy decode and tell the user which codec failed.
cont = cont.decode(codec, 'ignore')
# L10n: {0} is the name of the codec that failed.
self.selected['msg'] = (
_('Problems decoding {0}.').format(codec))
return cont
def _process_manifest(self, data):
If we're dealing with a webapp manifest, this will format it nicely for
maximum diff-ability.
# If this isn't a webapp, don't reformat it.
if not self.is_webapp:
return data
json_data = json.loads(data)
except Exception:
# If there are any JSON decode problems, just return the raw file.
return data
def format_dict(data):
def do_format(value):
if isinstance(value, dict):
return format_dict(value)
return value
# We want everything sorted, but we always want these few nodes
# right at the top.
prefix_nodes = ["name", "description", "version"]
prefix_nodes = [(k, data.pop(k)) for k in prefix_nodes if
k in data]
processed_nodes = [(k, do_format(v)) for k, v in data.items()]
return SortedDict(prefix_nodes + sorted(processed_nodes))
return json.dumps(format_dict(json_data), indent=2)
def select(self, file_):
    """Make `file_` the selected entry (None when it is not in the listing)."""
    listing = self.get_files()
    self.selected = listing.get(file_)
def is_binary(self):
    """
    Report whether the selected file is binary.

    Returns the entry's 'binary' value, or None when nothing is selected.
    For binaries a "not viewable" message is stored on the entry, except
    for images when this viewer is for a webapp.
    """
    entry = self.selected
    if not entry:
        return None
    binary = entry['binary']
    viewable_image = binary == 'image' and self.is_webapp
    if binary and not viewable_image:
        entry['msg'] = _('This file is not viewable online. '
                         'Please download the file to view '
                         'the contents.')
    return binary
def is_directory(self):
    """
    Report whether the selected entry is a directory.

    Returns the entry's 'directory' value, or None when nothing is
    selected; for directories a message is stored on the entry.
    """
    entry = self.selected
    if not entry:
        return None
    if entry['directory']:
        entry['msg'] = _('This file is a directory.')
    return entry['directory']
def get_default(self, key=None):
    """
    Pick the file to show by default.

    Without a `key`, a search engine defaults to the first listed file (None
    when the listing is empty); otherwise `key` wins when given, and the
    fallback is 'manifest.webapp' for webapps and 'install.rdf' for the rest.
    """
    use_first_file = self.is_search_engine() and not key
    if use_first_file:
        files = self.get_files()
        if not files:
            return None
        return files.keys()[0]
    if key:
        return key
    if self.is_webapp:
        return 'manifest.webapp'
    return 'install.rdf'
def get_files(self):
    """
    Returns a SortedDict, ordered by the filename of all the files in the
    addon-file. Full of all the useful information you'll need to serve
    this file, build templates etc.

    The listing is built once and kept on the instance.  An empty dict is
    returned when the file has not been extracted, or when building the
    listing raises OSError/IOError.
    """
    if self._files:
        return self._files

    if not self.is_extracted():
        return {}

    # In case a cron job comes along and deletes the files
    # mid tree building.
    try:
        self._files = self._get_files()
    except (OSError, IOError):
        return {}
    return self._files
def truncate(self, filename, pre_length=15,
             post_length=10, ellipsis=u'..'):
    """
    Shorten `filename` around its extension.

    The part before the extension is cut to `pre_length` characters and the
    extension (including its leading dot) to `post_length` characters;
    `ellipsis` is appended to each part that was cut.
    """
    stem, suffix = os.path.splitext(filename)
    if len(stem) <= pre_length:
        short_stem = stem
    else:
        short_stem = stem[:pre_length] + ellipsis
    if len(suffix) <= post_length:
        short_suffix = suffix
    else:
        short_suffix = suffix[:post_length] + ellipsis
    return short_stem + short_suffix
# Map a filename to a syntax name for the syntax highlighter, based on the
# file extension.
# NOTE(review): this copy has lost the docstring delimiters, the reference
# that followed "The list of syntaxes is from:" and the end of the list
# literal below (its closing "]:" is missing); restore from upstream before
# editing.
def get_syntax(self, filename):
Converts a filename into a syntax for the syntax highlighter, with
some modifications for specific common mozilla files.
The list of syntaxes is from:
if filename:
# Extension without the leading dot.
short = os.path.splitext(filename)[1][1:]
# Mozilla-specific extensions are mapped onto a syntax the highlighter knows.
syntax_map = {'xul': 'xml', 'rdf': 'xml', 'jsm': 'js', 'json': 'js'}
short = syntax_map.get(short, short)
if short in ['actionscript3', 'as3', 'bash', 'shell', 'cpp', 'c',
'c#', 'c-sharp', 'csharp', 'css', 'diff', 'html',
'java', 'javascript', 'js', 'jscript', 'patch',
'pas', 'php', 'plain', 'py', 'python', 'sass',
'scss', 'text', 'sql', 'vb', 'vbnet', 'xml', 'xhtml',
return short
# Fallback syntax name.
return 'plain'
# Build the listing that get_files() caches on the instance: one info dict
# per path, keyed by the path relative to self.dest.
# NOTE(review): several lines are missing from this copy -- the statements
# inside both loops of iterate() (nothing visible adds to `all_files`, and
# iterate() is never called) and the closing brace of the `res[short]` dict
# literal.  Restore from upstream before changing this method.
@memoize(prefix='file-viewer', time=60 * 60)
def _get_files(self):
all_files, res = [], SortedDict()
# Not using os.path.walk so we get just the right order.
def iterate(path):
path_dirs, path_files = storage.listdir(path)
for dirname in sorted(path_dirs):
full = os.path.join(path, dirname)
for filename in sorted(path_files):
full = os.path.join(path, filename)
# URL names for webapps carry an "mkt." prefix.
url_prefix = 'mkt.%s' if self.is_webapp else '%s'
for path in all_files:
filename = smart_unicode(os.path.basename(path), errors='replace')
# Path relative to self.dest; the +1 skips the separator after it.
short = smart_unicode(path[len(self.dest) + 1:], errors='replace')
mime, encoding = mimetypes.guess_type(filename)
# mimetypes did not produce a type for the manifest, so set one explicitly.
if not mime and filename == 'manifest.webapp':
mime = 'application/x-web-app-manifest+json'
directory = os.path.isdir(path)
res[short] = {
'binary': self._is_binary(mime, path),
'depth': short.count(os.sep),
'directory': directory,
'filename': filename,
'full': path,
'md5': get_md5(path) if not directory else '',
'mimetype': mime or 'application/octet-stream',
'syntax': self.get_syntax(filename),
'modified': os.stat(path)[stat.ST_MTIME],
'short': short,
'size': os.stat(path)[stat.ST_SIZE],
'truncated': self.truncate(filename),
'url': reverse(url_prefix % 'files.list',
args=[self.file.id, 'file', short]),
'url_serve': reverse(url_prefix % 'files.redirect',
args=[self.file.id, short]),
'version': self.file.version.version,
return res
class DiffHelper(object):
def __init__(self, left, right, is_webapp=False):
    """Wrap `left` and `right` in FileViewers sharing the `is_webapp` flag."""
    self.is_webapp = is_webapp
    # No file has been selected yet.
    self.key = None
    self.left = FileViewer(left, is_webapp=is_webapp)
    self.right = FileViewer(right, is_webapp=is_webapp)
def __str__(self):
return '%s:%s' % (self.left, self.right)
def extract(self):
    """Extract the left file, then the right one."""
    self.left.extract()
    self.right.extract()
def cleanup(self):
    """Clean up the left viewer, then the right one."""
    self.left.cleanup()
    self.right.cleanup()
def is_extracted(self):
    """Truthy only when both the left and the right file are extracted."""
    left_ready = self.left.is_extracted()
    if not left_ready:
        # Same short-circuit as `and`: the right side is not consulted.
        return left_ready
    return self.right.is_extracted()
def get_url(self, short):
    """Return the compare URL of `short` for this left/right pair of files."""
    if self.is_webapp:
        url_name = 'mkt.files.compare'
    else:
        url_name = 'files.compare'
    url_args = [self.left.file.id, self.right.file.id, 'file', short]
    return reverse(url_name, args=url_args)
#@memoize(prefix='file-viewer-get-files', time=60 * 60)
# Return the left listing with every entry's 'url' pointed at the compare
# view and a 'diff' flag set when its md5 differs from the right listing.
# The dicts returned by self.left.get_files() are updated in place.
# NOTE(review): this copy has lost the docstring delimiters and the body of
# `if diff:` -- presumably it collected the entry in `different`, which the
# second loop reads; confirm against upstream.
def get_files(self):
Get the files from the primary and:
- remap any diffable ones to the compare url as opposed to the other
- highlight any diffs
left_files = self.left.get_files()
right_files = self.right.get_files()
different = []
for key, file in left_files.items():
file['url'] = self.get_url(file['short'])
# A file missing on the right has no md5 there, so it also counts as different.
diff = file['md5'] != right_files.get(key, {}).get('md5')
file['diff'] = diff
if diff:
# Now mark every directory above each different file as different.
for diff in different:
for depth in range(diff['depth']):
# Path made of the first depth + 1 components of the file's short path.
# NOTE(review): splits on '/', so this assumes '/'-separated short paths.
key = '/'.join(diff['short'].split('/')[:depth + 1])
if key in left_files:
left_files[key]['diff'] = True
return left_files
#@memoize(prefix='file-viewer-get-deleted-files', time=60 * 60)
def get_deleted_files(self):
    """
    Collect the entries that are listed on the right but not on the left,
    i.e. the files deleted between the two versions.

    Each collected entry gets the compare URL and ``'diff': True``.  Search
    engines always yield an empty result.
    """
    deleted = SortedDict()
    if self.right.is_search_engine():
        return deleted

    left_files = self.left.get_files()
    right_files = self.right.get_files()
    for name, info in right_files.items():
        if name in left_files:
            continue
        # The entry from the right listing is updated in place, not copied.
        info.update({'url': self.get_url(info['short']), 'diff': True})
        deleted[name] = info
    return deleted
def read_file(self):
"""Reads both selected files."""
# NOTE(review): the list literal is cut off after its first element in this
# copy; only the left-hand read (which tolerates an empty selection through
# allow_empty=True) is visible.
return [self.left.read_file(allow_empty=True),
# Record `key` and report whether both sides have a selected file.
# NOTE(review): the docstring text below has lost its delimiters and refers to
# self.one / self.two, while the code uses self.left / self.right.  In the
# visible lines the re-bound `key` is never used and select() is never called
# on either viewer -- presumably those calls were lost from this copy; confirm
# against upstream.
def select(self, key):
Select a file and adds the file object to self.one and self.two
for later fetching. Does special work for search engines.
self.key = key
if key and self.right.is_search_engine():
# There's only one file in a search engine.
key = self.right.get_default()
return self.left.selected and self.right.selected
def is_binary(self):
"""Tells you if both selected files are binary."""
# NOTE(review): the expression is cut off after `or` in this copy.  The
# visible operator is `or`, which would make this true when either side is
# binary rather than both, as the docstring says -- confirm against upstream.
return (self.left.is_binary() or
def is_diffable(self):
    """
    Tell whether the selected files can be diffed.

    False when neither side has a selection, or when either side reports a
    binary file or a directory; True otherwise.
    """
    if not (self.left.selected or self.right.selected):
        return False
    for viewer in (self.left, self.right):
        if viewer.is_binary() or viewer.is_directory():
            return False
    return True
def copyfileobj(fsrc, fdst, length=64*1024):
"""copy data from file-like object fsrc to file-like object fdst"""
# Read in chunks of at most `length` bytes (64 KiB by default).
while 1:
buf = fsrc.read(length)
# NOTE(review): the body of this `if` and whatever followed it inside the loop
# are missing from this copy -- nothing visible here writes to `fdst` or
# leaves the loop; restore from upstream.
if not buf:
# Recursively delete what the storage backend lists under `prefix`: the files
# directly inside it, then each sub-directory through a recursive call.
# NOTE(review): nothing visible here removes `prefix` itself; the function may
# continue past the end of this excerpt.
def rmtree(prefix):
dirs, files = storage.listdir(prefix)
for fname in files:
storage.delete(os.path.join(prefix, fname))
for d in dirs:
rmtree(os.path.join(prefix, d))