313 строки
13 KiB
Python
313 строки
13 KiB
Python
# caching HTTP proxy for hgweb
|
|
#
|
|
# Copyright Unity Technologies, Mads Kiilerich <madski@unity3d.com>
|
|
# Copyright Matt Mackall <mpm@selenic.com> and others
|
|
#
|
|
# This software may be used and distributed according to the terms of the
|
|
# GNU General Public License version 2 or any later version.
|
|
|
|
'''Caching HTTP proxy for hgweb hosting
|
|
|
|
This proxy can serve as an "accelerator" or "concentrator" that might reduce
|
|
the network traffic and improve the user experience where the bandwidth is
|
|
limited and the same data is fetched multiple times.
|
|
|
|
Enable the extension with::
|
|
|
|
[extensions]
|
|
hgwebcachingproxy = /path/to/hgwebcachingproxy.py
|
|
|
|
For light-weight usage or testing run the proxy similar to :hg:`serve`::
|
|
|
|
hg proxy --port 1234 http://servername/ /var/cache/hgrepos
|
|
|
|
Instead of pointing Mercurial clients at::
|
|
|
|
http://servername/repos/name
|
|
|
|
point them at the proxy:
|
|
|
|
http://proxyname:1234/repos/name
|
|
|
|
The proxy will make sure its local cache of the repository is fully updated
|
|
when starting a new session. Sessions are defined by the repository name,
|
|
username and credentials, and they expire after 30 seconds
|
|
(``[hgwebcachingproxy] ttl``) without usage. All read-only requests within a
|
|
session will be served locally. Pushes will be forwarded straight to the main
|
|
server, and, after pushing, the proxy will do a pull to make sure the mirror is
|
|
up-to-date. Largefiles will be fetched and cached on demand.
|
|
|
|
Only repos that already exist in the local cache will be served. The cache must
|
|
thus manually be seeded with repositories that is to be served - either with a
|
|
new or existing clone or an empty repo which will then be populated on first
|
|
request.
|
|
|
|
The proxy will by default assume that the server uses HTTP basic authentication
|
|
(unless ``[hgwebcachingproxy] anonymous`` is true). If no credentials are
|
|
provided they will be requested using (using ``[hgwebcachingproxy] realm``) to
|
|
avoid slow extra round trips to the server. All credentials for access to a
|
|
repository will be forwarded to the server for authentication and
|
|
authorization. The server will not be aware of the actual requests that are
|
|
served from the local cache and its logs will thus not be fully accurate.
|
|
|
|
The URL of the server can also be configured as ``[hgwebcachingproxy]
|
|
serverurl``, and the path to the cached repositories can be configured in
|
|
``[hgwebcachingproxy] cachepath``.
|
|
|
|
For usage as WSGI application create a proxy.wsgi with configuration::
|
|
|
|
import sys; sys.path.insert(0, '/path/to/hg/')
|
|
from hgext.hgwebcachingproxy import wsgi
|
|
application = wsgi(serverurl='https://.../', cachepath='/path/to/repos/')
|
|
|
|
In an apache mod_wsgi configuration this proxy.wsgi can be used like::
|
|
|
|
WSGIPassAuthorization On
|
|
WSGIScriptAlias / /path/to/proxy.wsgi
|
|
'''
|
|
|
|
import os.path
|
|
import urllib2, posixpath, time
|
|
from mercurial import cmdutil, util, commands, hg, error
|
|
from mercurial import ui as uimod
|
|
from mercurial.hgweb import protocol, common, request
|
|
from mercurial.i18n import _
|
|
from hgext.largefiles import lfutil, basestore
|
|
|
|
cmdtable = {}
|
|
command = cmdutil.command(cmdtable)
|
|
testedwith = '2.8'
|
|
|
|
commands.norepo += " proxy"
|
|
|
|
# username,passwd,path mapping to peer
|
|
peercache = dict()
|
|
|
|
class proxyserver(object):
|
|
def __init__(self, ui, serverurl, cachepath, anonymous):
|
|
self.ui = ui or uimod.ui()
|
|
self.serverurl = (serverurl or
|
|
self.ui.config('hgwebcachingproxy', 'serverurl'))
|
|
self.cachepath = (cachepath or
|
|
self.ui.config('hgwebcachingproxy', 'cachepath'))
|
|
if anonymous is None:
|
|
anonymous = self.ui.configbool('hgwebcachingproxy', 'anonymous')
|
|
self.anonymous = anonymous
|
|
|
|
if not self.serverurl:
|
|
raise util.Abort(_('no server url'))
|
|
u = util.url(self.serverurl)
|
|
if u.scheme not in ['http', 'https']:
|
|
raise util.Abort(_('invalid scheme in server url %s') % serverurl)
|
|
|
|
if not self.cachepath or not os.path.isdir(self.cachepath):
|
|
raise util.Abort(_('cache path %s is not a directory') %
|
|
self.cachepath)
|
|
self.ttl = self.ui.configint('hgwebcachingproxy', 'ttl', 30)
|
|
self.authheaders = [('WWW-Authenticate',
|
|
'Basic realm="%s"' %
|
|
self.ui.config('hgwebcachingproxy', 'realm',
|
|
'Mercurial Proxy Authentication'))]
|
|
|
|
def __call__(self, env, respond):
|
|
req = request.wsgirequest(env, respond)
|
|
return self.run_wsgi(req)
|
|
|
|
def run_wsgi(self, req):
|
|
proto = protocol.webproto(req, self.ui)
|
|
|
|
u = util.url(self.serverurl)
|
|
|
|
# Simple path validation - probably only sufficient on Linux
|
|
path = req.env['PATH_INFO'].replace('\\', '/').strip('/')
|
|
if ':' in path or path.startswith('.') or '/.' in path:
|
|
self.ui.warn(_('bad request path %r\n') % path)
|
|
req.respond(common.HTTP_BAD_REQUEST, protocol.HGTYPE)
|
|
return []
|
|
|
|
# Forward HTTP basic authorization headers through the layers
|
|
authheader = req.env.get('HTTP_AUTHORIZATION')
|
|
if authheader and authheader.lower().startswith('basic '):
|
|
userpasswd = authheader[6:].decode('base64')
|
|
if ':' in userpasswd:
|
|
u.user, u.passwd = userpasswd.split(':', 1)
|
|
|
|
# Bounce early on missing credentials
|
|
if not (self.anonymous or u.user and u.passwd):
|
|
er = common.ErrorResponse(common.HTTP_UNAUTHORIZED,
|
|
'Authentication is mandatory',
|
|
self.authheaders)
|
|
req.respond(er, protocol.HGTYPE)
|
|
return ['HTTP authentication required']
|
|
|
|
# MIME and HTTP allows multiple headers by the same name - we only
|
|
# use and care about one
|
|
args = dict((k, v[0]) for k, v in proto._args().items())
|
|
cmd = args.pop('cmd', None)
|
|
self.ui.write("%s@%s cmd: %s args: %s\n" %
|
|
(u.user, path or '/', cmd, ' '.join('%s=%s' % (k, v)
|
|
for k, v in sorted(args.items()))))
|
|
|
|
if not cmd:
|
|
self.ui.warn(_('no command in request\n'))
|
|
req.respond(common.HTTP_BAD_REQUEST, protocol.HGTYPE)
|
|
return []
|
|
|
|
u.path = posixpath.join(u.path or '', req.env['PATH_INFO']).strip('/')
|
|
url = str(u)
|
|
|
|
repopath = os.path.join(self.cachepath, path)
|
|
try:
|
|
repo = hg.repository(self.ui, path=repopath)
|
|
except error.RepoError, e:
|
|
self.ui.warn(_("error with path %r: %s\n") % (path, e))
|
|
req.respond(common.HTTP_NOT_FOUND, protocol.HGTYPE)
|
|
return ['repository %s not found in proxy' % path]
|
|
path = path or '/'
|
|
|
|
try:
|
|
# Reuse auth if possible - checking remotely is expensive
|
|
peer, ts = peercache.get((u.user, u.passwd, path), (None, None))
|
|
if peer is not None and time.time() > ts + self.ttl:
|
|
self.ui.note(_('%s@%s expired, age %s\n') %
|
|
(u.user, path, time.time() - ts))
|
|
peer = None
|
|
peercache[(u.user, u.passwd, path)] = (peer, ts)
|
|
# peer is now None or valid
|
|
|
|
if cmd == 'capabilities' and not peer:
|
|
# new session on expired repo - do auth and pull again
|
|
self.ui.note(_('%s@%s - pulling\n') % (u.user, path))
|
|
t0 = time.time()
|
|
peer = hg.peer(self.ui, {}, url)
|
|
r = repo.pull(peer)
|
|
self.ui.debug('pull got %r after %s\n' % (r, time.time() - t0))
|
|
peercache[(u.user, u.passwd, path)] = (peer, time.time())
|
|
elif ts is None: # never authenticated
|
|
self.ui.note('%s@%s - authenticating\n' % (u.user, path))
|
|
peer = hg.peer(self.ui, {}, url)
|
|
self.ui.debug('%s@%s - authenticated\n' % (u.user, path))
|
|
peercache[(u.user, u.passwd, path)] = (peer, time.time())
|
|
# user is now auth'ed for this session
|
|
|
|
# fetch largefiles whenever they are referenced
|
|
# (creating fake/combined batch statlfile responses is too complex)
|
|
shas = []
|
|
if cmd in ['statlfile', 'getlfile']:
|
|
shas.append(args['sha'])
|
|
if cmd == 'batch':
|
|
for x in args['cmds'].split(';'):
|
|
if x.startswith('statlfile sha='):
|
|
shas.append(x[14:])
|
|
missingshas = [sha for sha in shas
|
|
if not lfutil.findfile(repo, sha)]
|
|
if missingshas:
|
|
self.ui.debug('%s@%s - missing %s\n' %
|
|
(u.user, path, ' '.join(missingshas)))
|
|
if not peer:
|
|
peer = hg.peer(self.ui, {}, url)
|
|
store = basestore._openstore(repo, peer, False)
|
|
existsremotely = store.exists(missingshas)
|
|
for sha, available in sorted(existsremotely.iteritems()):
|
|
if not available:
|
|
self.ui.note('%s@%s - %s not available remotely\n' %
|
|
(u.user, path, sha))
|
|
continue
|
|
self.ui.note('%s@%s - fetching %s\n' % (u.user, path, sha))
|
|
gotit = store._gethash(sha, sha)
|
|
if not gotit:
|
|
self.ui.warn(_('failed to get %s for %s@%s remotely\n'
|
|
) % (sha, u.user, path))
|
|
peercache[(u.user, u.passwd, path)] = (peer, time.time())
|
|
|
|
# Forward write commands to the remote server
|
|
if cmd in ['putlfile', 'unbundle', 'pushkey']:
|
|
size = int(req.env.get('CONTENT_LENGTH', 0))
|
|
self.ui.debug('reading bundle with size %s\n' % size)
|
|
data = req.read(int(size))
|
|
|
|
if not peer:
|
|
peer = hg.peer(self.ui, {}, url)
|
|
self.ui.note(_('calling %s remotely\n') % cmd)
|
|
r = peer._call(cmd, data=data, **args)
|
|
if cmd == 'unbundle':
|
|
self.ui.debug('fetching changes back\n')
|
|
repo.pull(peer)
|
|
peercache[(u.user, u.passwd, path)] = (peer, time.time())
|
|
req.respond(common.HTTP_OK, protocol.HGTYPE)
|
|
return [r]
|
|
|
|
# Now serve it locally
|
|
return protocol.call(repo, req, cmd)
|
|
|
|
except urllib2.HTTPError, inst:
|
|
self.ui.warn(_('HTTPError connecting to server: %s\n') % inst)
|
|
req.respond(inst.code, protocol.HGTYPE)
|
|
return ['HTTP error']
|
|
except util.Abort, e: # hg.peer will abort when it gets 401
|
|
if e.message not in ['http authorization required',
|
|
'authorization failed']:
|
|
raise
|
|
self.ui.debug('server requires authentication\n')
|
|
er = common.ErrorResponse(
|
|
common.HTTP_UNAUTHORIZED
|
|
if e.message == 'http authorization required'
|
|
else common.HTTP_BAD_REQUEST,
|
|
'Authentication is required',
|
|
self.authheaders)
|
|
req.respond(er, protocol.HGTYPE)
|
|
return ['HTTP authentication required']
|
|
|
|
|
|
@command('^proxy',
|
|
[('A', 'accesslog', '', _('name of access log file to write to'),
|
|
_('FILE')),
|
|
('d', 'daemon', None, _('run server in background')),
|
|
('', 'daemon-pipefds', '', _('used internally by daemon mode'), _('NUM')),
|
|
('E', 'errorlog', '', _('name of error log file to write to'), _('FILE')),
|
|
# use string type, then we can check if something was passed
|
|
('p', 'port', '', _('port to listen on (default: 8000)'), _('PORT')),
|
|
('a', 'address', '', _('address to listen on (default: all interfaces)'),
|
|
_('ADDR')),
|
|
('', 'prefix', '', _('prefix path to serve from (default: server root)'),
|
|
_('PREFIX')),
|
|
('', 'pid-file', '', _('name of file to write process ID to'), _('FILE')),
|
|
('6', 'ipv6', None, _('use IPv6 in addition to IPv4')),
|
|
('', 'certificate', '', _('SSL certificate file'), _('FILE')),
|
|
('', 'anonymous', None, _("authentication is not mandatory"))],
|
|
_('[OPTIONS]... SERVERURL CACHEPATH'))
|
|
def proxy(ui, serverurl, cachepath, **opts):
|
|
"""start stand-alone caching hgweb proxy
|
|
|
|
Start a local HTTP server that acts as a caching proxy for a remote
|
|
server SERVERURL. Fetched data will be stored locally in the directory
|
|
CACHEPATH and reused for future requests for the same data.
|
|
|
|
By default, the server logs accesses to stdout and errors to
|
|
stderr. Use the -A/--accesslog and -E/--errorlog options to log to
|
|
files.
|
|
|
|
To have the server choose a free port number to listen on, specify
|
|
a port number of 0; in this case, the server will print the port
|
|
number it uses.
|
|
|
|
See :hg:`hg help hgwebcachingproxy` for more details.
|
|
|
|
Returns 0 on success.
|
|
"""
|
|
if opts.get('port'):
|
|
opts['port'] = util.getport(opts.get('port'))
|
|
|
|
optlist = ("address port prefix ipv6 accesslog errorlog certificate")
|
|
for o in optlist.split():
|
|
val = opts.get(o, '')
|
|
if val not in (None, ''):
|
|
ui.setconfig("web", o, val)
|
|
|
|
app = proxyserver(ui, serverurl, cachepath, opts.get('anonymous'))
|
|
service = commands.httpservice(ui, app, opts)
|
|
cmdutil.service(opts, initfn=service.init, runfn=service.run)
|
|
|
|
def wsgi(ui=None, serverurl=None, cachepath=None, anonymous=None):
|
|
return proxyserver(ui, serverurl, cachepath, anonymous)
|