Mirror of https://github.com/github/codeql.git
Bazel: allow LFS rules to use cached downloads without internet
Even when the repository cache was prefilled, the LFS rules still queried LFS URLs. The strategy is now to first try to fetch the files from the repository cache (possible by passing an empty URL list and `allow_fail` to `repository_ctx.download`), and to run the LFS protocol only if that fails. This is made possible by adding a `--hash-only` flag to `git_lfs_probe.py`, so the expected sha256 can be obtained without fetching a download URL. This is also an optimization: no unneeded access (including the slightly slow SSH call) is performed when the repository cache is warm.
Parent: a50584c665
Commit: 170e2231d4
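The essence of the cache-first strategy, as a minimal sketch (the rule name, file name, and attributes below are illustrative, not part of this commit): with an empty URL list, `repository_ctx.download` can only be satisfied by a repository cache entry matching the given sha256, and `allow_fail = True` makes a cache miss return a failed result instead of aborting the rule.

    def _cached_fetch_impl(repository_ctx):
        # Cache-only attempt: no URLs are given, so this succeeds only if the
        # repository cache already holds a file with this sha256.
        res = repository_ctx.download(
            [],
            "file.bin",
            sha256 = repository_ctx.attr.sha256,
            allow_fail = True,
        )
        if not res.success:
            # Cache miss: fall back to an actual network fetch.
            repository_ctx.download(
                repository_ctx.attr.urls,
                "file.bin",
                sha256 = repository_ctx.attr.sha256,
            )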
misc/bazel/internal/git_lfs_probe.py:

@@ -2,9 +2,10 @@
 
 """
 Probe lfs files.
-For each source file provided as output, this will print:
+For each source file provided as input, this will print:
 * "local", if the source file is not an LFS pointer
 * the sha256 hash, a space character and a transient download link obtained via the LFS protocol otherwise
+If --hash-only is provided, the transient URL will not be fetched and printed
 """
 
 import sys

@@ -19,6 +20,13 @@ import re
 import base64
 from dataclasses import dataclass
 from typing import Dict
+import argparse
+
+def options():
+    p = argparse.ArgumentParser(description=__doc__)
+    p.add_argument("--hash-only", action="store_true")
+    p.add_argument("sources", type=pathlib.Path, nargs="+")
+    return p.parse_args()
 
 
 @dataclass

@@ -30,7 +38,8 @@ class Endpoint:
         self.headers.update((k.capitalize(), v) for k, v in d.items())
 
 
-sources = [pathlib.Path(arg).resolve() for arg in sys.argv[1:]]
+opts = options()
+sources = [p.resolve() for p in opts.sources]
 source_dir = pathlib.Path(os.path.commonpath(src.parent for src in sources))
 source_dir = subprocess.check_output(["git", "rev-parse", "--show-toplevel"], cwd=source_dir, text=True).strip()
 

@@ -84,11 +93,15 @@ def get_endpoint():
 # see https://github.com/git-lfs/git-lfs/blob/310d1b4a7d01e8d9d884447df4635c7a9c7642c2/docs/api/basic-transfers.md
 def get_locations(objects):
     ret = ["local" for _ in objects]
-    endpoint = get_endpoint()
     indexes = [i for i, o in enumerate(objects) if o]
     if not indexes:
         # all objects are local, do not send an empty request as that would be an error
         return ret
+    if opts.hash_only:
+        for i in indexes:
+            ret[i] = objects[i]["oid"]
+        return ret
+    endpoint = get_endpoint()
     data = {
         "operation": "download",
         "transfers": ["basic"],
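Taken together, the script's per-file output contract is: `local` for a regular file, `<sha256> <transient-url>` for an LFS pointer, and just `<sha256>` when --hash-only is given. In the hash-only case the oid is read directly from the pointer file on disk, so neither the endpoint discovery (the SSH call) nor the LFS batch request runs. A Git LFS pointer file has the standard form (values here are illustrative):

    version https://git-lfs.github.com/spec/v1
    oid sha256:ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad
    size 1048576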
The Bazel rules file defining `lfs_smudge` (path not shown in this view):

@@ -1,36 +1,44 @@
 def lfs_smudge(repository_ctx, srcs, extract = False, stripPrefix = None):
-    for src in srcs:
-        repository_ctx.watch(src)
-    script = Label("//misc/bazel/internal:git_lfs_probe.py")
     python = repository_ctx.which("python3") or repository_ctx.which("python")
     if not python:
         fail("Neither python3 nor python executables found")
-    repository_ctx.report_progress("querying LFS url(s) for: %s" % ", ".join([src.basename for src in srcs]))
-    res = repository_ctx.execute([python, script] + srcs, quiet = True)
-    if res.return_code != 0:
-        fail("git LFS probing failed while instantiating @%s:\n%s" % (repository_ctx.name, res.stderr))
-    promises = []
-    for src, loc in zip(srcs, res.stdout.splitlines()):
-        if loc == "local":
-            if extract:
-                repository_ctx.report_progress("extracting local %s" % src.basename)
-                repository_ctx.extract(src, stripPrefix = stripPrefix)
-            else:
-                repository_ctx.report_progress("symlinking local %s" % src.basename)
-                repository_ctx.symlink(src, src.basename)
+    script = Label("//misc/bazel/internal:git_lfs_probe.py")
+
+    def probe(srcs, hash_only = False):
+        repository_ctx.report_progress("querying LFS url(s) for: %s" % ", ".join([src.basename for src in srcs]))
+        cmd = [python, script]
+        if hash_only:
+            cmd.append("--hash-only")
+        cmd.extend(srcs)
+        res = repository_ctx.execute(cmd, quiet = True)
+        if res.return_code != 0:
+            fail("git LFS probing failed while instantiating @%s:\n%s" % (repository_ctx.name, res.stderr))
+        return res.stdout.splitlines()
+
+    for src in srcs:
+        repository_ctx.watch(src)
+    infos = probe(srcs, hash_only = True)
+    remote = []
+    for src, info in zip(srcs, infos):
+        if info == "local":
+            repository_ctx.report_progress("symlinking local %s" % src.basename)
+            repository_ctx.symlink(src, src.basename)
         else:
-            sha256, _, url = loc.partition(" ")
-            if extract:
-                # we can't use skylib's `paths.split_extension`, as that only gets the last extension, so `.tar.gz`
-                # or similar wouldn't work
-                # it doesn't matter if file is something like some.name.zip and possible_extension == "name.zip",
-                # download_and_extract will just append ".name.zip" its internal temporary name, so extraction works
-                possible_extension = ".".join(src.basename.rsplit(".", 2)[-2:])
-                repository_ctx.report_progress("downloading and extracting remote %s" % src.basename)
-                repository_ctx.download_and_extract(url, sha256 = sha256, stripPrefix = stripPrefix, type = possible_extension)
-            else:
-                repository_ctx.report_progress("downloading remote %s" % src.basename)
-                repository_ctx.download(url, src.basename, sha256 = sha256)
+            repository_ctx.report_progress("trying cache for remote %s" % src.basename)
+            res = repository_ctx.download([], src.basename, sha256 = info, allow_fail = True)
+            if not res.success:
+                remote.append(src)
+    if remote:
+        infos = probe(remote)
+        for src, info in zip(remote, infos):
+            sha256, _, url = info.partition(" ")
+            repository_ctx.report_progress("downloading remote %s" % src.basename)
+            repository_ctx.download(url, src.basename, sha256 = sha256)
+    if extract:
+        for src in srcs:
+            repository_ctx.report_progress("extracting %s" % src.basename)
+            repository_ctx.extract(src.basename, stripPrefix = stripPrefix)
+            repository_ctx.delete(src.basename)
 
 def _download_and_extract_lfs(repository_ctx):
     attr = repository_ctx.attr
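For context, a hypothetical repository rule driving `lfs_smudge` (the rule name and attributes are illustrative; the real caller, `_download_and_extract_lfs`, continues past the visible hunk):

    def _lfs_files_impl(repository_ctx):
        srcs = [repository_ctx.path(l) for l in repository_ctx.attr.srcs]
        # With a warm repository cache every file resolves via
        # repository_ctx.download([], ...); only cache misses trigger the
        # LFS probe's network path.
        lfs_smudge(repository_ctx, srcs, extract = False)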