This commit is contained in:
Magnus Edenhill 2021-05-04 13:14:03 +02:00
Parent 825865f993
Commit 6c12e4cca6
2 changed files with 152 additions and 0 deletions

View file

@@ -60,3 +60,14 @@ To create a bundle (tarball) of librdkafka self-contained static library
builds, use the following command:
$ ./release.py --class StaticPackage v1.1.0
### Clean up S3 bucket
To clean up old non-release/non-RC builds from the S3 bucket, first check with:
$ AWS_PROFILE=.. ./cleanup-s3.py --age 360
Verify that the listed objects should really be deleted, then delete:
$ AWS_PROFILE=.. ./cleanup-s3.py --age 360 --delete
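
Note: without --delete the script runs as a dry run and only lists the
objects it would remove. Only objects older than --age days whose key
carries no git tag, or a non-release/non-RC tag, are considered;
release and RC builds are never deleted.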

packaging/nuget/cleanup-s3.py (new executable file, 141 lines)
View file

@@ -0,0 +1,141 @@
#!/usr/bin/env python3
#
# Clean up test builds from librdkafka's S3 bucket.
# This also covers python builds.
#

import re
from datetime import datetime, timezone
import boto3
import argparse

# Object keys in the S3 bucket encode build metadata as tokens, which
# this script parses to distinguish release builds (kept) from
# disposable test builds (eligible for deletion).
#
# The artifacts' folder in the S3 bucket must have the following token
# format:
#   <token>-[<value>]__   (repeat)
#
# Recognized tokens (unrecognized tokens are ignored):
#   p       - project (e.g., "confluent-kafka-python")
#   bld     - builder (e.g., "travis")
#   plat    - platform ("osx", "linux", ..)
#   arch    - arch ("x64", ..)
#   tag     - git tag
#   sha     - git sha
#   bid     - builder's build-id
#   bldtype - Release, Debug (appveyor)
#   lnk     - std, static
#
# Example:
#   librdkafka/p-librdkafka__bld-travis__plat-linux__arch-x64__tag-v0.0.62__sha-d051b2c19eb0c118991cd8bc5cf86d8e5e446cde__bid-1562.1/librdkafka.tar.gz
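
# For instance, the example key above parses into a dict roughly like
# (illustrative; the simple token regex folds the leading path segment
# and the trailing filename into adjacent values):
#   {'p': 'librdkafka', 'bld': 'travis', 'plat': 'linux', 'arch': 'x64',
#    'tag': 'v0.0.62', 'sha': 'd051b2c19eb0..', 'bid': '1562.1'}
# The 'tag' value is what decides whether an object is kept.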

s3_bucket = 'librdkafka-ci-packages'


def may_delete(path):
    """ Returns True if the S3 object path is eligible for deletion,
        i.e. it carries no git tag, or a non-release/non-RC tag. """

    # The path contains the tokens needed to perform
    # matching of project, gitref, etc.
    rinfo = re.findall(r'(?P<tag>[^-]+)-(?P<val>.*?)(?:__|$)', path)
    if not rinfo:
        print(f"Incorrect folder/file name format for {path}")
        return False

    info = dict(rinfo)

    tag = info.get('tag', None)
    if tag is not None and (len(tag) == 0 or tag.startswith('$(')):
        # AppVeyor doesn't substitute $(APPVEYOR_REPO_TAG_NAME)
        # with an empty value when not set, it leaves that token
        # in the string - so translate that to no tag.
        del info['tag']
        tag = None

    if tag is None:
        return True

    if re.match(r'^v?\d+\.\d+\.\d+(-?RC\d+)?$', tag,
                flags=re.IGNORECASE) is None:
        return True

    return False
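
# Informal examples of the rule above (hypothetical keys, not actual
# bucket contents):
#   may_delete('p-librdkafka__bld-travis__sha-abc123__bid-1.1')
#       -> True  (no tag at all)
#   may_delete('p-librdkafka__tag-$(APPVEYOR_REPO_TAG_NAME)__sha-abc123')
#       -> True  (unsubstituted AppVeyor tag is treated as no tag)
#   may_delete('p-librdkafka__tag-v1.6.1__sha-abc123')
#       -> False (release tag, kept)
#   may_delete('p-librdkafka__tag-v1.6.1-RC2__sha-abc123')
#       -> False (release candidate, kept)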


def collect_s3(s3, min_age_days=60):
    """ Collect artifact paths from S3 that are eligible for deletion.
        Returns (eligible_paths, total_object_count). """
    now = datetime.now(timezone.utc)
    eligible = []
    totcnt = 0
    # Note: list_objects_v2 returns at most 1000 objects per call,
    # so use the continuation token to read the full listing.
    cont_token = None
    more = True
    while more:
        if cont_token is not None:
            res = s3.list_objects_v2(Bucket=s3_bucket,
                                     ContinuationToken=cont_token)
        else:
            res = s3.list_objects_v2(Bucket=s3_bucket)

        if res.get('IsTruncated'):
            cont_token = res.get('NextContinuationToken')
        else:
            more = False

        for item in res.get('Contents', []):
            totcnt += 1
            age = (now - item.get('LastModified')).days
            path = item.get('Key')
            if age >= min_age_days and may_delete(path):
                eligible.append(path)

    return (eligible, totcnt)
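
# Note: an equivalent alternative to the manual continuation-token loop
# above is boto3's built-in paginator, e.g.:
#   for page in s3.get_paginator('list_objects_v2').paginate(Bucket=s3_bucket):
#       for item in page.get('Contents', []):
#           ...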


def chunk_list(lst, cnt):
    """ Split lst into chunks of (at most) cnt elements. """
    for i in range(0, len(lst), cnt):
        yield lst[i:i + cnt]
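
# e.g., list(chunk_list([1, 2, 3, 4, 5], 2)) -> [[1, 2], [3, 4], [5]]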


if __name__ == '__main__':

    parser = argparse.ArgumentParser()
    parser.add_argument("--delete",
                        help="WARNING! Don't just check, actually delete S3 objects.",
                        action="store_true")
    parser.add_argument("--age", help="Minimum object age in days.",
                        type=int, default=360)

    args = parser.parse_args()
    dry_run = not args.delete
    min_age_days = args.age

    if dry_run:
        op = "Eligible for deletion"
    else:
        op = "Deleting"

    s3 = boto3.client('s3')

    # Collect eligible artifacts
    eligible, totcnt = collect_s3(s3, min_age_days=min_age_days)

    print(f"{len(eligible)}/{totcnt} eligible artifacts to delete")

    # Delete in chunks of 1000 (the maximum delete_objects accepts)
    for chunk in chunk_list(eligible, 1000):
        print(op + ":\n" + '\n'.join(chunk))
        if dry_run:
            continue

        res = s3.delete_objects(Bucket=s3_bucket,
                                Delete={
                                    'Objects': [{'Key': x} for x in chunk],
                                    'Quiet': True
                                })
        errors = res.get('Errors', [])
        if errors:
            raise Exception(f"Delete failed: {errors}")