Rework into a totally new comparison script.

This commit is contained in:
J.C. Jones 2020-11-02 09:34:34 -07:00
Родитель d220316aad
Коммит fb4d665669
5 изменённых файлов: 138 добавлений и 300 удалений

Просмотреть файл

@ -1,67 +1,36 @@
# Requirements
You'll need read access to the `prod` bucket and read/write access to the `staging` bucket on a suitable copy of Kinto.
Install deps with `pip install -r requirements.txt`
# Usage
## Configure the YAML
```
cp config.yml.example .config.yml
vim .config.yml
```
`python3 compare.py`
## Run
```
python3 kinto-blacklist-entry-checker.py --bug 8842097
```
or
```
# Grab a changeset from BMO:
curl 'https://bug1343305.bmoattachments.org/attachment.cgi?id=8842097' > expected.txt
# and then process that changeset
python3 kinto-blacklist-entry-checker.py --expect expected.txt
```
## Example Output
```
LDAP account password for user@company:
Password:
Dataset contains 24 added and 29 deleted entries
Deleted:
['0ab1b53f-5695-4802-9a35-4de16961c10f',
'19b97966-beb9-46cf-a21b-2df51b5562ec',
'20430047-9c95-40cd-9865-912bf7dffa5b',
'2b7eba40-729e-4829-b70b-574b6074f336',
'397417ee-aa00-4da0-9bbe-6498f2afa060',
'40b9c372-91f6-4ba4-8600-3d36af7a14bd',
'47e13a94-1bbf-4e08-be63-b74dbf69491c',
'50edb292-efb2-4ac3-95c1-187686ebffdb',
'5256dadc-6cdc-4406-b33a-b1052dad1533',
'5342ffc1-b544-481e-9201-4dfdfaf08c63',
'534cb5be-6999-4590-a38f-8c7cef1ac4e8',
'57445c75-0c3b-4a37-ad46-d86a5bba22f2',
'5c33614b-1d6b-41c1-b483-e9fd39b0f205',
'669e5b07-084a-4ea9-8e02-14df475366d2',
'6b6f7eac-f719-4e85-acb6-ae994def3be0',
'6f21ffef-e1da-4abc-9711-3591c9d205e4',
'70ef0911-75b3-49ad-8c5f-b191b8cd4ce2',
'7ef66b53-9caa-4689-846d-939961ffcc97',
'894a6814-07d5-49ef-b1a0-08028447c361',
'8d4a85e9-04b1-436f-93a7-eb5053d39d93',
'8e9a1bed-323b-496c-a1c2-0208ba07576b',
'91bb20ce-49f0-4900-98ab-8874088b8af5',
'91be3663-de9b-41f9-89d3-d7c0e43e9d67',
'9682dffb-15a7-4cbf-a42c-8657a9fb21f9',
'9a0827b2-8605-4469-af6c-cc3c658282aa',
'acf9935a-f12f-4dd0-8472-3ba211cb7b89',
'b817a0bb-19a3-4f40-b5b8-1b6041045308',
'd81af35e-c9b3-4e8f-a562-6944a86d3a13',
'dce279fb-0dc6-42d7-91f5-e38222f65e2a']
Expected, but not found in Kinto:
[]
Unexpected, found in Kinto:
[]
```
[09:33:43] Stage-Stage: 1243 Stage-Preview: 1243 Stage-Published: 1240 compare.py:72
[09:33:45] Prod-Stage: 1243 Prod-Preview: 1243 Prod-Published: 1240 compare.py:78
Verifying stage against preview compare.py:84
stage/security-state-staging (1243) and stage/security-state-preview (1243) compare.py:87
are equivalent
stage/security-state-staging (1243) and prod/security-state-staging (1243) compare.py:87
are equivalent
stage/security-state-staging (1243) and prod/security-state-preview (1243) compare.py:87
are equivalent
stage/security-state-preview (1243) and prod/security-state-staging (1243) compare.py:87
are equivalent
stage/security-state-preview (1243) and prod/security-state-preview (1243) compare.py:87
are equivalent
prod/security-state-staging (1243) and prod/security-state-preview (1243) compare.py:87
are equivalent
There are 3 changes waiting: compare.py:92
Added entries
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ ┃ ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ MF0xCzAJBgNVBAYTAkpQMSUwIwYDVQQKExxTRUNPTSBUcnVzdCBTeXN0ZW1zIENPLixM… │ Irmw1g== │
│ MGAxCzAJBgNVBAYTAk5MMR4wHAYDVQQKDBVTdGFhdCBkZXIgTmVkZXJsYW5kZW4xMTAv… │ akWWZTytiy58nyLOsqpnaQ== │
│ MFAxJDAiBgNVBAsTG0dsb2JhbFNpZ24gRUNDIFJvb3QgQ0EgLSBSNTETMBEGA1UEChMK… │ Ae5fInnr9AhpWVIjkw== │
└───────────────────────────────────────────────────────────────────────┴──────────────────────────┘
```

109
compare.py Normal file
Просмотреть файл

@ -0,0 +1,109 @@
# Verify Kinto sets to ensure coherency between Stage and Prod, and all affected buckets
import requests, base64, io, itertools
from collections import UserList
from rich import console, table
# Shared rich Console used for every log line and table this script prints.
# NOTE: this rebinds the name `console`, which up to this line referred to the
# `rich.console` module imported above; after it, only the instance is reachable.
console = console.Console()
# Base API URLs of the two servers whose buckets are compared (prod and stage).
base_prod="https://settings.prod.mozaws.net/v1"
base_stage="https://settings.stage.mozaws.net/v1"
# Bucket names read from each server; get_records() fetches the `onecrl`
# collection records of whichever bucket it is given.
bucket_staging="security-state-staging"
bucket_preview="security-state-preview"
bucket_publish="security-state"
def canonicalize(s):
    """Return *s* without its leading and trailing newlines and spaces.

    Only the newline and space characters are removed; any other whitespace
    (tabs, carriage returns) is left untouched.
    """
    trimmed = s.strip("\n ")
    return trimmed
def make_entry(**data):
    """Build the canonical comparison entry for one record.

    The record's fields arrive as keyword arguments. Two shapes are accepted:

    * ``issuer`` + ``serial`` (``issuerName`` + ``serialNumber`` are treated
      as aliases and take precedence when both are present), or
    * ``pubKeyHash`` + ``subject``.

    Returns:
        A two-key dict holding the canonicalized values of whichever pair was
        found; the issuer/serial pair wins when both pairs are present.

    Raises:
        Exception: if neither pair of fields is present.
    """
    if "issuerName" in data and "serialNumber" in data:
        data.update(issuer=data["issuerName"], serial=data["serialNumber"])

    # Checked in priority order; the first complete pair defines the entry.
    for first, second in (("issuer", "serial"), ("pubKeyHash", "subject")):
        if first in data and second in data:
            return {
                first: canonicalize(data[first]),
                second: canonicalize(data[second]),
            }

    raise Exception(f"Unexpected entry components {data}")
def is_equivalent(left, right):
    """Return True when ``verify_equivalent(left, right)`` passes, else False.

    This is the non-raising counterpart of ``verify_equivalent``: any
    ``Exception`` it raises is reported as ``False``.
    """
    try:
        verify_equivalent(left, right)
    except Exception:
        # Deliberately not a bare ``except``: KeyboardInterrupt and SystemExit
        # must keep propagating instead of being reported as "not equivalent".
        return False
    return True
def verify_equivalent(left, right):
    """Raise unless *left* and *right* hold the same entries, ignoring order.

    The two collections are compared as multisets: every entry of *left* must
    pair up with its own distinct, equal entry of *right*. A plain
    ``entry in right`` check would accept ``[a, a]`` against ``[a, b]``.

    Args:
        left: sized iterable of entries.
        right: sized iterable of entries.

    Returns:
        None when the collections are equivalent.

    Raises:
        Exception: if the sizes differ or an entry of *left* has no unmatched
            counterpart in *right*.
    """
    if len(left) != len(right):
        raise Exception(f"{left} and {right} are not equivalent, sizes differ")

    # Entries may be unhashable (dicts), so matches are consumed from a list
    # copy instead of being counted in a set or Counter.
    unmatched = list(right)
    for entry in left:
        try:
            unmatched.remove(entry)
        except ValueError:
            raise Exception(
                f"{left} and {right} are not equivalent, mismatch on {entry}"
            ) from None
class Records(UserList):
    """List of canonical entries read from one bucket on one server.

    Each raw record passed in is converted with ``make_entry``; the server
    base URL and bucket name are kept so the collection can describe itself.
    """

    def __init__(self, base, bucket, data):
        """Store *base* and *bucket* and canonicalize every record in *data*."""
        self.base = base
        self.bucket = bucket
        super().__init__(make_entry(**raw) for raw in data)

    def __str__(self):
        """Return ``"<stage|prod>/<bucket> (<number of entries>)"``."""
        # Any base URL that does not mention "stage" is labelled as prod.
        if "stage" in self.base:
            name = "stage"
        else:
            name = "prod"
        return f"{name}/{self.bucket} ({len(self)})"
def get_records(base, bucket, timeout=30):
    """Fetch the ``onecrl`` collection records of *bucket* from *base*.

    Args:
        base: base API URL of the server, without a trailing slash.
        bucket: name of the bucket to read.
        timeout: seconds to wait on the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        A ``Records`` built from the ``data`` member of the JSON response.

    Raises:
        Exception: if the JSON response has no ``data`` member.
    """
    url = f"{base}/buckets/{bucket}/collections/onecrl/records"
    rsp = requests.get(url, timeout=timeout)
    results = rsp.json()
    if 'data' in results:
        return Records(base, bucket, results['data'])
    raise Exception(f"Unexpected struct reading from {url}: {results}")
def _build_entries_table(title, entries):
    """Return a rich table titled *title* with one two-cell row per entry."""
    datatable = table.Table(title=title)
    for entry in entries:
        if "issuer" in entry:
            datatable.add_row(entry["issuer"], entry["serial"])
        elif "pubKeyHash" in entry:
            datatable.add_row(entry["pubKeyHash"], entry["subject"])
        else:
            raise Exception("Unexpected entry", entry)
    return datatable


def main():
    """Compare the staging, preview and published buckets on stage and prod.

    Steps:
      1. Download all three buckets from both servers and log their sizes.
      2. Require each stage bucket to match the same bucket on prod.
      3. Require the staging and preview buckets to match pairwise across
         both servers.
      4. Report what differs between stage's staging and published buckets.

    Raises:
        Exception: from ``verify_equivalent`` on the first mismatch found in
            steps 2 or 3, or from ``get_records`` on an unexpected response.
    """
    stage_stage = get_records(base_stage, bucket_staging)
    stage_preview = get_records(base_stage, bucket_preview)
    stage_publish = get_records(base_stage, bucket_publish)
    console.log(f"Stage-Stage: {len(stage_stage)} Stage-Preview: {len(stage_preview)} Stage-Published: {len(stage_publish)}")

    prod_stage = get_records(base_prod, bucket_staging)
    prod_preview = get_records(base_prod, bucket_preview)
    prod_publish = get_records(base_prod, bucket_publish)
    console.log(f"Prod-Stage: {len(prod_stage)} Prod-Preview: {len(prod_preview)} Prod-Published: {len(prod_publish)}")

    verify_equivalent(stage_stage, prod_stage)
    verify_equivalent(stage_preview, prod_preview)
    verify_equivalent(stage_publish, prod_publish)

    console.log("Verifying stage against preview")
    for left, right in itertools.combinations([stage_stage, stage_preview, prod_stage, prod_preview], 2):
        verify_equivalent(left, right)
        console.log(f"{left} and {right} are equivalent")

    if is_equivalent(stage_stage, stage_publish):
        console.log("No changes are waiting")
        return

    # Count real differences in both directions. A plain length difference
    # under-reports (or reports 0) when entries were both added and removed.
    added = [entry for entry in stage_stage if entry not in stage_publish]
    removed = [entry for entry in stage_publish if entry not in stage_stage]
    console.log(f"There are {len(added) + len(removed)} changes waiting:")
    console.print(_build_entries_table("Added entries", added))
    if removed:
        console.print(_build_entries_table("Removed entries", removed))


if __name__ == "__main__":
    main()

Просмотреть файл

@ -1,6 +0,0 @@
host: staging-writer.server.aws
stagingendpoint: /v1/buckets/staging/collections/certificates/records
prodendpoint: /v1/buckets/blocklist/collections/certificates/records
user: user@example.com
livelist: https://firefox.settings.services.mozilla.com/v1/buckets/blocklists/collections/certificates/records

Просмотреть файл

@ -1,233 +0,0 @@
# Verify Kinto sets with a delta expected list
import json, yaml, requests, sys, base64, io, codecs, re
from xml.etree import ElementTree
from collections import defaultdict
from optparse import OptionParser
from pprint import pprint
from colorama import init, Fore
def canonicalize(s):
    """Strip newlines and spaces from both ends of *s* and return the result.

    Characters other than newline and space (for example tabs) are kept.
    """
    cleaned = s.strip("\n ")
    return cleaned
def make_entry(**data):
    """Return the canonical one-line string form of a record.

    The record's fields arrive as keyword arguments. ``issuerName`` and
    ``serialNumber`` are treated as aliases for ``issuer`` and ``serial``
    and take precedence when both are present.

    Returns:
        ``"issuer: <issuer> serial: <serial>"`` when an issuer/serial pair is
        present, otherwise ``"pubKeyHash: <hash> subject: <subject>"``.

    Raises:
        Exception: if neither pair of fields is present.
    """
    if "issuerName" in data and "serialNumber" in data:
        data.update(issuer=data["issuerName"], serial=data["serialNumber"])

    if "issuer" in data and "serial" in data:
        issuer = canonicalize(data['issuer'])
        serial = canonicalize(data['serial'])
        return f"issuer: {issuer} serial: {serial}"

    if "pubKeyHash" in data and "subject" in data:
        pub_key_hash = canonicalize(data['pubKeyHash'])
        subject = canonicalize(data['subject'])
        return f"pubKeyHash: {pub_key_hash} subject: {subject}"

    raise Exception(f"Unexpected entry components {data}")
def entry_data_from_line(line):
    """Parse every ``key: value`` pair found in *line* into a dict.

    Keys and values are runs of non-whitespace characters separated by a
    colon and a single space. When a key repeats, the last value wins.
    A line without any pair yields an empty dict.
    """
    return {
        match.group(1): match.group(2)
        for match in re.finditer(r'(\S+): (\S+)', line)
    }
def find_id(dataset, ident):
    """Return the first entry of ``dataset['data']`` whose ``id`` is *ident*.

    Returns None when no entry matches.
    """
    matches = (entry for entry in dataset['data'] if entry['id'] == ident)
    return next(matches, None)
def gIfR(condition):
    """Return a colored status prefix ("green if right").

    Gives green ``"[GOOD] "`` when *condition* is truthy and red ``"[BAD] "``
    otherwise. The caller is responsible for resetting the color afterwards.
    """
    if condition:
        return Fore.GREEN + "[GOOD] "
    return Fore.RED + "[BAD] "
def main():
    """Check a pending Kinto changeset against an expected list of entries.

    Flow:
      1. Read option defaults from ``.config.yml`` (optional) and parse the
         command line.
      2. Load the expected entries from a local file (``--expected``) or from
         the ``BugData.txt`` attachment of a Bugzilla bug (``--bug``).
      3. Download the live list, the staging list (prompting for a bearer
         token until the download succeeds) and the production list.
      4. Print a report comparing the four sets, and optionally drop into
         ``pdb`` (``--debug``).

    Exits with status 1 when required options are missing or no usable bug
    attachment is found.

    Raises:
        Exception: when the live or production list response has no ``data``
            member, or when a deleted entry cannot be located in production.
    """
    init()

    # defaultdict(str) makes every option that is not configured read as "".
    defaults = defaultdict(str)
    try:
        with open(".config.yml", 'r') as ymlfile:
            dataset = yaml.load(ymlfile, Loader=yaml.BaseLoader)
            # An empty YAML file loads as None, which dict.update() rejects.
            defaults.update(dataset or {})
    except FileNotFoundError:
        print("No .config.yml; continuing without defaults.")

    parser = OptionParser()
    parser.add_option("-e", "--expected", dest="expected", metavar="FILE",
                      help="Expected input file", default=defaults['expected'])
    parser.add_option("-b", "--bug", dest="bugnum", help="Bug #")
    parser.add_option("-H", "--host", dest="host", help="Hostname",
                      default=defaults['host'])
    parser.add_option("-S", "--staging-endpoint", dest="stagingendpoint", help="Path at the host for staging",
                      default=defaults['stagingendpoint'])
    parser.add_option("-P", "--prod-endpoint", dest="prodendpoint", help="Path at the host for production",
                      default=defaults['prodendpoint'])
    parser.add_option("-l", "--livelist", dest="livelist", help="Live blocklist",
                      default=defaults['livelist'])
    # This flag turns verbose output ON but used to be reachable only as
    # -q/--quiet; the old spellings are kept as aliases for compatibility.
    parser.add_option("-v", "--verbose", "-q", "--quiet",
                      action="store_true", dest="verbose", default=False,
                      help="be more verbose to stdout")
    parser.add_option("-d", "--debug",
                      action="store_true", dest="debug", default=False,
                      help="Start debugger after run")
    (options, args) = parser.parse_args()

    if options.expected == "" and options.bugnum is None:
        print("You must specify an expected file or a bug number")
        parser.print_help()
        sys.exit(1)
    if options.host == "":
        print("You must specify a host")
        parser.print_help()
        sys.exit(1)
    if options.stagingendpoint == "":
        print("You must specify a staging endpoint")
        parser.print_help()
        sys.exit(1)
    if options.livelist == "":
        print("You must specify a livelist")
        parser.print_help()
        sys.exit(1)
    if "-writer" not in options.host:
        print("Your host needs to be the one with -writer in its name")
        sys.exit(1)

    expected = set()
    expected_source = None
    if options.expected != "":
        expected_source = open(options.expected)
    else:
        bug_page = requests.get("https://bugzilla.mozilla.org/show_bug.cgi?ctype=xml&id={}".format(options.bugnum))
        bug = ElementTree.fromstring(bug_page.content)[0]
        for attachment in bug.findall("attachment"):
            if attachment.attrib['isobsolete'] == '0' and "BugData.txt" in attachment.find("filename").text:
                print("Downloading attachment ID {} found, dated {}".format(attachment.find("attachid").text,
                                                                            attachment.find("date").text))
                utfData = codecs.decode(base64.b64decode(attachment.find("data").text))
                expected_source = io.StringIO(utfData)

    if expected_source is None:
        # No matching attachment: fail clearly instead of crashing on
        # ``with None`` below.
        print("No non-obsolete BugData.txt attachment found on bug {}".format(options.bugnum))
        sys.exit(1)

    print("Intermediates to be revoked")
    with expected_source as expected_data:
        for line in expected_data.readlines():
            entry_data = entry_data_from_line(line)
            expected.add(make_entry(**entry_data))
            if options.verbose:
                # These values used to be read from undefined names (NameError);
                # take them from the parsed line, and skip pubKeyHash entries.
                issuer = entry_data.get("issuerName", entry_data.get("issuer"))
                serial = entry_data.get("serialNumber", entry_data.get("serial"))
                if issuer is not None and serial is not None:
                    print("Issuer: {} Serial: {}".format(str(base64.b64decode(issuer)), base64.b16encode(base64.b64decode(serial))))

    liveentries = set()
    liveids = set()
    livereq = requests.get(options.livelist)
    livelist_dataset = livereq.json()
    if 'data' not in livelist_dataset:
        raise Exception("Invalid livelist, or something else. Details: {}".format(livereq.content))
    for entryData in livelist_dataset['data']:
        liveentries.add(make_entry(**entryData))
        liveids.add(entryData['id'])

    payload = {
        "_sort": "-last_modified",
        "_limit": 9999
    }

    # Keep prompting for a token until the staging list downloads and yields
    # at least one entry.
    found = set()
    while not found:
        print(f"We're going to need a bearer token to log into {options.host}.")
        print(f"Please log in to https://{options.host}/v1/admin/ and use devtools to find the Authorization header.")
        print("Example: Authorization: Bearer ZZkZZZxNZZZZ_0ZZZZ4ZZZZZNcZZZZ")
        token = input("> ")
        # Accept either the bare token or the whole pasted header line.
        if token.startswith("Authorization: Bearer"):
            token = token.split("Bearer")[1].strip()
        headers = {"Authorization": "Bearer {}".format(token)}
        update_url = "https://{}{}".format(options.host, options.stagingendpoint)
        updatereq = requests.get(update_url, params=payload, headers=headers)
        update_dataset = updatereq.json()
        if 'data' not in update_dataset:
            print("Invalid login, or something else. URL: {} Details: {}".format(update_url, updatereq.content))
            continue
        for entryData in update_dataset['data']:
            found.add(make_entry(**entryData))
    print("Worked, downloaded {} entries from the staging list.".format(len(found)))

    prod_url = "https://{}{}".format(options.host, options.prodendpoint)
    prod = set()
    prodreq = requests.get(prod_url, params=payload)
    prod_dataset = prodreq.json()
    if 'data' not in prod_dataset:
        raise Exception("Invalid login, or something else. Details: {}".format(prodreq.content))
    for entryData in prod_dataset['data']:
        prod.add(make_entry(**entryData))

    deleted = prod-found
    notfound = expected-found
    missing = liveentries-(found|expected)

    print("")
    if options.expected != "":
        print("Evaluating expected file = '{}'".format(options.expected))
    else:
        print("Downloading intermediates to be revoked from bug # {}".format(options.bugnum))
    print("")
    print("Results:")
    print("Pending Kinto Dataset (Found): {}".format(len(found)))
    print("Added Entries (Expected): {}".format(len(expected)))
    print("{c}Expected But Not Pending (Not Found): {}".format(len(notfound), c=gIfR(len(notfound)==0)) + Fore.RESET)
    print("Deleted: {}".format(len(deleted)))
    print("{c}Entries In Production But Lost Without Being Deleted (Missing): {}".format(len(missing), c=gIfR(len(missing)==0)) + Fore.RESET)
    print("")

    if expected == found-prod:
        print(Fore.GREEN + "[GOOD] The Expected file matches the change between the staged Kinto and production." + Fore.RESET)
    else:
        print(Fore.RED + "[BAD] The Expected file doesn't match; there are {} differences seen, but '{}' is {} entries long".format(len(found-prod), options.expected, len(expected)) + Fore.RESET)

    if liveentries | expected == found:
        print(Fore.GREEN + "[GOOD] The Kinto dataset found at production equals the union of the expected file and the live list." + Fore.RESET)
    else:
        print(Fore.RED + "[BAD] The Kinto dataset is not equal to the union of the live list and the expected file. Differences follow." + Fore.RESET)
        for entry in found - (liveentries | expected):
            print(Fore.RED + " * {}".format(entry) + Fore.RESET)

    if len(notfound) > 0:
        print(Fore.RED + "[BAD] Expected, but not found in Kinto:" + Fore.RESET)
        pprint(sorted(notfound))
    else:
        print("Nothing not found.")

    if len(deleted) > 0:
        if deleted == missing:
            for deletedEntry in deleted:
                seen = False
                for entryData in prod_dataset['data']:
                    if deletedEntry == make_entry(**entryData):
                        seen = True
                        # .get(): pubKeyHash/subject records have no serialNumber.
                        print("Deleted ID: {} Serial: {}".format(entryData['id'], entryData.get('serialNumber')))
                        break
                if not seen:
                    print(Fore.RED + "[BAD] Deleted Entry: {}".format(deletedEntry) + Fore.RESET)
                    # Raising a bare string is itself a TypeError; raise a real exception.
                    raise Exception("Missing entry?")
            print(Fore.GREEN + "[GOOD] The missing entries {} are all deleted.".format(len(deleted)) + Fore.RESET)
        else:
            print(Fore.RED + "[BAD] Found live, but missing in Kinto:" + Fore.RESET)
            pprint(sorted(missing))
    else:
        print("Nothing deleted.")

    if options.debug:
        print("Variables available:")
        print(" prod - located in Kinto production")
        print(" expected - from the file on disk")
        print(" found - located in Kinto changeset")
        print(" liveentries - located in the live CDN list")
        import pdb; pdb.set_trace()


if __name__ == "__main__":
    main()

Просмотреть файл

@ -1,3 +1,2 @@
requests
pyyaml
colorama
rich