This commit is contained in:
Christiane Ruetten 2019-03-01 18:30:00 +01:00 коммит произвёл Christiane Ruetten
Родитель 00f0152c2c
Коммит 3d76a07a12
3 изменённых файлов: 386 добавлений и 9 удалений

Просмотреть файл

@ -5,7 +5,7 @@
from setuptools import setup, find_packages
PACKAGE_NAME = 'tlscanary'
PACKAGE_VERSION = '3.3.0a4'
PACKAGE_VERSION = '3.3.0a6'
INSTALL_REQUIRES = [
'coloredlogs',
@ -15,14 +15,12 @@ INSTALL_REQUIRES = [
'worq'
]
TESTS_REQUIRE = [
'coverage',
'pytest',
'pytest-pycodestyle',
'pytest-runner'
SCHEDULER_REQUIRES = [
'matplotlib',
'schedule'
]
DEV_REQUIRES = [
TESTS_REQUIRE = [
'coverage',
'pycodestyle',
'pytest',
@ -30,6 +28,8 @@ DEV_REQUIRES = [
'pytest-runner'
]
DEV_REQUIRES = TESTS_REQUIRE + SCHEDULER_REQUIRES
setup(
name=PACKAGE_NAME,
version=PACKAGE_VERSION,
@ -63,10 +63,14 @@ setup(
zip_safe=False,
install_requires=INSTALL_REQUIRES,
tests_require=TESTS_REQUIRE,
extras_require={'dev': DEV_REQUIRES}, # For `pip install -e .[dev]`
extras_require={
'dev': DEV_REQUIRES, # For `pip install -e .[dev]`
'scheduler': SCHEDULER_REQUIRES # For `pip install -e .[scheduler]`
},
entry_points={
'console_scripts': [
'tlscanary = tlscanary.main:main'
'tlscanary = tlscanary.main:main',
'tlscscheduler = tlscanary.scheduler.main:main'
]
}
)

364
tlscanary/scheduler/main.py Executable file
Просмотреть файл

@ -0,0 +1,364 @@
#!/usr/bin/env python3
# -*- coding: utf8 -*-
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.
import coloredlogs
import json
import logging
from multiprocessing import Pool
import os
import schedule
from subprocess import check_output, CalledProcessError, run
import sys
import time
# Load Matplotlib in "headless" mode to prevent carnage
from . import matplotlib_agg as matplotlib
from matplotlib.dates import datestr2num, DateFormatter
import matplotlib.pyplot as plt
# Initialize coloredlogs
logging.Formatter.converter = time.gmtime
logger = logging.getLogger(__name__)
coloredlogs.DEFAULT_LOG_FORMAT = "%(asctime)s %(levelname)s %(threadName)s %(name)s %(message)s"
coloredlogs.install(level="INFO")
def list_logs(tlscanary: str, tag: str = "all") -> list:
cmd = [tlscanary, "log", "-i", tag, "-e", "incomplete", "-e", "incompatible"]
logger.info("Running `%s`" % " ".join(cmd))
log_output = check_output(cmd).decode("utf-8").split("\n")[:-1]
# Log reference is in leftmost column of output
return list(sorted(map(lambda line: line.split("\t")[0], log_output)))
def get_log(tlscanary: str, ref: str) -> dict:
cmd = [tlscanary, "log", "-a", "json", "-i", str(ref)]
# Retries are necessary as EC2 instances are running into spurious BrokenPipe errors
# when spawning tlscanary subprocesses, likely due to memory underruns.
retries = 5
out = None
while out is None and retries > 0:
try:
logger.debug("Running `%s`" % " ".join(cmd))
out = check_output(cmd).decode("utf-8")
except CalledProcessError:
logger.warning("Retrying failed command `%s`" % " ".join(cmd))
retries -= 1
if out is None:
logger.critical("Giving up on retrying command `%s`" % " ".join(cmd))
raise Exception("Number of retries exceeded")
return json.loads(out)
def process_log(log: dict, mode: str):
if len(log) == 0:
raise Exception("Empty log")
set_size = int(log[0]["meta"]["sources_size"])
timestamp = datestr2num(log[0]["meta"]["run_finish_time"])
# Extract filtered list of affected hosts and ranks
carnage = []
for log_data in log[0]["data"]:
if mode == "symantec":
# Old way of counting stopped working once NSS changes removed short error message
# if "short_error_message" in l["response"]["result"]["info"]:
# sm = l["response"]["result"]["info"]["short_error_message"]
# if sm == "SEC_ERROR_UNKNOWN_ISSUER" or sm == "MOZILLA_PKIX_ERROR_ADDITIONAL_POLICY_CONSTRAINT_FAILED":
# errors += 1
# New way of counting is solely filtering by ssl status code
status = log_data["response"]["result"]["info"]["status"]
if status == 2153398259 or status == 2153390067:
carnage.append((int(log_data["rank"]), log_data["host"]))
elif mode == "tlsdeprecation":
status = log_data["response"]["result"]["info"]["short_error_message"]
if status == "SSL_ERROR_UNSUPPORTED_VERSION":
carnage.append((int(log_data["rank"]), log_data["host"]))
else:
raise Exception("Unknown log processing mode: %s" % mode)
carnage = list(sorted(carnage))
# Count numbers for each subset
counts = {}
for key in (100, 1000, 10000, 100000, 1000000):
counts[str(key)] = 0
for rank, _ in carnage:
for key in (100, 1000, 10000, 100000, 1000000):
if rank <= key:
counts[str(key)] += 1
return timestamp, set_size, carnage, counts
class SymantecJob(object):
def __init__(self, tlscanary_binary: str, plot_file: str):
self.log_tag = "symantec"
self.tlscanary_binary = tlscanary_binary
self.plot_file = plot_file
def run(self):
logger.info("Starting Symantec scan")
self.scan()
logger.info("Processing Symantec logs")
data = self.process_logs()
logger.info("Writing Symantec plot to `%s`" % self.plot_file)
self.generate_plot(data)
logger.info("Completed Symantec job")
def scan(self):
cmd = [self.tlscanary_binary,
"regression", "-r",
"-t", "beta",
"-p1", "security.pki.distrust_ca_policy;2",
"-b", "beta",
"-p2", "security.pki.distrust_ca_policy;0"
]
logger.info("Running `%s`" % " ".join(cmd))
run(cmd, check=True)
cmd = [self.tlscanary_binary, "log", "-i", "1", "-a", "addtag", "-t", self.log_tag]
logger.info("Running `%s`" % " ".join(cmd))
run(cmd, check=True)
def process_log(self, ref: str):
timestamp, set_size, _, counts = process_log(get_log(self.tlscanary_binary, ref), mode=self.log_tag)
return timestamp, set_size, counts
def process_logs(self) -> dict:
# with open("/home/ubuntu/http/broken-%s.csv" % key, "w") as f:
# f.writelines(["%d,%s\n" % x for x in sorted(carnage)])
data = {}
p = Pool()
work_list = list_logs(self.tlscanary_binary, tag=self.log_tag)
results = p.imap_unordered(self.process_log, work_list)
# Add static data compiled by matt
data["1000000"] = {
"timestamps": [
datestr2num("2018-05-22"),
datestr2num("2018-06-26"),
datestr2num("2018-07-17"),
datestr2num("2018-08-15")
],
"values": [
# original set_size was 496833 hosts
100.0 * 35066 / 1000000,
100.0 * 27102 / 1000000,
100.0 * 23197 / 1000000,
100.0 * 17833 / 1000000
]
}
for timestamp, _, counts in sorted(results):
for tier in counts.keys():
# if tier == "100000":
# continue
value = 100.0 * counts[tier] / int(tier)
if tier not in data:
data[tier] = {"timestamps": [timestamp], "values": [value]}
else:
data[tier]["timestamps"].append(timestamp)
data[tier]["values"].append(value)
return data
def generate_plot(self, data: dict):
# Tips about matplotlib styling:
# http://messymind.net/making-matplotlib-look-like-ggplot/
fig, ax = plt.subplots(figsize=(9, 5))
fig.suptitle("Percentage of Symantec regressions", fontsize=14)
# https://matplotlib.org/users/colormaps.html
cmap = plt.cm.get_cmap('tab10', 10)
for i, k in enumerate(sorted(data.keys())):
logger.debug(i, k, data[k])
ax.plot_date(data[k]["timestamps"], data[k]["values"],
color=cmap(i), linestyle='-', markersize=0, label="Top %s" % k)
ax.grid(True, 'major', color='1.0', linestyle='-', linewidth=0.7)
ax.grid(True, 'minor', color='0.95', linestyle='-', linewidth=0.5)
# Remove outer box
for child in ax.get_children():
if isinstance(child, matplotlib.spines.Spine):
child.set_alpha(0)
ax.patch.set_facecolor('0.92')
ax.set_axisbelow(True)
ax.set_ylim(bottom=0)
ax.legend(loc='upper left')
ax.legend_.get_frame().set_linewidth(0)
ax.legend_.get_frame().set_alpha(0.5)
# ax.xaxis.set_major_formatter(DateFormatter('%H:%M:%S'))
plt.gcf().autofmt_xdate()
plt.savefig(self.plot_file)
# plt.show()
plt.close()
class TLSDeprecationJob(object):
def __init__(self, tlscanary_binary, plot_file):
self.log_tag = "tlsdeprecation"
self.tlscanary_binary = tlscanary_binary
self.plot_file = plot_file
def run(self):
logger.info("Starting TLS Deprecation scan")
self.scan()
logger.info("Processing TLS Deprecation logs")
data = self.process_logs()
logger.info("Writing TLS Deprecation plot to `%s`" % self.plot_file)
self.generate_plot(data)
logger.info("Completed TLS Deprecation job")
def scan(self):
cmd = [self.tlscanary_binary,
"regression", "-r",
"-t", "beta",
"-p1", "security.tls.version.min;3",
"-b", "beta",
"-p2", "security.tls.version.min;1"
]
logger.info("Running `%s`" % " ".join(cmd))
run(cmd, check=True)
cmd = [self.tlscanary_binary, "log", "-i", "1", "-a", "addtag", "-t", self.log_tag]
logger.info("Running `%s`" % " ".join(cmd))
run(cmd, check=True)
def process_log(self, ref: str):
timestamp, set_size, _, counts = process_log(get_log(self.tlscanary_binary, ref), mode=self.log_tag)
return timestamp, set_size, counts
def process_logs(self) -> dict:
# with open("/home/ubuntu/http/broken-%s.csv" % key, "w") as f:
# f.writelines(["%d,%s\n" % x for x in sorted(carnage)])
data = {}
p = Pool()
work_list = list_logs(self.tlscanary_binary, tag=self.log_tag)
results = p.imap_unordered(self.process_log, work_list)
for timestamp, _, counts in sorted(results):
for tier in counts.keys():
# if tier == "100000":
# continue
value = 100.0 * counts[tier] / int(tier)
if tier not in data:
data[tier] = {"timestamps": [timestamp], "values": [value]}
else:
data[tier]["timestamps"].append(timestamp)
data[tier]["values"].append(value)
return data
def generate_plot(self, data: dict):
# Tips about matplotlib styling:
# http://messymind.net/making-matplotlib-look-like-ggplot/
fig, ax = plt.subplots(figsize=(9, 5))
fig.suptitle("TLS 1.1 Deprecation Regressions (%)", fontsize=14)
# https://matplotlib.org/users/colormaps.html
cmap = plt.cm.get_cmap('tab10', 10)
for i, k in enumerate(sorted(data.keys())):
logger.debug(i, k, data[k])
ax.plot_date(data[k]["timestamps"], data[k]["values"],
color=cmap(i), linestyle='-', markersize=0, label="Top %s" % k)
ax.grid(True, 'major', color='1.0', linestyle='-', linewidth=0.7)
ax.grid(True, 'minor', color='0.95', linestyle='-', linewidth=0.5)
# Remove outer box
for child in ax.get_children():
if isinstance(child, matplotlib.spines.Spine):
child.set_alpha(0)
ax.patch.set_facecolor('0.92')
ax.set_axisbelow(True)
ax.set_ylim(bottom=0)
ax.legend(loc='upper left')
ax.legend_.get_frame().set_linewidth(0)
ax.legend_.get_frame().set_alpha(0.5)
# ax.xaxis.set_major_formatter(DateFormatter('%H:%M:%S'))
plt.gcf().autofmt_xdate()
logger.info("Writing TLS Deprecation plot to `%s`" % self.plot_file)
plt.savefig(self.plot_file)
# plt.show()
plt.close()
class SrcUpdateJob(object):
def __init__(self, tlscanary_binary):
self.tlscanary_binary = tlscanary_binary
def run(self):
logger.info("Updating sources")
# Setting distrust policy to 0 as long as we need to capture hosts for Symantec regressions
cmd = [self.tlscanary_binary, "srcupdate", "-p", "security.pki.distrust_ca_policy;0"]
logger.info("Running `%s`" % " ".join(cmd))
run(cmd, check=True)
logger.info("Completed source update job")
class Scheduler(object):
@staticmethod
def log_alive():
logger.info("Scheduler is alive")
@staticmethod
def run(output_directory):
tlscanary_binary = "tlscanary"
output_directory = os.path.abspath(output_directory)
logger.info("Output directory is `%s`" % output_directory)
symantec = SymantecJob(tlscanary_binary, os.path.join(output_directory, "symantec.svg"))
tlsdeprecation = TLSDeprecationJob(tlscanary_binary, os.path.join(output_directory, "tlsdeprecation.svg"))
srcupdate = SrcUpdateJob(tlscanary_binary)
# ####################################################################
# Here's the the actual schedule #####################################
# ####################################################################
schedule.every(60).minutes.do(Scheduler.log_alive)
schedule.every().monday.at("00:00").do(symantec.run)
schedule.every().wednesday.at("00:00").do(tlsdeprecation.run)
schedule.every().saturday.at("00:00").do(srcupdate.run)
try:
while True:
schedule.run_pending()
time.sleep(1)
except KeyboardInterrupt:
logger.critical("\nUser interrupt. Quitting...")
return 10
def main() -> int:
output_directory = sys.argv[1]
return Scheduler.run(output_directory)
if __name__ == "__main__":
sys.exit(main())

Просмотреть файл

@ -0,0 +1,9 @@
# This is just a hack to avoid pycodestyle complaining about
# E402 module level import not at top of file
# when loading matplotlib with Agg backend, which has to be
# procedurally configured after importing matplotlib and before
# mporting matplotlib.pyplot.
# Thanks to https://stackoverflow.com/questions/39305810
import matplotlib
matplotlib.use('Agg')