Initial commit of the HTTP Observatory
This commit is contained in:
Коммит
231d4841eb
|
@ -0,0 +1,3 @@
|
|||
.idea
|
||||
*.pyc
|
||||
__pycache__
|
|
@ -0,0 +1,362 @@
|
|||
Mozilla Public License, version 2.0
|
||||
|
||||
1. Definitions
|
||||
|
||||
1.1. "Contributor"
|
||||
|
||||
means each individual or legal entity that creates, contributes to the
|
||||
creation of, or owns Covered Software.
|
||||
|
||||
1.2. "Contributor Version"
|
||||
|
||||
means the combination of the Contributions of others (if any) used by a
|
||||
Contributor and that particular Contributor's Contribution.
|
||||
|
||||
1.3. "Contribution"
|
||||
|
||||
means Covered Software of a particular Contributor.
|
||||
|
||||
1.4. "Covered Software"
|
||||
|
||||
means Source Code Form to which the initial Contributor has attached the
|
||||
notice in Exhibit A, the Executable Form of such Source Code Form, and
|
||||
Modifications of such Source Code Form, in each case including portions
|
||||
thereof.
|
||||
|
||||
1.5. "Incompatible With Secondary Licenses"
|
||||
means
|
||||
|
||||
a. that the initial Contributor has attached the notice described in
|
||||
Exhibit B to the Covered Software; or
|
||||
|
||||
b. that the Covered Software was made available under the terms of
|
||||
version 1.1 or earlier of the License, but not also under the terms of
|
||||
a Secondary License.
|
||||
|
||||
1.6. "Executable Form"
|
||||
|
||||
means any form of the work other than Source Code Form.
|
||||
|
||||
1.7. "Larger Work"
|
||||
|
||||
means a work that combines Covered Software with other material, in a
|
||||
separate file or files, that is not Covered Software.
|
||||
|
||||
1.8. "License"
|
||||
|
||||
means this document.
|
||||
|
||||
1.9. "Licensable"
|
||||
|
||||
means having the right to grant, to the maximum extent possible, whether
|
||||
at the time of the initial grant or subsequently, any and all of the
|
||||
rights conveyed by this License.
|
||||
|
||||
1.10. "Modifications"
|
||||
|
||||
means any of the following:
|
||||
|
||||
a. any file in Source Code Form that results from an addition to,
|
||||
deletion from, or modification of the contents of Covered Software; or
|
||||
|
||||
b. any new file in Source Code Form that contains any Covered Software.
|
||||
|
||||
1.11. "Patent Claims" of a Contributor
|
||||
|
||||
means any patent claim(s), including without limitation, method,
|
||||
process, and apparatus claims, in any patent Licensable by such
|
||||
Contributor that would be infringed, but for the grant of the License,
|
||||
by the making, using, selling, offering for sale, having made, import,
|
||||
or transfer of either its Contributions or its Contributor Version.
|
||||
|
||||
1.12. "Secondary License"
|
||||
|
||||
means either the GNU General Public License, Version 2.0, the GNU Lesser
|
||||
General Public License, Version 2.1, the GNU Affero General Public
|
||||
License, Version 3.0, or any later versions of those licenses.
|
||||
|
||||
1.13. "Source Code Form"
|
||||
|
||||
means the form of the work preferred for making modifications.
|
||||
|
||||
1.14. "You" (or "Your")
|
||||
|
||||
means an individual or a legal entity exercising rights under this
|
||||
License. For legal entities, "You" includes any entity that controls, is
|
||||
controlled by, or is under common control with You. For purposes of this
|
||||
definition, "control" means (a) the power, direct or indirect, to cause
|
||||
the direction or management of such entity, whether by contract or
|
||||
otherwise, or (b) ownership of more than fifty percent (50%) of the
|
||||
outstanding shares or beneficial ownership of such entity.
|
||||
|
||||
|
||||
2. License Grants and Conditions
|
||||
|
||||
2.1. Grants
|
||||
|
||||
Each Contributor hereby grants You a world-wide, royalty-free,
|
||||
non-exclusive license:
|
||||
|
||||
a. under intellectual property rights (other than patent or trademark)
|
||||
Licensable by such Contributor to use, reproduce, make available,
|
||||
modify, display, perform, distribute, and otherwise exploit its
|
||||
Contributions, either on an unmodified basis, with Modifications, or
|
||||
as part of a Larger Work; and
|
||||
|
||||
b. under Patent Claims of such Contributor to make, use, sell, offer for
|
||||
sale, have made, import, and otherwise transfer either its
|
||||
Contributions or its Contributor Version.
|
||||
|
||||
2.2. Effective Date
|
||||
|
||||
The licenses granted in Section 2.1 with respect to any Contribution
|
||||
become effective for each Contribution on the date the Contributor first
|
||||
distributes such Contribution.
|
||||
|
||||
2.3. Limitations on Grant Scope
|
||||
|
||||
The licenses granted in this Section 2 are the only rights granted under
|
||||
this License. No additional rights or licenses will be implied from the
|
||||
distribution or licensing of Covered Software under this License.
|
||||
Notwithstanding Section 2.1(b) above, no patent license is granted by a
|
||||
Contributor:
|
||||
|
||||
a. for any code that a Contributor has removed from Covered Software; or
|
||||
|
||||
b. for infringements caused by: (i) Your and any other third party's
|
||||
modifications of Covered Software, or (ii) the combination of its
|
||||
Contributions with other software (except as part of its Contributor
|
||||
Version); or
|
||||
|
||||
c. under Patent Claims infringed by Covered Software in the absence of
|
||||
its Contributions.
|
||||
|
||||
This License does not grant any rights in the trademarks, service marks,
|
||||
or logos of any Contributor (except as may be necessary to comply with
|
||||
the notice requirements in Section 3.4).
|
||||
|
||||
2.4. Subsequent Licenses
|
||||
|
||||
No Contributor makes additional grants as a result of Your choice to
|
||||
distribute the Covered Software under a subsequent version of this
|
||||
License (see Section 10.2) or under the terms of a Secondary License (if
|
||||
permitted under the terms of Section 3.3).
|
||||
|
||||
2.5. Representation
|
||||
|
||||
Each Contributor represents that the Contributor believes its
|
||||
Contributions are its original creation(s) or it has sufficient rights to
|
||||
grant the rights to its Contributions conveyed by this License.
|
||||
|
||||
2.6. Fair Use
|
||||
|
||||
This License is not intended to limit any rights You have under
|
||||
applicable copyright doctrines of fair use, fair dealing, or other
|
||||
equivalents.
|
||||
|
||||
2.7. Conditions
|
||||
|
||||
Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in
|
||||
Section 2.1.
|
||||
|
||||
|
||||
3. Responsibilities
|
||||
|
||||
3.1. Distribution of Source Form
|
||||
|
||||
All distribution of Covered Software in Source Code Form, including any
|
||||
Modifications that You create or to which You contribute, must be under
|
||||
the terms of this License. You must inform recipients that the Source
|
||||
Code Form of the Covered Software is governed by the terms of this
|
||||
License, and how they can obtain a copy of this License. You may not
|
||||
attempt to alter or restrict the recipients' rights in the Source Code
|
||||
Form.
|
||||
|
||||
3.2. Distribution of Executable Form
|
||||
|
||||
If You distribute Covered Software in Executable Form then:
|
||||
|
||||
a. such Covered Software must also be made available in Source Code Form,
|
||||
as described in Section 3.1, and You must inform recipients of the
|
||||
Executable Form how they can obtain a copy of such Source Code Form by
|
||||
reasonable means in a timely manner, at a charge no more than the cost
|
||||
of distribution to the recipient; and
|
||||
|
||||
b. You may distribute such Executable Form under the terms of this
|
||||
License, or sublicense it under different terms, provided that the
|
||||
license for the Executable Form does not attempt to limit or alter the
|
||||
recipients' rights in the Source Code Form under this License.
|
||||
|
||||
3.3. Distribution of a Larger Work
|
||||
|
||||
You may create and distribute a Larger Work under terms of Your choice,
|
||||
provided that You also comply with the requirements of this License for
|
||||
the Covered Software. If the Larger Work is a combination of Covered
|
||||
Software with a work governed by one or more Secondary Licenses, and the
|
||||
Covered Software is not Incompatible With Secondary Licenses, this
|
||||
License permits You to additionally distribute such Covered Software
|
||||
under the terms of such Secondary License(s), so that the recipient of
|
||||
the Larger Work may, at their option, further distribute the Covered
|
||||
Software under the terms of either this License or such Secondary
|
||||
License(s).
|
||||
|
||||
3.4. Notices
|
||||
|
||||
You may not remove or alter the substance of any license notices
|
||||
(including copyright notices, patent notices, disclaimers of warranty, or
|
||||
limitations of liability) contained within the Source Code Form of the
|
||||
Covered Software, except that You may alter any license notices to the
|
||||
extent required to remedy known factual inaccuracies.
|
||||
|
||||
3.5. Application of Additional Terms
|
||||
|
||||
You may choose to offer, and to charge a fee for, warranty, support,
|
||||
indemnity or liability obligations to one or more recipients of Covered
|
||||
Software. However, You may do so only on Your own behalf, and not on
|
||||
behalf of any Contributor. You must make it absolutely clear that any
|
||||
such warranty, support, indemnity, or liability obligation is offered by
|
||||
You alone, and You hereby agree to indemnify every Contributor for any
|
||||
liability incurred by such Contributor as a result of warranty, support,
|
||||
indemnity or liability terms You offer. You may include additional
|
||||
disclaimers of warranty and limitations of liability specific to any
|
||||
jurisdiction.
|
||||
|
||||
4. Inability to Comply Due to Statute or Regulation
|
||||
|
||||
If it is impossible for You to comply with any of the terms of this License
|
||||
with respect to some or all of the Covered Software due to statute,
|
||||
judicial order, or regulation then You must: (a) comply with the terms of
|
||||
this License to the maximum extent possible; and (b) describe the
|
||||
limitations and the code they affect. Such description must be placed in a
|
||||
text file included with all distributions of the Covered Software under
|
||||
this License. Except to the extent prohibited by statute or regulation,
|
||||
such description must be sufficiently detailed for a recipient of ordinary
|
||||
skill to be able to understand it.
|
||||
|
||||
5. Termination
|
||||
|
||||
5.1. The rights granted under this License will terminate automatically if You
|
||||
fail to comply with any of its terms. However, if You become compliant,
|
||||
then the rights granted under this License from a particular Contributor
|
||||
are reinstated (a) provisionally, unless and until such Contributor
|
||||
explicitly and finally terminates Your grants, and (b) on an ongoing
|
||||
basis, if such Contributor fails to notify You of the non-compliance by
|
||||
some reasonable means prior to 60 days after You have come back into
|
||||
compliance. Moreover, Your grants from a particular Contributor are
|
||||
reinstated on an ongoing basis if such Contributor notifies You of the
|
||||
non-compliance by some reasonable means, this is the first time You have
|
||||
received notice of non-compliance with this License from such
|
||||
Contributor, and You become compliant prior to 30 days after Your receipt
|
||||
of the notice.
|
||||
|
||||
5.2. If You initiate litigation against any entity by asserting a patent
|
||||
infringement claim (excluding declaratory judgment actions,
|
||||
counter-claims, and cross-claims) alleging that a Contributor Version
|
||||
directly or indirectly infringes any patent, then the rights granted to
|
||||
You by any and all Contributors for the Covered Software under Section
|
||||
2.1 of this License shall terminate.
|
||||
|
||||
5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user
|
||||
license agreements (excluding distributors and resellers) which have been
|
||||
validly granted by You or Your distributors under this License prior to
|
||||
termination shall survive termination.
|
||||
|
||||
6. Disclaimer of Warranty
|
||||
|
||||
Covered Software is provided under this License on an "as is" basis,
|
||||
without warranty of any kind, either expressed, implied, or statutory,
|
||||
including, without limitation, warranties that the Covered Software is free
|
||||
of defects, merchantable, fit for a particular purpose or non-infringing.
|
||||
The entire risk as to the quality and performance of the Covered Software
|
||||
is with You. Should any Covered Software prove defective in any respect,
|
||||
You (not any Contributor) assume the cost of any necessary servicing,
|
||||
repair, or correction. This disclaimer of warranty constitutes an essential
|
||||
part of this License. No use of any Covered Software is authorized under
|
||||
this License except under this disclaimer.
|
||||
|
||||
7. Limitation of Liability
|
||||
|
||||
Under no circumstances and under no legal theory, whether tort (including
|
||||
negligence), contract, or otherwise, shall any Contributor, or anyone who
|
||||
distributes Covered Software as permitted above, be liable to You for any
|
||||
direct, indirect, special, incidental, or consequential damages of any
|
||||
character including, without limitation, damages for lost profits, loss of
|
||||
goodwill, work stoppage, computer failure or malfunction, or any and all
|
||||
other commercial damages or losses, even if such party shall have been
|
||||
informed of the possibility of such damages. This limitation of liability
|
||||
shall not apply to liability for death or personal injury resulting from
|
||||
such party's negligence to the extent applicable law prohibits such
|
||||
limitation. Some jurisdictions do not allow the exclusion or limitation of
|
||||
incidental or consequential damages, so this exclusion and limitation may
|
||||
not apply to You.
|
||||
|
||||
8. Litigation
|
||||
|
||||
Any litigation relating to this License may be brought only in the courts
|
||||
of a jurisdiction where the defendant maintains its principal place of
|
||||
business and such litigation shall be governed by laws of that
|
||||
jurisdiction, without reference to its conflict-of-law provisions. Nothing
|
||||
in this Section shall prevent a party's ability to bring cross-claims or
|
||||
counter-claims.
|
||||
|
||||
9. Miscellaneous
|
||||
|
||||
This License represents the complete agreement concerning the subject
|
||||
matter hereof. If any provision of this License is held to be
|
||||
unenforceable, such provision shall be reformed only to the extent
|
||||
necessary to make it enforceable. Any law or regulation which provides that
|
||||
the language of a contract shall be construed against the drafter shall not
|
||||
be used to construe this License against a Contributor.
|
||||
|
||||
|
||||
10. Versions of the License
|
||||
|
||||
10.1. New Versions
|
||||
|
||||
Mozilla Foundation is the license steward. Except as provided in Section
|
||||
10.3, no one other than the license steward has the right to modify or
|
||||
publish new versions of this License. Each version will be given a
|
||||
distinguishing version number.
|
||||
|
||||
10.2. Effect of New Versions
|
||||
|
||||
You may distribute the Covered Software under the terms of the version
|
||||
of the License under which You originally received the Covered Software,
|
||||
or under the terms of any subsequent version published by the license
|
||||
steward.
|
||||
|
||||
10.3. Modified Versions
|
||||
|
||||
If you create software not governed by this License, and you want to
|
||||
create a new license for such software, you may create and use a
|
||||
modified version of this License if you rename the license and remove
|
||||
any references to the name of the license steward (except to note that
|
||||
such modified license differs from this License).
|
||||
|
||||
10.4. Distributing Source Code Form that is Incompatible With Secondary
|
||||
Licenses If You choose to distribute Source Code Form that is
|
||||
Incompatible With Secondary Licenses under the terms of this version of
|
||||
the License, the notice described in Exhibit B of this License must be
|
||||
attached.
|
||||
|
||||
Exhibit A - Source Code Form License Notice
|
||||
|
||||
This Source Code Form is subject to the
|
||||
terms of the Mozilla Public License, v.
|
||||
2.0. If a copy of the MPL was not
|
||||
distributed with this file, You can
|
||||
obtain one at
|
||||
http://mozilla.org/MPL/2.0/.
|
||||
|
||||
If it is not possible or desirable to put the notice in a particular file,
|
||||
then You may include the notice in a location (such as a LICENSE file in a
|
||||
relevant directory) where a recipient would be likely to look for such a
|
||||
notice.
|
||||
|
||||
You may add additional accurate notices of copyright ownership.
|
||||
|
||||
Exhibit B - "Incompatible With Secondary Licenses" Notice
|
||||
|
||||
This Source Code Form is "Incompatible
|
||||
With Secondary Licenses", as defined by
|
||||
the Mozilla Public License, v. 2.0.
|
|
@ -0,0 +1,11 @@
|
|||
# Mozilla HTTP Observatory
|
||||
|
||||
A set of tools to scan your websites for basic web hygiene. This project is undergoing heavy development and is not yet suitable for production.
|
||||
|
||||
## Authors
|
||||
|
||||
* April King
|
||||
|
||||
## License
|
||||
|
||||
* Mozilla Public License Version 2.0
|
|
@ -0,0 +1 @@
|
|||
data
|
|
@ -0,0 +1,11 @@
|
|||
from .database import *
|
||||
|
||||
# Names exported by a star-import of this package; each one is expected to be
# provided by the `from .database import *` at the top of this module.
__all__ = [
    'get_cursor',
    'insert_scan',
    'insert_scan_grade',
    'insert_test_result',
    'select_scan_recent_scan',
    'select_site_id',
    'select_test_results',
]
|
|
@ -0,0 +1,149 @@
|
|||
from contextlib import contextmanager
|
||||
from json import dumps
|
||||
|
||||
from scanner import STATE_FINISHED, STATE_RUNNING, STATE_STARTED
|
||||
|
||||
import psycopg2
|
||||
import psycopg2.extras
|
||||
import psycopg2.pool
|
||||
|
||||
import scanner.analyzer
|
||||
|
||||
# Create a psycopg2 connection pool (minimum 1, maximum 32 connections) for the
# 'http_observatory' database. This runs as a side effect of importing the module.
# TODO: pull credentials from an environment variable
pool = psycopg2.pool.SimpleConnectionPool(1, 32, database='http_observatory')
|
||||
|
||||
|
||||
@contextmanager
def get_cursor():
    """
    Yield a DictCursor on a pooled connection, wrapping the ``with`` body in one transaction.

    The transaction is committed when the body finishes normally. If the body (or the commit) raises,
    the transaction is rolled back before the exception propagates, so a connection with an open or
    aborted transaction is never handed back to the pool. The connection is always returned to the pool.

    :return: context manager yielding a psycopg2.extras.DictCursor
    """
    conn = pool.getconn()

    try:
        yield conn.cursor(cursor_factory=psycopg2.extras.DictCursor)
        conn.commit()
    except BaseException:
        # Don't leak a failed/half-finished transaction to the next user of this connection
        conn.rollback()
        raise
    finally:
        pool.putconn(conn)
|
||||
|
||||
|
||||
def insert_scan_grade(scan_id, grade):
    """
    Store a grade and the reasons for it on an existing scan.

    :param scan_id: id of the scans row to update
    :param grade: mapping with a 'grade' entry and a JSON-serializable 'grade_reasons' entry
    :return: the first row produced by the statement's RETURNING * clause
    """
    statement = """UPDATE scans
                     SET (grade, grade_reasons) =
                     (%s, %s)
                     WHERE id = %s
                     RETURNING *"""

    with get_cursor() as cursor:
        # grade_reasons is serialized to JSON text before being handed to the database
        cursor.execute(statement, (grade['grade'], dumps(grade['grade_reasons']), scan_id))
        updated_scan = cursor.fetchone()

    return updated_scan
|
||||
|
||||
|
||||
def insert_scan(site_id) -> psycopg2.extras.DictRow:
    """
    Create a new scan for a site in the STATE_STARTED state.

    The start time is set by the database (NOW()), and tests_quantity is the number of names
    listed in scanner.analyzer.__all__.

    :param site_id: id of the site being scanned
    :return: the first row produced by the statement's RETURNING * clause
    """
    statement = """INSERT INTO scans (site_id, state, start_time, tests_quantity)
                     VALUES (%s, %s, NOW(), %s)
                     RETURNING *"""

    with get_cursor() as cursor:
        cursor.execute(statement, (site_id, STATE_STARTED, len(scanner.analyzer.__all__)))
        new_scan = cursor.fetchone()

    return new_scan
|
||||
|
||||
|
||||
def insert_test_result(site_id: int, scan_id: int, name: str, output: dict) -> psycopg2.extras.DictRow:
    """
    Record the result of a single test and update the bookkeeping columns of its scan.

    The scan's tests_completed counter is incremented, tests_passed / tests_failed are incremented when
    output['pass'] is True / False (a value of None counts as neither), and the scan's state is advanced:
    STATE_STARTED becomes STATE_RUNNING, and once tests_completed reaches tests_quantity the scan becomes
    STATE_FINISHED and its end_time is set.

    :param site_id: id of the site that was tested
    :param scan_id: id of the scan the test belongs to
    :param name: name of the test
    :param output: the test's output dictionary; must contain 'expectation' and 'pass'. It is not
        modified: 'expectation' is stored in its own column, everything else is stored as JSON
    :return: the first row produced by the INSERT's RETURNING * clause
    """
    # Work on a shallow copy so the caller's dictionary isn't mutated by the pop() below
    output = dict(output)
    expectation = output.pop('expectation')

    with get_cursor() as cur:
        # First, let's get the scan from the scans table. FOR UPDATE locks the row until this
        # transaction ends, so concurrent test results for the same scan can't lose counter updates.
        cur.execute("""SELECT tests_completed, tests_passed, tests_failed, tests_quantity, state FROM scans
                         WHERE id = %s
                         FOR UPDATE""", (scan_id,))

        row = cur.fetchone()

        # Increment the number of tests completed
        tests_completed = row['tests_completed'] + 1

        # Set the proper state; completion is checked first so that a scan whose final test is also
        # its first recorded test still ends up finished
        state = row['state']
        finished = tests_completed == row['tests_quantity']
        if finished:
            state = STATE_FINISHED
        elif state == STATE_STARTED:
            state = STATE_RUNNING

        # Increment the tests passed/failed column
        tests_passed = row['tests_passed'] + 1 if output['pass'] is True else row['tests_passed']
        tests_failed = row['tests_failed'] + 1 if output['pass'] is False else row['tests_failed']

        # Update the scans table; end_time is NOW() once finished and NULL otherwise
        cur.execute("""UPDATE scans
                         SET (end_time, tests_completed, tests_failed, tests_passed, state) =
                         (CASE WHEN %s THEN NOW() ELSE NULL END, %s, %s, %s, %s)
                         WHERE id = %s""",
                    (finished, tests_completed, tests_failed, tests_passed, state, scan_id))

        # Add the test result to the database
        cur.execute("""INSERT INTO tests (site_id, scan_id, name, expectation, output)
                         VALUES (%s, %s, %s, %s, %s)
                         RETURNING *""",
                    (site_id, scan_id, name, expectation, dumps(output, indent=4, sort_keys=True)))

        return cur.fetchone()
|
||||
|
||||
|
||||
# TODO: Only look for successful scans?
|
||||
def select_scan_recent_scan(site_id: int) -> psycopg2.extras.DictRow:
    """
    Return the most recently started scan of a site, if one was started within the last day.

    :param site_id: id of the site to look up
    :return: the matching scans row, or an empty dict when the query returned no row
    """
    with get_cursor() as cur:
        # The placeholder must not be wrapped in quotes: psycopg2 quotes parameters itself, and a
        # quoted placeholder yields invalid SQL as soon as the argument is a string
        cur.execute("""SELECT * FROM scans
                         WHERE start_time >= NOW() - INTERVAL '1 day'
                         AND site_id = %s
                         ORDER BY start_time DESC
                         LIMIT 1""",
                    (site_id,))

        if cur.rowcount > 0:
            return cur.fetchone()

    return {}
|
||||
|
||||
|
||||
def select_site_id(hostname: str) -> int:
    """
    Return the id of the site with the given hostname, creating the site if it doesn't exist yet.

    When several rows share the domain, the one with the newest creation_time is used.

    :param hostname: value looked up in (and, if missing, inserted into) sites.domain
    :return: id of the existing or newly created sites row
    """
    lookup = """SELECT id FROM sites
                  WHERE domain=(%s)
                  ORDER BY creation_time DESC
                  LIMIT 1"""
    create = """INSERT INTO sites (domain, creation_time)
                  VALUES (%s, NOW())
                  RETURNING id"""

    # Reuse an existing site whenever there is one
    with get_cursor() as cursor:
        cursor.execute(lookup, (hostname,))
        existing = cursor.fetchone() if cursor.rowcount > 0 else None

    if existing is not None:
        return existing['id']

    # Otherwise create it, in a transaction of its own
    with get_cursor() as cursor:
        cursor.execute(create, (hostname,))
        return cursor.fetchone()['id']
|
||||
|
||||
|
||||
def select_test_results(scan_id: int) -> dict:
    """
    Return every stored test result of a scan, keyed by test name.

    :param scan_id: id of the scan whose tests rows are fetched
    :return: dictionary mapping each test name to a dictionary with:
        expectation: the stored expectation
        output: the stored output, with its 'pass' and 'result' entries removed
        passed: the 'pass' entry of the stored output
        result: the 'result' entry of the stored output
        The dictionary is empty when the scan has no test rows.
    """
    tests = {}

    with get_cursor() as cur:
        cur.execute("SELECT * FROM tests WHERE scan_id = %s", (scan_id,))

        # Grab every test and stuff it into the tests dictionary. Iterating the cursor handles zero,
        # one or many rows alike (a `rowcount > 1` guard would drop a scan's only result).
        for test in cur:
            output = test['output']
            passed = output.pop('pass')
            result = output.pop('result')

            tests[test['name']] = {
                'expectation': test['expectation'],
                'output': output,
                'passed': passed,
                'result': result,
            }

    return tests
|
|
@ -0,0 +1,51 @@
|
|||
-- Sites that can be scanned; `domain` holds the hostname
CREATE TABLE IF NOT EXISTS sites (
  id SERIAL PRIMARY KEY,
  domain VARCHAR(255) NOT NULL,
  creation_time TIMESTAMP NOT NULL,
  public_headers JSONB NULL,
  private_headers JSONB NULL
);

-- Per-site expectation for a named test
CREATE TABLE IF NOT EXISTS expectations (
  id SERIAL PRIMARY KEY,
  site_id INTEGER REFERENCES sites (id),
  test_name VARCHAR NOT NULL,
  expectation VARCHAR NOT NULL
);

-- One row per scan of a site: its state, timing, test counters and final grade
CREATE TABLE IF NOT EXISTS scans (
  id SERIAL PRIMARY KEY,
  site_id INTEGER REFERENCES sites (id),
  state VARCHAR NOT NULL,
  start_time TIMESTAMP NOT NULL,
  end_time TIMESTAMP NULL,
  tests_completed SMALLINT NOT NULL DEFAULT 0,
  tests_failed SMALLINT NOT NULL DEFAULT 0,
  tests_passed SMALLINT NOT NULL DEFAULT 0,
  tests_quantity SMALLINT NOT NULL,
  error VARCHAR NULL,
  grade VARCHAR(2) NULL,
  grade_reasons JSONB NULL
);

-- One row per test run within a scan; `output` is the test's result as JSON
CREATE TABLE IF NOT EXISTS tests (
  id BIGSERIAL PRIMARY KEY,
  site_id INTEGER REFERENCES sites (id),
  scan_id INTEGER REFERENCES scans (id),
  name VARCHAR NOT NULL,
  expectation VARCHAR NOT NULL,
  output JSONB NOT NULL
);

-- NOTE(review): unlike the CREATE TABLE IF NOT EXISTS statements above, the CREATE INDEX and
-- CREATE ROLE statements below are not guarded, so re-running this script will fail on them.
-- NOTE(review): sites_domain_idx is an expression index on lower(domain); a query that compares
-- `domain` directly (without lower()) cannot use it -- confirm against the queries that read sites.
CREATE INDEX sites_domain_idx ON sites ((lower(domain)));
CREATE INDEX tests_name_idx ON tests (name);

-- Role for the scanner: may read and insert into every table, and update all of them except tests
CREATE ROLE httpobsscanner;
GRANT SELECT, INSERT ON sites, expectations, scans, tests TO httpobsscanner;
GRANT UPDATE on sites, expectations, scans TO httpobsscanner;

-- Role for the API: SELECT on sites is limited to the id, domain and public_headers columns
-- NOTE(review): the table-level INSERT, UPDATE grant on sites below appears to make the
-- column-level UPDATE (private_headers) grant that follows it redundant -- confirm the intent.
CREATE ROLE httpobsapi;
GRANT SELECT ON expectations, scans, tests to httpobsapi;
GRANT SELECT (id, domain, public_headers) ON sites TO httpobsapi;
GRANT INSERT, UPDATE ON sites, expectations to httpobsapi;
GRANT INSERT, UPDATE (private_headers) ON sites to httpobsapi;
|
|
@ -0,0 +1,16 @@
|
|||
# The states a scan can be in, as stored in the database
STATE_ABORTED = 'ABORTED'
STATE_FAILED = 'FAILED'
STATE_FINISHED = 'FINISHED'
STATE_RUNNING = 'RUNNING'
STATE_STARTED = 'STARTED'
STATE_STOPPED = 'STOPPED'

# Names exported by a star-import of this module
__all__ = [
    'STATE_ABORTED', 'STATE_FAILED', 'STATE_FINISHED',
    'STATE_RUNNING', 'STATE_STARTED', 'STATE_STOPPED',
]
|
|
@ -0,0 +1,19 @@
|
|||
from .content import contribute, subresource_integrity
|
||||
from .headers import content_security_policy, cookies, strict_transport_security,\
|
||||
x_content_type_options, x_xss_protection, x_frame_options
|
||||
from .misc import cross_origin_resource_sharing, redirection, tls_configuration
|
||||
|
||||
# Names of the analyzer functions exported by this package, all imported above from the
# .content, .headers and .misc modules.
__all__ = [
    'content_security_policy',
    'cookies',
    'contribute',
    'cross_origin_resource_sharing',
    'redirection',
    'strict_transport_security',
    'subresource_integrity',
    'tls_configuration',
    'x_content_type_options',
    'x_frame_options',
    'x_xss_protection',
]
|
||||
|
|
@ -0,0 +1,185 @@
|
|||
from bs4 import BeautifulSoup as bs
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import json
|
||||
import tld
|
||||
|
||||
# Second-to-last hostname labels (e.g. 'mozilla' in www.mozilla.org) that mark a host as a Mozilla property
MOZILLA_DOMAINS = ('mozilla', 'allizom', 'webmaker')
|
||||
|
||||
|
||||
def contribute(reqs: dict, expectation='contribute-json-with-required-keys') -> dict:
    """
    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        contribute-json-with-required-keys: contribute.json exists, with all the REQUIRED_KEYS [default]
        contribute-json-missing-required-keys: contribute.json exists, but missing some of the REQUIRED_KEYS
        contribute-json-invalid-json: contribute.json exists, but could not be parsed
        contribute-json-only-required-on-mozilla-properties: contribute.json isn't required,
          since it's not a Mozilla domain
        contribute-json-not-implemented: contribute.json file missing
    :return: dictionary with:
        data: the parsed contribute.json file
        expectation: test expectation
        pass: whether the site's configuration met its expectation (null for non-Mozilla sites)
        result: short string describing the result of the test
    """
    REQUIRED_KEYS = ('name', 'description', 'participate', 'bugs', 'urls')

    output = {
        'data': None,
        'expectation': expectation,
        'pass': False,
        'result': None,
    }
    response = reqs['responses']['auto']

    # If there's a contribute.json file, it must parse and contain every required key
    if reqs['resources']['/contribute.json']:
        try:
            output['data'] = json.loads(reqs['resources']['/contribute.json'])

            if all(key in output['data'] for key in REQUIRED_KEYS):
                output['result'] = 'contribute-json-with-required-keys'
            else:
                output['result'] = 'contribute-json-missing-required-keys'
        except (json.JSONDecodeError, TypeError):
            output['result'] = 'contribute-json-invalid-json'

    # If there's no contribute.json file, that's only a problem on Mozilla properties
    else:
        labels = urlparse(response.url).netloc.split('.')

        # Single-label hosts (e.g. localhost) have no second-to-last label and can't be Mozilla domains
        if len(labels) < 2 or labels[-2] not in MOZILLA_DOMAINS:
            output['expectation'] = output['result'] = 'contribute-json-only-required-on-mozilla-properties'
        else:
            output['result'] = 'contribute-json-not-implemented'

    # Check to see if the test passed or failed; the comparison deliberately uses the expectation
    # that was passed in, so that non-Mozilla sites end up with a pass of None rather than True
    if expectation == output['result']:
        output['pass'] = True
    elif output['result'] == 'contribute-json-only-required-on-mozilla-properties':
        output['pass'] = None

    return output
|
||||
|
||||
|
||||
def subresource_integrity(reqs: dict, expectation='sri-implemented-and-external-scripts-loaded-securely') -> dict:
    """
    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        sri-implemented-and-external-scripts-loaded-securely: integrity attribute exists on all external scripts,
          and scripts loaded over https [default for HTML]
        sri-implemented-and-external-scripts-loaded-insecurely: integrity attribute exists, but an external
          script isn't loaded over https
        sri-not-implemented-and-scripts-loaded-securely: an external script lacks the integrity attribute,
          but is loaded over https
        sri-not-implemented-and-scripts-loaded-insecurely: SRI isn't implemented, and scripts are downloaded over HTTP
        sri-not-implemented-but-all-scripts-loaded-from-secure-origin: SRI isn't implemented,
          but all scripts come from secure origins
        sri-not-implemented-but-no-scripts-loaded: SRI isn't implemented, because the page doesn't load any scripts
        sri-not-implemented-response-not-html: SRI isn't needed, because the page isn't HTML [default for non-HTML]
    :return: dictionary with:
        data: all external scripts and their integrity / crossorigin attributes
        expectation: test expectation
        pass: whether the site's external scripts met expectations
        result: short string describing the result of the test; besides the values above this can be
          request-did-not-return-status-code-200 or html-not-parsable
    """

    output = {
        'data': {},
        'expectation': expectation,
        'pass': False,
        'result': None,
    }
    response = reqs['responses']['auto']

    # Return the new result if it's worse than the existing result, otherwise just the current result
    def only_if_worse(result: str) -> str:
        goodness = ['sri-implemented-and-external-scripts-loaded-securely',
                    'sri-implemented-and-external-scripts-loaded-insecurely',
                    'sri-not-implemented-and-scripts-loaded-securely',
                    'sri-not-implemented-and-scripts-loaded-insecurely',
                    'sri-not-implemented-response-not-html']

        if not output['result']:
            return result
        elif goodness.index(result) > goodness.index(output['result']):
            return result
        else:
            return output['result']

    # If the response to get / fails
    if response.status_code != 200:
        output['result'] = 'request-did-not-return-status-code-200'

    # If the content isn't HTML, there's no scripts to load; this is okay
    elif response.headers.get('Content-Type', '').split(';')[0].strip().lower() != 'text/html':
        output['expectation'] = 'sri-not-implemented-response-not-html'
        output['result'] = 'sri-not-implemented-response-not-html'

    else:
        # Try to parse the HTML
        try:
            soup = bs(reqs['resources']['/'], 'html.parser')
        except Exception:
            output['result'] = 'html-not-parsable'
            return output

        # Track to see if any scripts were on foreign TLDs
        scripts_on_foreign_origin = False

        # The page's own TLD is the same for every script; None if it can't be determined
        page_tld = tld.get_tld(response.url, fail_silently=True)

        # Get all the scripts
        scripts = soup.find_all('script')
        for script in scripts:
            if not script.has_attr('src'):
                continue

            # Script tag parameters; these are HTML attributes, so they must be read with .get()
            # (attribute access on a tag searches for a child tag of that name instead)
            src = urlparse(script['src'])
            integrity = script.get('integrity')
            crossorigin = script.get('crossorigin')

            # Check to see if they're on the same TLD; two undeterminable TLDs don't count as a match
            sametld = page_tld is not None and page_tld == tld.get_tld(script['src'], fail_silently=True)

            # Single-label hosts (e.g. localhost) have no second-to-last label and can't be Mozilla domains
            labels = src.netloc.split('.')
            on_mozilla_domain = len(labels) >= 2 and labels[-2] in MOZILLA_DOMAINS

            # Check to see if it's the same origin, same TLD, or a trusted Mozilla subdomain
            if src.netloc == '' or sametld or on_mozilla_domain:
                continue

            scripts_on_foreign_origin = True

            # Add it to the scripts data result, since it's not a relative URI or on a Mozilla subdomain
            output['data'][script['src']] = {
                'crossorigin': crossorigin,
                'integrity': integrity
            }

            # See if it's a secure scheme
            securescheme = src.scheme == 'https'

            # Only scripts from foreign origins need SRI, so only they can affect the result
            if integrity and not securescheme:
                output['result'] = only_if_worse('sri-implemented-and-external-scripts-loaded-insecurely')
            elif not integrity and securescheme:
                output['result'] = only_if_worse('sri-not-implemented-and-scripts-loaded-securely')
            elif not integrity and not securescheme:
                output['result'] = only_if_worse('sri-not-implemented-and-scripts-loaded-insecurely')

        # If the page doesn't load any scripts
        if not scripts:
            output['result'] = 'sri-not-implemented-but-no-scripts-loaded'

        # If all the scripts are loaded from a secure origin, not triggering a need for SRI
        elif not scripts_on_foreign_origin:
            output['result'] = 'sri-not-implemented-but-all-scripts-loaded-from-secure-origin'

        # If the page loaded from a foreign origin, but everything included SRI
        elif not output['result']:
            output['result'] = 'sri-implemented-and-external-scripts-loaded-securely'

    # Check to see if the test passed or failed
    if expectation == output['result']:
        output['pass'] = True
    elif output['result'] in ('sri-not-implemented-response-not-html',
                              'sri-not-implemented-but-all-scripts-loaded-from-secure-origin',
                              'sri-not-implemented-but-no-scripts-loaded'):
        output['pass'] = True

    return output
|
|
@ -0,0 +1,319 @@
|
|||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def content_security_policy(reqs: dict, expectation='csp-implemented-with-unsafe-allowed-in-style-src-only') -> dict:
    """
    Evaluate the Content-Security-Policy header of the primary ('auto') response.

    When several problems are present, the reported result is chosen by a fixed precedence that does not
    depend on the order of the directives in the header:
        unsafe keyword outside style-src > http: source on an https page > unsafe keyword in style-src only

    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        csp-implemented-with-no-unsafe: CSP implemented with no unsafe inline keywords
        csp-implemented-with-unsafe-allowed-in-style-src-only: Allow the 'unsafe' keyword in style-src only [default]
        csp-implemented-with-unsafe: CSP implemented with using either unsafe-eval or unsafe-inline
        csp-implemented-with-insecure-scheme: CSP implemented with having sources over http:
        csp-not-implemented: CSP not implemented
    :return: dictionary with:
        data: the raw CSP header
        expectation: test expectation
        pass: whether the site's configuration met its expectation
        result: short string describing the result of the test
    """
    output = {
        'data': None,
        'expectation': expectation,
        'pass': False,
        'result': None,
    }
    response = reqs['responses']['auto']

    if 'Content-Security-Policy' in response.headers:
        # Store the CSP policy, if it's implemented
        output['data'] = response.headers['Content-Security-Policy'].strip()

        # Decompose the policy into {directive name: source list}; empty directives (e.g. from a
        # trailing ';') are dropped, and a repeated directive keeps its last value
        csp = {}
        for directive in output['data'].split(';'):
            name, _, value = directive.strip().partition(' ')
            if name:
                csp[name.lower()] = value

        served_over_https = urlparse(response.url).scheme == 'https'
        unsafe_in_style_src = False
        unsafe_elsewhere = False
        insecure_scheme = False

        for name, value in csp.items():
            if 'unsafe-' in value:
                if name == 'style-src':
                    unsafe_in_style_src = True
                else:
                    unsafe_elsewhere = True
            if served_over_https and 'http:' in value:
                insecure_scheme = True

        if unsafe_elsewhere:
            output['result'] = 'csp-implemented-with-unsafe'
        elif insecure_scheme:
            output['result'] = 'csp-implemented-with-insecure-scheme'
        elif unsafe_in_style_src:
            output['result'] = 'csp-implemented-with-unsafe-allowed-in-style-src-only'
        else:
            output['result'] = 'csp-implemented-with-no-unsafe'
    else:
        output['result'] = 'csp-not-implemented'

    # Check to see if the test passed or failed
    if expectation == output['result']:
        output['pass'] = True
    # A policy with no unsafe keywords at all is stricter than the default expectation, so it passes too
    elif (expectation == 'csp-implemented-with-unsafe-allowed-in-style-src-only' and
          output['result'] == 'csp-implemented-with-no-unsafe'):
        output['pass'] = True

    return output
|
||||
|
||||
def cookies(reqs: dict, expectation='secure-cookies-with-httponly-sessions') -> dict:
    """
    Check that every cookie carries the Secure flag and that login/session cookies are HttpOnly.

    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        secure-cookies-with-httponly-sessions: All cookies have secure flag set, all session cookies are HttpOnly
        insecure-cookie-set: Allowed to set cookies without the secure flag
        httponly-flag-not-set-on-session-cookies: Allowed to have session cookies without the HttpOnly flag
    :return: dictionary with:
        data: the cookie jar
        expectation: test expectation
        pass: whether the site's configuration met its expectation
        result: short string describing the result of the test
    """
    output = {
        'data': None,
        'expectation': expectation,
        'pass': False,
        'result': None,
    }
    session = reqs['session']  # all requests and their associated cookies

    # No session at all (site unreachable) is treated the same as a session without cookies;
    # a site that sets no cookies always passes
    if session is None or not session.cookies:
        output['result'] = 'no-cookies-found'
        output['pass'] = True
        return output

    jar = {}
    for cookie in session.cookies:
        # The cookie object may not expose `httponly` as an attribute; in that case the flag is
        # looked up among the non-standard attributes kept in `_rest`. The cookie itself is not modified.
        if hasattr(cookie, 'httponly'):
            httponly = cookie.httponly
        else:
            httponly = any(key.lower() == 'httponly' for key in cookie._rest)

        # Add it to the jar
        attributes = {name: getattr(cookie, name, None)
                      for name in ('domain', 'expires', 'max-age', 'path', 'port', 'secure')}
        attributes['httponly'] = httponly
        jar[cookie.name] = attributes

        # All cookies must be set with the secure flag, but httponly not being set overrides it
        if not cookie.secure and not output['result']:
            output['result'] = 'insecure-cookie-set'

        # Login and session cookies should be set with HttpOnly
        if any(marker in cookie.name.lower() for marker in ('login', 'sess')) and not httponly:
            output['result'] = 'httponly-flag-not-set-on-session-cookies'

    # Save the cookie jar
    output['data'] = jar

    # Got through the cookie check properly
    if not output['result']:
        output['result'] = 'secure-cookies-with-httponly-sessions'

    # Check to see if the test passed or failed
    if expectation == output['result']:
        output['pass'] = True

    return output
|
||||
|
||||
def strict_transport_security(reqs: dict, expectation='hsts-implemented-max-age-at-least-six-months') -> dict:
    """
    Evaluate the Strict-Transport-Security header of the HTTPS response.

    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        hsts-implemented-max-age-at-least-six-months: HSTS implemented with a max age of at least six months
            (15768000) [default]
        hsts-implemented-max-age-less-than-six-months: HSTS implemented with a max age of less than six months
        hsts-not-implemented: HSTS header missing
    :return: dictionary with:
        data: the raw HSTS header
        expectation: test expectation
        includesubdomains: whether the includeSubDomains directive is set
        max-age: the parsed max-age value, in seconds
        pass: whether the site's configuration met its expectation
        preload: whether the preload flag is set
        result: short string describing the result of the test
    """
    SIX_MONTHS = 15768000

    output = {
        'data': None,
        'expectation': expectation,
        'includesubdomains': None,
        'max-age': None,
        'pass': False,
        'preload': None,
        'result': None,
    }
    response = reqs['responses']['https']

    # Without an HTTPS response there is nothing to evaluate; every field keeps its initial value
    if response is None:
        return output

    if 'Strict-Transport-Security' in response.headers:
        output['data'] = response.headers['Strict-Transport-Security']

        # If they're not included, then they're considered to be unset
        output['includesubdomains'] = False
        output['preload'] = False
        malformed = False

        for parameter in output['data'].split(';'):
            parameter = parameter.strip().lower()

            if parameter.startswith('max-age='):
                # The value may be given as a quoted string, e.g. max-age="31536000"
                try:
                    output['max-age'] = int(parameter[8:].strip().strip('"'))
                except ValueError:
                    malformed = True
            elif parameter == 'includesubdomains':
                output['includesubdomains'] = True
            elif parameter == 'preload':
                output['preload'] = True

        # max-age is mandatory and must be a non-negative integer; 0 is valid (it clears the HSTS entry)
        if malformed or output['max-age'] is None or output['max-age'] < 0:
            output['max-age'] = None
            output['result'] = 'hsts-invalid-header'
        elif output['max-age'] < SIX_MONTHS:  # must be at least six months
            output['result'] = 'hsts-implemented-max-age-less-than-six-months'
        else:
            output['result'] = 'hsts-implemented-max-age-at-least-six-months'

    # If HSTS isn't set in the headers
    else:
        output['result'] = 'hsts-not-implemented'

    # Check to see if the test passed or failed
    if expectation == output['result']:
        output['pass'] = True

    return output
|
||||
|
||||
def x_content_type_options(reqs: dict, expectation='x-content-type-options-nosniff') -> dict:
    """
    Evaluate the X-Content-Type-Options header of the primary ('auto') response.

    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        x-content-type-options-nosniff: X-Content-Type-Options set to "nosniff" [default]
        x-content-type-options-not-implemented: X-Content-Type-Options header missing
    :return: dictionary with:
        data: the raw X-Content-Type-Options header
        expectation: test expectation
        pass: whether the site's configuration met its expectation
        result: short string describing the result of the test
    """
    headers = reqs['responses']['auto'].headers
    raw_value = None

    if 'X-Content-Type-Options' not in headers:
        verdict = 'x-content-type-options-not-implemented'
    else:
        raw_value = headers['X-Content-Type-Options']

        # The only meaningful value is "nosniff", compared case-insensitively
        if raw_value.lower() == 'nosniff':
            verdict = 'x-content-type-options-nosniff'
        else:
            verdict = 'x-content-type-options-invalid-header'

    return {
        'data': raw_value,
        'expectation': expectation,
        'pass': expectation == verdict,
        'result': verdict,
    }
|
||||
|
||||
|
||||
def x_frame_options(reqs: dict, expectation='x-frame-options-sameorigin-or-deny') -> dict:
    """
    Evaluate the X-Frame-Options header of the primary ('auto') response.

    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        x-frame-options-sameorigin-or-deny: X-Frame-Options set to "sameorigin" or "deny" [default]
        x-frame-options-allow-from-origin: X-Frame-Options set to ALLOW-FROM uri
        x-frame-options-not-implemented: X-Frame-Options header missing
    :return: dictionary with:
        data: the raw X-Frame-Options header
        expectation: test expectation
        pass: whether the site's configuration met its expectation
        result: short string describing the result of the test
    """
    output = {
        'data': None,
        'expectation': expectation,
        'pass': False,
        'result': None,
    }
    response = reqs['responses']['auto']

    if 'X-Frame-Options' in response.headers:
        output['data'] = response.headers['X-Frame-Options']
        value = output['data'].strip().lower()

        if value in ('deny', 'sameorigin'):
            output['result'] = 'x-frame-options-sameorigin-or-deny'
        # ALLOW-FROM must be the leading keyword, followed by a non-empty origin
        elif value.startswith('allow-from ') and value[len('allow-from '):].strip():
            output['result'] = 'x-frame-options-allow-from-origin'
        else:
            output['result'] = 'x-frame-options-invalid-header'
    else:
        output['result'] = 'x-frame-options-not-implemented'

    # Check to see if the test passed or failed
    if expectation == output['result']:
        output['pass'] = True

    return output
|
||||
|
||||
|
||||
def x_xss_protection(reqs: dict, expectation='x-xss-protection-1-mode-block') -> dict:
    """
    Evaluate the X-XSS-Protection header of the primary ('auto') response.

    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        x-xss-protection-1-mode-block: X-XSS-Protection set to "1; mode=block" [default]
        x-xss-protection-0: X-XSS-Protection set to "0" (disabled)
        x-xss-protection-not-implemented: X-XSS-Protection header missing
    :return: dictionary with:
        data: the raw X-XSS-Protection header
        expectation: test expectation
        pass: whether the site's configuration met its expectation
        result: short string describing the result of the test
    """
    headers = reqs['responses']['auto'].headers
    raw_value = None

    if 'X-XSS-Protection' not in headers:
        verdict = 'x-xss-protection-not-implemented'
    else:
        raw_value = headers['X-XSS-Protection']

        # Case and spaces are ignored when looking for "1; mode=block"
        compact = raw_value.lower().replace(' ', '').strip()

        if compact == '1;mode=block':
            verdict = 'x-xss-protection-1-mode-block'
        elif raw_value.strip() == '0':
            verdict = 'x-xss-protection-0'
        else:
            verdict = 'x-xss-protection-invalid-header'

    return {
        'data': raw_value,
        'expectation': expectation,
        'pass': expectation == verdict,
        'result': verdict,
    }
|
|
@ -0,0 +1,175 @@
|
|||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def cross_origin_resource_sharing(reqs: dict, expectation='cross-origin-resource-sharing-not-implemented') -> dict:
    """
    Check whether the site opens itself to cross-origin access via the Access-Control-Allow-Origin
    header, a clientaccesspolicy.xml file, or a crossorigin.xml file.

    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        cross-origin-resource-sharing-not-implemented: ACAO and the XML files don't exist [default]
        cross-origin-resource-sharing-implemented: One of them does
    :return: dictionary with:
        data: the ACAO header, clientaccesspolicy.xml file, and crossorigin.xml file
        expectation: test expectation
        pass: whether the site's configuration met its expectation
        result: short string describing the result of the test
    """
    output = {
        'data': {
            'acao': None,
            'clientaccesspolicy': None,
            'crossorigin': None
        },
        'expectation': expectation,
        'pass': False,
        'result': None,
    }

    response = reqs['responses']['auto']
    resources = reqs.get('resources') or {}  # resource path -> page text, or None when not retrieved

    if response is not None and 'Access-Control-Allow-Origin' in response.headers:
        output['data']['acao'] = response.headers['Access-Control-Allow-Origin']

        # Only a wildcard origin is flagged.
        # NOTE(review): an ACAO header restricted to specific origins (with no XML policy files)
        # currently leaves `result` as None -- confirm which result it should map to.
        if '*' in output['data']['acao']:
            output['result'] = 'cross-origin-resource-sharing-implemented'

    # TODO: check to see if it's a limited clientaccesspolicy.xml file
    clientaccesspolicy = resources.get('/clientaccesspolicy.xml')
    if clientaccesspolicy:
        output['result'] = 'cross-origin-resource-sharing-implemented'
        output['data']['clientaccesspolicy'] = clientaccesspolicy

    # TODO: check to see if it's a limited crossorigin.xml file
    crossorigin = resources.get('/crossorigin.xml')
    if crossorigin:
        output['result'] = 'cross-origin-resource-sharing-implemented'
        output['data']['crossorigin'] = crossorigin

    if not any(output['data'].values()):
        output['result'] = 'cross-origin-resource-sharing-not-implemented'

    # Check to see if the test passed or failed
    if expectation == output['result']:
        output['pass'] = True

    return output
|
||||
|
||||
|
||||
def redirection(reqs: dict, expectation='http-to-https-with-initial-redirect-to-same-host') -> dict:
    """
    Follow the redirect chain of the plain-HTTP request and check that it moves to HTTPS properly.

    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        http-to-https-with-initial-redirect-to-same-host: Redirects from http to https,
            first redirection stays on host [default]
        no-https-redirect: Site allowed to be served over HTTP
        not-listening-for-http: Site doesn't listen for HTTP requests at all
        off-host-redirection-from-http: Initial HTTP allowed to go from one host to another, still redirects to HTTPS
    :return: dictionary with:
        destination: final location of where GET / over HTTP ends (None if there was no HTTP response)
        expectation: test expectation
        pass: whether the site's configuration met its expectation
        redirects: whether the site does any redirections at all
        result: short string describing the result of the test
        route: the URLs that the requests followed to get to destination
        status_code: HTTP status code for the final redirection (typically 301 or 302)
    """
    from urllib.parse import urljoin  # local import: the file only imports urlparse

    response = reqs['responses']['http']
    listening = response is not None

    output = {
        'destination': response.url if listening else None,
        'expectation': expectation,
        'pass': False,
        'redirects': listening,
        'result': None,
        'route': [],
        'status_code': response.status_code if listening else None,
    }

    # Compare against None explicitly: a response object can itself be falsy (e.g. on an HTTP
    # error status), which does not mean the site isn't listening
    if not listening:
        output['result'] = 'not-listening-for-http'

    elif response.history:
        for entry in response.history:
            src = urlparse(entry.url)
            # Location may be relative; resolve it against the URL that issued the redirect
            dst = urlparse(urljoin(entry.url, entry.headers.get('Location', '')))

            # Add the result to the path that requests followed
            output['route'].append(entry.url)

            # http should never redirect to another http location -- should always go to https first
            if dst.scheme != 'https':
                output['result'] = 'no-https-redirect'
                output['status_code'] = entry.status_code

            # If it's an http -> https redirection, make sure it redirects to the same host. If that's not done,
            # then HSTS cannot be properly set on the original host
            elif src.scheme == 'http' and dst.scheme == 'https' and src.netloc != dst.netloc:
                output['result'] = 'off-host-redirection-from-http'
                output['status_code'] = entry.status_code

            else:
                # Store the final status code for the redirection
                output['status_code'] = response.history[-1].status_code

        if not output['result']:
            output['result'] = 'http-to-https-with-initial-redirect-to-same-host'

    # No redirections took place
    else:
        output['result'] = 'no-https-redirect'
        output['redirects'] = False

    # Append the final location to the path
    if listening:
        output['route'].append(response.url)

    # Check to see if the test passed or failed
    if expectation == output['result'] or output['result'] == 'not-listening-for-http':
        output['pass'] = True

    return output
|
||||
|
||||
|
||||
def tls_configuration(reqs: dict, expectation='intermediate-or-modern-tls-configuration') -> dict:
    """
    Extract the Mozilla TLS configuration level from the textual output of the TLS Observatory client.

    :param reqs: dictionary containing all the request and response objects
    :param expectation: test expectation
        intermediate-or-modern-tls-configuration: intermediate or modern TLS configuration [default]
        modern-tls-configuration: modern TLS configuration only
        intermediate-tls-configuration: intermediate TLS configuration only
        old-tls-configuration: old TLS configuration only
        bad-tls-configuration: known bad TLS configuration
    :return: dictionary with:
        expectation: test expectation
        pass: whether the site's configuration met its expectation
        result: short string describing the result of the test; tls-observatory-scan-failed when
            there is no output or the output contains no evaluation line
        tls_observatory_scan_id: TLS observatory scan id, for result lookups
    """
    EVALUATION_HEADER = '* Mozilla evaluation: '
    SCANNING_HEADER = 'Scanning '
    RESULT_SUFFIX = '-tls-configuration'

    output = {
        'expectation': expectation,
        'pass': False,
        'result': None,
        'tls_observatory_scan_id': None,
    }
    tlsobs = reqs['responses']['tlsobs']
    level = None

    if tlsobs is not None:
        for line in tlsobs.splitlines():
            line = line.rstrip()

            if line.startswith(SCANNING_HEADER):
                # The id is the last word of the line, followed by one closing character
                try:
                    output['tls_observatory_scan_id'] = int(line.split(' ')[-1][:-1])
                except ValueError:
                    pass  # unexpected format: leave the scan id unset instead of failing the test

            elif line.startswith(EVALUATION_HEADER):
                level = line.split(EVALUATION_HEADER)[-1].strip()

    if level:
        output['result'] = level + RESULT_SUFFIX  # intermediate-tls-configuration
    else:
        output['result'] = 'tls-observatory-scan-failed'

    # The expectation names the accepted levels, joined with '-or-'; match whole level names
    # so that an empty or partial level can never pass
    accepted = expectation[:-len(RESULT_SUFFIX)] if expectation.endswith(RESULT_SUFFIX) else expectation
    if level and level in accepted.split('-or-'):
        output['pass'] = True

    return output
|
|
@ -0,0 +1,3 @@
|
|||
from .grade import grade
|
||||
|
||||
__all__ = ['grade']
|
|
@ -0,0 +1,53 @@
|
|||
import database
|
||||
|
||||
# Grades from best to worst; a larger index means a worse grade
grade_order = ['A+', 'A', 'A-', 'B+', 'B', 'B-', 'C+', 'C', 'C-', 'F']

# Module-level grading state, kept for backward compatibility: it is the default target of __set_grade().
# grade() builds its own fresh state per call, so results can't leak from one scan into the next.
output = {
    'grade': 'A+',
    'grade_reasons': {}
}


def __set_grade(grade: str, test: str, reason=None, result=None) -> str:
    """
    Updates the grade, but only if it's worse than the current grade

    :param grade: the new maximum grade
    :param test: name of the test that caps the grade; used as the key in grade_reasons
    :param reason: optional explanation; when given it is recorded for the test, whether or not
        the grade itself changed
    :param result: the grading state to update (a dict with 'grade' and 'grade_reasons');
        defaults to the module-level `output`
    :return: the current grade after possible updating
    """
    if result is None:
        result = output

    if grade_order.index(grade) > grade_order.index(result['grade']):
        result['grade'] = grade

    if reason:
        reason += ' Grade capped at {grade}.'.format(grade=grade)
        result['grade_reasons'][test] = reason

    return result['grade']


def grade(scan_id: int) -> dict:
    """
    Compute the grade for a scan from its stored test results and save it.

    :param scan_id: id of the scan whose test results should be graded
    :return: whatever database.insert_scan_grade returns for the scan id and the computed grade
    """
    # Start every scan from a clean slate
    result = {
        'grade': 'A+',
        'grade_reasons': {}
    }

    # Get the test results from the database
    test_results = database.select_test_results(scan_id)

    # TODO: this needs a ton of fleshing out

    # Grade the CSP stuff; a missing test result is graded like a missing/improper CSP
    test = 'content-security-policy'
    outcome = (test_results.get(test) or {}).get('result')

    if outcome == 'csp-implemented-with-no-unsafe':
        pass
    elif outcome == 'csp-implemented-with-unsafe-allowed-in-style-src-only':
        __set_grade('A', test, 'CSP implemented with unsafe-inline in style-src.', result)
    else:
        __set_grade('B', test, 'CSP not implemented or implemented improperly.', result)

    # Grade the TLS stuff
    test = 'tls-configuration'
    outcome = (test_results.get(test) or {}).get('result')

    if outcome == 'old-tls-configuration':
        __set_grade('C', test, 'TLS configuration uses the Mozilla old configuration.', result)
    elif outcome == 'bad-tls-configuration':
        __set_grade('F', test, 'TLS configuration doesn\'t match any known good Mozilla configurations.', result)

    return database.insert_scan_grade(scan_id, result)
|
|
@ -0,0 +1 @@
|
|||
from .retriever import *
|
|
@ -0,0 +1,94 @@
|
|||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
import subprocess
|
||||
|
||||
|
||||
# Create a session, returning the session and the HTTP response in a dictionary
|
||||
def __create_session(url: str, timeout=10) -> dict:
    """
    Create a session and use it to fetch the given URL.

    :param url: the URL to request
    :param timeout: seconds to wait for the server before giving up, so that an unresponsive host
        can't block the scan indefinitely
    :return: dictionary with the session ('session') and the HTTP response ('response')
    :raises requests.exceptions.RequestException: if the request fails; the session is closed first
    """
    s = requests.Session()

    try:
        r = s.get(url, timeout=timeout)
    except requests.exceptions.RequestException:
        # Don't leak the connection pool of a session that nobody will ever get back
        s.close()
        raise

    # Store the domain and scheme in the session (taken from the final URL, after any redirects)
    s.url = urlparse(r.url)

    return {'session': s, 'response': r}
|
||||
|
||||
|
||||
def __get(session, relative_path='/', timeout=10):
    """
    Fetch a path from the scheme and host stored on the session (session.url).

    :param session: a session carrying a parsed `url` attribute, or None
    :param relative_path: path to request, starting with '/'
    :param timeout: seconds to wait for the server before giving up
    :return: the response, or None if there is no session or the request failed
    """
    if session is None:
        return None

    try:
        return session.get(session.url.scheme + '://' + session.url.netloc + relative_path, timeout=timeout)
    except requests.exceptions.RequestException:
        # Best effort: a resource that can't be fetched is reported as missing
        return None
|
||||
|
||||
|
||||
def __get_page_text(response: 'requests.Response') -> str:
    """
    Return the body of a successfully retrieved page.

    :param response: the response to read, or None if the request failed
    :return: the response text when the status code is 200, otherwise None
    """
    # A failed retrieval is passed in as None; treat it like any other missing page
    if response is None or response.status_code != 200:
        return None

    return response.text
|
||||
|
||||
|
||||
def __get_tlsobs_result(hostname: str):
    """
    Run the `tlsobs` command line client against a hostname.

    :param hostname: the host to scan
    :return: the client's output decoded as UTF-8, or None if the scan could not be run
    """
    # A hostname starting with '-' would be read by the client as a command line option
    if not hostname or hostname.startswith('-'):
        return None

    try:
        return subprocess.check_output(['tlsobs', hostname]).decode('utf-8')
    except (subprocess.CalledProcessError, OSError, UnicodeDecodeError):
        # Client missing, exited with an error, or produced undecodable output: report no result
        return None
|
||||
|
||||
|
||||
def retrieve_all(hostname: str, headers=None) -> dict:
    """
    Retrieve everything the analyzers need for a host: the HTTP and HTTPS responses, a handful of
    well-known resources, and the TLS Observatory output.

    :param hostname: the host to retrieve from
    :param headers: optional HTTP headers; accepted but not yet applied to any request (see TODO below)
    :return: dictionary with:
        hostname: the hostname that was requested
        resources: resource path -> page text (None when it couldn't be retrieved)
        responses: the 'http' and 'https' responses (None for a scheme that couldn't be reached),
            'auto' (the HTTPS response when available, otherwise the HTTP one), and 'tlsobs'
        session: the session belonging to the 'auto' response
    :raises requests.exceptions.RequestException: if the host can be reached over neither HTTP nor HTTPS
    """
    retrievals = {
        'hostname': hostname,
        'resources': {
        },
        'responses': {
            'auto': None,  # whichever of 'http' or 'https' actually works, with 'https' as higher priority
            'http': None,
            'https': None,
            'tlsobs': None
        },
        'session': None,
    }

    # The list of resources to get
    resources = (
        '/clientaccesspolicy.xml',
        '/contribute.json',
        '/crossorigin.xml',
        '/robots.txt'
    )

    # HTTP headers stuff (avoiding mutable arguments)
    # TODO: pull private / public headers from database
    if not headers:
        headers = {}

    # Create some reusable sessions, one for HTTP and one for HTTPS. A host that only answers on one
    # of the two schemes is still scanned using the one that works.
    sessions = {}
    errors = {}
    for scheme in ('http', 'https'):
        try:
            sessions[scheme] = __create_session(scheme + '://' + hostname + '/')
        except requests.exceptions.RequestException as error:
            sessions[scheme] = {'session': None, 'response': None}
            errors[scheme] = error

    http_session = sessions['http']
    https_session = sessions['https']

    # If neither one works, then the site just can't be loaded
    if not http_session['session'] and not https_session['session']:
        # Surface the underlying failure so that the caller can record why the retrieval failed
        if errors:
            raise errors['http'] if 'http' in errors else errors['https']
        return retrievals

    # Store the HTTP only and HTTPS only responses (some things can only be retrieved over one or the other)
    retrievals['responses']['http'] = http_session['response']
    retrievals['responses']['https'] = https_session['response']

    preferred = https_session if https_session['session'] else http_session
    retrievals['responses']['auto'] = preferred['response']
    retrievals['session'] = preferred['session']

    # Store the contents of the base page
    retrievals['resources']['/'] = __get_page_text(retrievals['responses']['auto'])

    # Store all the files we retrieve; a resource whose request failed is stored as None
    for resource in resources:
        resp = __get(retrievals['session'], resource)
        retrievals['resources'][resource] = __get_page_text(resp) if resp is not None else None

    # Store the TLS Observatory response
    retrievals['responses']['tlsobs'] = __get_tlsobs_result(hostname)

    return retrievals
|
|
@ -0,0 +1,38 @@
|
|||
from database import get_cursor, insert_test_result
|
||||
from scanner import STATE_FAILED
|
||||
from scanner.retriever import retrieve_all
|
||||
|
||||
from celery import Celery
|
||||
from os import environ
|
||||
|
||||
import scanner.analyzer
|
||||
import sys
|
||||
|
||||
# Celery application for the scanner; the broker location is read from the BROKER_URL environment
# variable, so importing this module raises KeyError when that variable is not set
app = Celery('http_observatory_scanner', broker=environ['BROKER_URL'])
|
||||
|
||||
|
||||
# TODO: make this into a Celery task
|
||||
def scan(hostname: str, site_id: int, scan_id: int):
    """
    Retrieve a site's resources, run every analyzer test against them, and store the results.

    If the retrieval fails, the scan row is closed out with STATE_FAILED and the error, and no
    tests are run.

    :param hostname: the host to scan
    :param site_id: database id of the site being scanned
    :param scan_id: database id of the scan the results belong to
    """
    # Attempt to retrieve all the resources
    try:
        reqs = retrieve_all(hostname)
    except Exception as e:  # not a bare except: KeyboardInterrupt / SystemExit must still propagate
        # TODO: have more specific error messages

        # Close out the scan in the database, recording why it failed
        with get_cursor() as cur:
            cur.execute("""UPDATE scans
                             SET (status, end_time, error) = (%s, NOW(), %s)
                             WHERE id = %s
                             RETURNING *""",
                        (STATE_FAILED, repr(e), scan_id))
        return

    # Get all the tests: the callables that are defined inside the analyzer package itself. Helpers
    # that the analyzer modules merely import (e.g. urlparse) must not be run as tests.
    package = scanner.analyzer.__name__
    tests = [f for f in scanner.analyzer.__dict__.values()
             if callable(f) and (getattr(f, '__module__', None) or '').startswith(package)]

    for test in tests:
        # TODO: Get overridden expectation
        test_name = test.__name__.replace('_', '-')
        insert_test_result(site_id, scan_id, test_name, test(reqs))
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,6 @@
|
|||
from .api import *
|
||||
|
||||
__all__ = [
|
||||
'api_get_test_results',
|
||||
'api_post_scan_hostname',
|
||||
]
|
|
@ -0,0 +1,55 @@
|
|||
from scanner import STATE_FINISHED
|
||||
from scanner.grader import grade
|
||||
from scanner.tasks import scan
|
||||
|
||||
from flask import Blueprint, abort, jsonify
|
||||
|
||||
import database
|
||||
|
||||
# Blueprint that collects the API routes defined in this module
api = Blueprint('api', __name__)
|
||||
|
||||
# TODO Implement GET, which just returns scan status?
|
||||
# @api.route('/api/v1/scan/<hostname>', methods=['GET'])
|
||||
# def get_scan_hostname(hostname):
|
||||
# abort(403)
|
||||
|
||||
@api.route('/api/v1/scan/<hostname>', methods=['GET', 'POST'])
def api_post_scan_hostname(hostname: str):
    """
    Return the most recent scan of a host, or start a new scan if there isn't a recent one.

    :param hostname: the host to scan, taken from the URL
    :return: JSON of the recent scan row (graded first, if it finished without a grade), or of
        the newly inserted scan row
    """
    # NOTE(review): hostname comes straight from the URL and ends up in outgoing requests and in
    # a command line; consider validating it here before it is used.
    hostname = hostname.lower()

    # Get the site's id number
    site_id = database.select_site_id(hostname)

    # Next, let's see if there's a recent scan
    recent_scan_row = database.select_scan_recent_scan(site_id)

    # If there was a recent scan, just return it
    if recent_scan_row:
        # NOTE(review): this reads a 'state' field, while the failure path in the scanner task
        # updates a 'status' column -- confirm which name the scans table actually uses.
        if recent_scan_row['state'] == STATE_FINISHED and recent_scan_row['grade'] is None:
            recent_scan_row = grade(recent_scan_row['id'])

        # TODO: clean this up
        return jsonify(recent_scan_row)

    # Otherwise, let's start up a scan
    row = database.insert_scan(site_id)
    scan_id = row['id']

    # Begin the dispatch process
    scan(hostname, site_id, scan_id)

    # And return the scan data, as it was when the scan was created
    # TODO: clean this up
    return jsonify(row)
|
||||
|
||||
|
||||
@api.route('/api/v1/results/<scan_id>', methods=['GET'])
def api_get_test_results(scan_id: int):
    """
    Return all the test results of a scan as JSON.

    :param scan_id: id of the scan, as given in the URL; aborts with 403 if it isn't an integer
    :return: JSON of the test results selected from the database for that scan id
    """
    try:
        numeric_id = int(scan_id)
    except ValueError:
        abort(403)

    # Look up every test result stored for the scan and hand it back
    results = database.select_test_results(numeric_id)
    return jsonify(results)
|
|
@ -0,0 +1,13 @@
|
|||
from flask import Flask
|
||||
from website.backend import api
|
||||
|
||||
app = Flask(__name__)

# Register the API routes at import time, so that they also exist when the app is served by a
# WSGI server that imports this module instead of running it as a script
app.register_blueprint(api)


@app.route('/')
def main() -> str:
    """Landing page of the backend service."""
    return 'Welcome to the HTTP Observatory backend service!'


if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive debugger, which must never be exposed on
    # a publicly reachable host; use this entry point for local development only.
    app.run(debug=True)
|
Загрузка…
Ссылка в новой задаче