Bugfix in test_crawl.py unit test.

Some sites respond with a 301 redirect to 'http://', which is an invalid
URL.
This commit is contained in:
englehardt 2016-01-28 18:50:50 -05:00
Родитель d070cb3305
Коммит 431acd0f00
1 изменённых файлов: 6 добавлений и 3 удалений

Просмотреть файл

@@ -116,10 +116,13 @@ class TestCrawl():
ccur.execute("SELECT COUNT(*) FROM http_responses WHERE top_url = ?",('http://'+url,))
if ccur.fetchone()[0] > 1:
continue
ccur.execute("SELECT response_status FROM http_responses WHERE top_url = ?",('http://'+url,))
if ccur.fetchone()[0] == 204:
ccur.execute("SELECT response_status, location FROM http_responses WHERE top_url = ?",('http://'+url,))
response_status, location = ccur.fetchone()
if response_status == 204:
continue
unexpected_missing_url.add(url)
if location == 'http://':
continue
unexpected_missing_urls.add(url)
crawl_con.close()
assert len(unexpected_missing_urls) == 0