fix ignore file to include more
This commit is contained in:
Родитель
bb789c4b8b
Коммит
319771f10b
|
@ -2,59 +2,6 @@
|
||||||
__pycache__/
|
__pycache__/
|
||||||
*.py[cod]
|
*.py[cod]
|
||||||
|
|
||||||
# C extensions
|
|
||||||
*.so
|
|
||||||
|
|
||||||
# Distribution / packaging
|
|
||||||
.Python
|
|
||||||
env/
|
|
||||||
build/
|
|
||||||
develop-eggs/
|
|
||||||
dist/
|
|
||||||
downloads/
|
|
||||||
eggs/
|
|
||||||
.eggs/
|
|
||||||
lib/
|
|
||||||
lib64/
|
|
||||||
parts/
|
|
||||||
sdist/
|
|
||||||
var/
|
|
||||||
*.egg-info/
|
|
||||||
.installed.cfg
|
|
||||||
*.egg
|
|
||||||
|
|
||||||
# PyInstaller
|
|
||||||
# Usually these files are written by a python script from a template
|
|
||||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
|
||||||
*.manifest
|
|
||||||
*.spec
|
|
||||||
|
|
||||||
# Installer logs
|
|
||||||
pip-log.txt
|
|
||||||
pip-delete-this-directory.txt
|
|
||||||
|
|
||||||
# Unit test / coverage reports
|
|
||||||
htmlcov/
|
|
||||||
.tox/
|
|
||||||
.coverage
|
|
||||||
.coverage.*
|
|
||||||
.cache
|
|
||||||
nosetests.xml
|
|
||||||
coverage.xml
|
|
||||||
*,cover
|
|
||||||
|
|
||||||
# Translations
|
|
||||||
*.mo
|
|
||||||
*.pot
|
|
||||||
|
|
||||||
# Django stuff:
|
|
||||||
*.log
|
|
||||||
|
|
||||||
# Sphinx documentation
|
|
||||||
docs/_build/
|
|
||||||
|
|
||||||
# PyBuilder
|
|
||||||
target/
|
|
||||||
/.idea
|
/.idea
|
||||||
/MoDataSubmission.iml
|
/MoDataSubmission.iml
|
||||||
/pyLibrary/.svn
|
/pyLibrary/.svn
|
||||||
|
|
|
@ -0,0 +1,64 @@
|
||||||
|
|
||||||
|
Environment
|
||||||
|
===========
|
||||||
|
|
||||||
|
This directory is for connecting to other systems. Generally, these
|
||||||
|
classes are facades that assume content is UTF-8 encoded JSON.
|
||||||
|
|
||||||
|
|
||||||
|
files
|
||||||
|
-----
|
||||||
|
|
||||||
|
The `File` class makes the default assumption all files have cr-delimited
|
||||||
|
unicode content that is UTF-8 encoded. This is great for json files.
|
||||||
|
It also provides better OO over some common file manipulations.
|
||||||
|
|
||||||
|
|
||||||
|
emailer
|
||||||
|
-------
|
||||||
|
|
||||||
|
A simple emailer, the primary purpose is to accept a [Dict](../dot/README.md)
|
||||||
|
of settings.
|
||||||
|
|
||||||
|
|
||||||
|
pulse
|
||||||
|
-----
|
||||||
|
|
||||||
|
For connecting clients to [Mozilla's Pulse](https://pulse.mozilla.org/).
|
||||||
|
|
||||||
|
|
||||||
|
elasticsearch
|
||||||
|
-------------
|
||||||
|
|
||||||
|
This module handles the lifecycle of an Elasticsearch index in the context of
|
||||||
|
ETL. You only need this module if you are creating and retiring indexes. You
|
||||||
|
do not need this module for simply searching; for that I suggest using the
|
||||||
|
rest API directly.
|
||||||
|
|
||||||
|
### Settings
|
||||||
|
|
||||||
|
Both ```Cluster``` and ```Index``` objects accept the same settings dict,
|
||||||
|
selecting only the properties it requires.
|
||||||
|
|
||||||
|
{
|
||||||
|
"host" : "http://192.168.0.98",
|
||||||
|
"port" : 9200,
|
||||||
|
"index" : "b2g_tests",
|
||||||
|
"type" : "test_result",
|
||||||
|
"debug" : true,
|
||||||
|
"limit_replicas" : true,
|
||||||
|
"schema_file" : "./resources/schema/test_schema.json"
|
||||||
|
},
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
Cluster
|
||||||
|
-------
|
||||||
|
|
||||||
|
|
||||||
|
Index
|
||||||
|
-----
|
|
@ -0,0 +1,311 @@
|
||||||
|
# encoding: utf-8
|
||||||
|
#
|
||||||
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||||
|
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
#
|
||||||
|
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||||
|
#
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
import gzip
|
||||||
|
from io import BytesIO
|
||||||
|
from tempfile import TemporaryFile
|
||||||
|
import zipfile
|
||||||
|
import zlib
|
||||||
|
|
||||||
|
from pyLibrary.debugs.logs import Log
|
||||||
|
from pyLibrary.maths import Math
|
||||||
|
|
||||||
|
# LIBRARY TO DEAL WITH BIG DATA ARRAYS AS ITERATORS OVER (IR)REGULAR SIZED
|
||||||
|
# BLOCKS, OR AS ITERATORS OVER LINES
|
||||||
|
|
||||||
|
|
||||||
|
MIN_READ_SIZE = 8 * 1024
|
||||||
|
MAX_STRING_SIZE = 1 * 1024 * 1024
|
||||||
|
|
||||||
|
|
||||||
|
class FileString(object):
    """
    ACTS LIKE A STRING, BUT IS A FILE

    Wraps a binary file object so it can be handed to code expecting a
    string: len(), Python-2 slicing, concatenation and line-splitting are
    forwarded to file operations.  Unknown attributes are delegated to the
    wrapped file via __getattr__.
    """

    def __init__(self, file):
        self.file = file
        # FIX: default the encoding so __getslice__ works even when decode()
        # was never called (previously the attribute did not exist and the
        # lookup fell through __getattr__ to the wrapped file)
        self.encoding = "utf8"

    def decode(self, encoding):
        """Record the encoding used by __getslice__; only UTF-8 is supported."""
        # FIX: accept the common "utf-8" spelling as well (same codec)
        if encoding not in ("utf8", "utf-8"):
            Log.error("can not handle {{encoding}}", encoding=encoding)
        self.encoding = encoding
        return self

    def split(self, sep):
        """Return a line iterator; only newline splitting is supported."""
        if sep != "\n":
            Log.error("Can only split by lines")
        self.file.seek(0)
        return LazyLines(self.file)

    def __len__(self):
        # remember the position, seek to the end to measure, then restore
        pos = self.file.tell()
        self.file.seek(0, 2)
        file_length = self.file.tell()
        self.file.seek(pos)
        return file_length

    def __getslice__(self, i, j):
        # Python 2 slice protocol: return the decoded characters i..j
        self.file.seek(i)
        return self.file.read(j - i).decode(self.encoding)

    def __add__(self, other):
        # append to the underlying file
        self.file.seek(0, 2)
        self.file.write(other)
        return self  # FIX: original returned None, breaking `fs + x` chains

    def __radd__(self, other):
        # other + self: materialize the concatenation into a new temp file
        new_file = TemporaryFile()
        new_file.write(other)
        self.file.seek(0)
        for line in self.file:
            new_file.write(line)
        new_file.seek(0)
        return FileString(new_file)

    def __getattr__(self, attr):
        # delegate everything else to the wrapped file
        return getattr(self.file, attr)

    def __del__(self):
        # drop the reference first so a failing close() cannot run twice
        self.file, temp = None, self.file
        if temp:
            temp.close()

    def __iter__(self):
        self.file.seek(0)
        return self.file
||||||
|
|
||||||
|
|
||||||
|
def safe_size(source):
    """
    READ THE source UP TO SOME LIMIT, THEN COPY TO A FILE IF TOO BIG
    RETURN A str() OR A FileString()

    :param source: object with a read(size) method, or None
    :return: None for None input; a byte string when the content fits in
        MAX_STRING_SIZE; otherwise a FileString spooled to a TemporaryFile
        so the whole blob is never held in memory.
    """
    if source is None:
        return None

    total_bytes = 0
    chunks = []  # renamed: the original `bytes` shadowed the builtin
    chunk = source.read(MIN_READ_SIZE)
    while chunk:
        total_bytes += len(chunk)
        chunks.append(chunk)
        if total_bytes > MAX_STRING_SIZE:
            try:
                data = FileString(TemporaryFile())
                for c in chunks:
                    data.write(c)
                del chunks  # free the in-memory copy before draining the rest

                # drain the remainder of source straight to the temp file
                chunk = source.read(MIN_READ_SIZE)
                while chunk:
                    total_bytes += len(chunk)
                    data.write(chunk)
                    chunk = source.read(MIN_READ_SIZE)
                data.seek(0)
                Log.note("Using file of size {{length}} instead of str()", length=total_bytes)

                return data
            except Exception as e:  # py2.6+/py3-compatible except syntax
                Log.error("Could not write file > {{num}} bytes", num=total_bytes, cause=e)
        chunk = source.read(MIN_READ_SIZE)

    data = b"".join(chunks)
    del chunks
    return data
|
||||||
|
|
||||||
|
|
||||||
|
class LazyLines(object):
    """
    SIMPLE LINE ITERATOR, BUT WITH A BIT OF CACHING TO LOOK LIKE AN ARRAY

    Supports forward-only indexing: [next] advances the iterator and
    [next - 1] returns the cached previous line; anything else is an error.
    """

    def __init__(self, source, encoding="utf8"):
        """
        ASSUME source IS A LINE ITERATOR OVER utf8 ENCODED BYTE STREAM

        :param source: iterable of (byte) lines
        :param encoding: codec used to decode each line; falsy means the
            lines are passed through untouched
        """
        self.source = source
        self.encoding = encoding
        self._iter = self.__iter__()
        self._last = None  # most recently yielded line
        self._next = 0     # index of the line the iterator will yield next

    def __getslice__(self, i, j):
        # Python 2 slice protocol; only the "rest of the stream" slice works
        if i == self._next:
            return self._iter
        Log.error("Do not know how to slice this generator")

    def __iter__(self):
        def output(encoding):
            for value in self.source:
                if not encoding:
                    self._last = value
                else:
                    self._last = value.decode(encoding)
                self._next += 1
                yield self._last

        return output(self.encoding)

    def __getitem__(self, item):
        try:
            if item == self._next:
                # FIX: builtin next() instead of py2-only .next() (works 2.6+)
                return next(self._iter)
            elif item == self._next - 1:
                return self._last
            else:
                Log.error("can not index out-of-order too much")
        except Exception as e:
            Log.error("Problem indexing", e)
|
||||||
|
|
||||||
|
|
||||||
|
class CompressedLines(LazyLines):
    """
    KEEP COMPRESSED HTTP (content-type: gzip) IN BYTES ARRAY
    WHILE PULLING OUT ONE LINE AT A TIME FOR PROCESSING
    """

    def __init__(self, compressed, encoding="utf8"):
        """
        USED compressed BYTES TO DELIVER LINES OF TEXT
        LIKE LazyLines, BUT HAS POTENTIAL TO seek()

        :param compressed: gzip-compressed bytes, kept in memory
        :param encoding: codec for the decompressed lines
        """
        self.compressed = compressed
        LazyLines.__init__(self, None, encoding=encoding)
        self._iter = self.__iter__()

    def __iter__(self):
        # decompress in fixed-size blocks, then split the blocks into lines
        return LazyLines(
            ibytes2ilines(compressed_bytes2ibytes(self.compressed, MIN_READ_SIZE)),
            self.encoding
        ).__iter__()

    def __getslice__(self, i, j):
        if i == self._next:
            return self._iter

        if i == 0:
            # rewinding is cheap: just re-decompress from the start
            return self.__iter__()

        if i == self._next - 1:
            def output():
                yield self._last
                for v in self._iter:
                    yield v

            return output()
        Log.error("Do not know how to slice this generator")

    def __getitem__(self, item):
        try:
            if item == self._next:
                # FIX: builtin next() instead of py2-only .next() (works 2.6+)
                self._last = next(self._iter)
                self._next += 1
                return self._last
            elif item == self._next - 1:
                return self._last
            else:
                Log.error("can not index out-of-order too much")
        except Exception as e:
            Log.error("Problem indexing", e)

    def __radd__(self, other):
        # NOTE(review): this references self.file, which CompressedLines never
        # defines -- it looks copy-pasted from FileString.__radd__ and will
        # raise AttributeError if ever called.  TODO: confirm intent and fix.
        new_file = TemporaryFile()
        new_file.write(other)
        self.file.seek(0)
        for l in self.file:
            new_file.write(l)
        new_file.seek(0)
        return FileString(new_file)
|
||||||
|
|
||||||
|
|
||||||
|
def compressed_bytes2ibytes(compressed, size):
    """
    CONVERT AN ARRAY OF BYTES TO A BYTE-BLOCK GENERATOR

    USEFUL IN THE CASE WHEN WE WANT TO LIMIT HOW MUCH WE FEED ANOTHER
    GENERATOR (LIKE A DECOMPRESSOR)

    :param compressed: gzip-format compressed bytes (16 + MAX_WBITS tells
        zlib to expect a gzip header/trailer)
    :param size: number of compressed bytes fed to the decompressor per block
    """
    decompressor = zlib.decompressobj(16 + zlib.MAX_WBITS)

    # Plain range() stepping replaces the project's Math.ceiling(len, size):
    # both visit i = 0, size, 2*size, ... up to the last partial block, so
    # the iteration is identical and the extra dependency is dropped.
    for i in range(0, len(compressed), size):
        try:
            block = compressed[i: i + size]
            yield decompressor.decompress(block)
        except Exception as e:  # py2.6+/py3-compatible except syntax
            Log.error("Not expected", e)
|
||||||
|
|
||||||
|
def ibytes2ilines(stream):
    """
    CONVERT A GENERATOR OF (ARBITRARY-SIZED) byte BLOCKS
    TO A LINE (CR-DELIMITED) GENERATOR

    Lines are yielded WITHOUT the trailing b"\\n"; whatever is left after the
    last newline (possibly b"") is yielded as the final item.
    """
    # FIX: builtin next() instead of py2-only .next() (works on 2.6+ and 3.x)
    _buffer = next(stream)
    s = 0
    e = _buffer.find(b"\n")
    while True:
        while e == -1:
            # no newline left in the buffer; pull blocks until one shows up
            try:
                next_block = next(stream)
                _buffer = _buffer[s:] + next_block
                s = 0
                e = _buffer.find(b"\n")
            except StopIteration:
                # stream exhausted: emit the tail and finish
                _buffer = _buffer[s:]
                del stream
                yield _buffer
                return

        yield _buffer[s:e]
        s = e + 1
        e = _buffer.find(b"\n", s)
|
||||||
|
|
||||||
|
def sbytes2ilines(stream):
    """
    CONVERT A STREAM OF (ARBITRARY-SIZED) byte BLOCKS
    TO A LINE (CR-DELIMITED) GENERATOR

    :param stream: object with a read(size) method (e.g. an open file)
    """
    # FIX: the original passed {"next": read} to ibytes2ilines, but a plain
    # dict has no next() method, so iteration could never work.  A real
    # generator that reads fixed-size blocks until EOF does what was intended.
    def block_gen():
        while True:
            block = stream.read(MIN_READ_SIZE)
            if not block:
                return
            yield block

    return ibytes2ilines(block_gen())
|
||||||
|
|
||||||
|
|
||||||
|
class GzipLines(CompressedLines):
    """
    SAME AS CompressedLines, BUT USING THE GzipFile FORMAT FOR COMPRESSED BYTES
    """

    def __init__(self, compressed, encoding="utf8"):
        CompressedLines.__init__(self, compressed, encoding=encoding)

    def __iter__(self):
        # wrap the compressed buffer in a file-like object and let GzipFile
        # do the streaming decompression
        source = gzip.GzipFile(fileobj=BytesIO(self.compressed), mode='r')
        return LazyLines(source, encoding=self.encoding).__iter__()
|
||||||
|
|
||||||
|
|
||||||
|
class ZipfileLines(CompressedLines):
    """
    SAME AS CompressedLines, BUT USING THE ZipFile FORMAT FOR COMPRESSED BYTES
    """

    def __init__(self, compressed, encoding="utf8"):
        CompressedLines.__init__(self, compressed, encoding=encoding)

    def __iter__(self):
        archive = zipfile.ZipFile(BytesIO(self.compressed), mode='r')
        names = archive.namelist()
        # exactly one member is expected; anything else is ambiguous
        if len(names) != 1:
            Log.error("*.zip file has {{num}} files, expecting only one.", num=len(names))
        member = archive.open(names[0], "r")
        return LazyLines(sbytes2ilines(member), encoding=self.encoding).__iter__()
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -0,0 +1,137 @@
|
||||||
|
# encoding: utf-8
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||||
|
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
#
|
||||||
|
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
import smtplib
|
||||||
|
import sys
|
||||||
|
|
||||||
|
from email.mime.multipart import MIMEMultipart
|
||||||
|
from email.mime.text import MIMEText
|
||||||
|
|
||||||
|
from pyLibrary.debugs.logs import Log
|
||||||
|
from pyLibrary.dot import listwrap
|
||||||
|
from pyLibrary.dot import coalesce
|
||||||
|
from pyLibrary.meta import use_settings
|
||||||
|
|
||||||
|
|
||||||
|
class Emailer:
    """
    Thin context-manager wrapper around smtplib.  The primary purpose is to
    accept one settings Dict (via @use_settings) instead of loose arguments.
    """

    @use_settings
    def __init__(
        self,
        from_address,            # default sender address
        to_address,              # default recipient(s)
        host,                    # SMTP server host
        username,                # SMTP login
        password,                # SMTP password
        subject="catchy title",  # default subject line
        port=465,
        use_ssl=1,
        settings=None
    ):
        # @use_settings packs the individual arguments into `settings`
        self.settings = settings
        self.server = None

    def __enter__(self):
        """Open (and optionally authenticate) the SMTP connection."""
        if self.server is not None:
            Log.error("Got a problem")

        if self.settings.use_ssl:
            self.server = smtplib.SMTP_SSL(self.settings.host, self.settings.port)
        else:
            self.server = smtplib.SMTP(self.settings.host, self.settings.port)

        if self.settings.username and self.settings.password:
            self.server.login(self.settings.username, self.settings.password)

        return self

    def __exit__(self, type, value, traceback):
        try:
            self.server.quit()
        except Exception as e:  # py2.6+/py3-compatible except syntax
            Log.warning("Problem with smtp server quit(), ignoring problem", e)

        self.server = None

    def send_email(
        self,
        from_address=None,
        to_address=None,
        subject=None,
        text_data=None,
        html_data=None
    ):
        """Sends an email.

        from_addr is an email address; to_addrs is a list of email adresses.
        Addresses can be plain (e.g. "jsmith@example.com") or with real names
        (e.g. "John Smith <jsmith@example.com>").

        text_data and html_data are both strings.  You can specify one or both.
        If you specify both, the email will be sent as a MIME multipart
        alternative, i.e., the recipient will see the HTML content if his
        viewer supports it; otherwise he'll see the text content.
        """
        settings = self.settings

        # fall back to the configured defaults when not given explicitly
        from_address = coalesce(from_address, settings["from"], settings.from_address)
        to_address = listwrap(coalesce(to_address, settings.to_address, settings.to_addrs))

        if not from_address or not to_address:
            raise Exception("Both from_addr and to_addrs must be specified")
        if not text_data and not html_data:
            raise Exception("Must specify either text_data or html_data")

        if not html_data:
            msg = MIMEText(text_data)
        elif not text_data:
            msg = MIMEText(html_data, 'html')
        else:
            msg = MIMEMultipart('alternative')
            msg.attach(MIMEText(text_data, 'plain'))
            msg.attach(MIMEText(html_data, 'html'))

        msg['Subject'] = coalesce(subject, settings.subject)
        msg['From'] = from_address
        msg['To'] = ', '.join(to_address)

        if self.server:
            # CALL AS PART OF A SMTP SESSION
            self.server.sendmail(from_address, to_address, msg.as_string())
        else:
            # CALL AS STAND-ALONE
            with self:
                self.server.sendmail(from_address, to_address, msg.as_string())
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
if sys.hexversion < 0x020603f0:
    # versions earlier than 2.6.3 have a bug in smtplib when sending over SSL:
    # http://bugs.python.org/issue4066
    # Unfortunately the stock version of Python in Snow Leopard is 2.6.1, so
    # we patch it here to avoid having to install an updated Python version.
    import socket
    import ssl

    def _get_socket_fixed(self, host, port, timeout):
        if self.debuglevel > 0:
            print>> sys.stderr, 'connect:', (host, port)
        fixed_socket = socket.create_connection((host, port), timeout)
        fixed_socket = ssl.wrap_socket(fixed_socket, self.keyfile, self.certfile)
        self.file = smtplib.SSLFakeFile(fixed_socket)
        return fixed_socket

    # replace the buggy implementation on the class itself
    smtplib.SMTP_SSL._get_socket = _get_socket_fixed
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,331 @@
|
||||||
|
# encoding: utf-8
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||||
|
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
#
|
||||||
|
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||||
|
#
|
||||||
|
|
||||||
|
|
||||||
|
from datetime import datetime
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
|
||||||
|
from pyLibrary.strings import utf82unicode
|
||||||
|
from pyLibrary.maths import crypto
|
||||||
|
from pyLibrary.dot import coalesce, set_default, split_field, join_field
|
||||||
|
from pyLibrary.dot import listwrap, wrap
|
||||||
|
from pyLibrary import convert
|
||||||
|
|
||||||
|
|
||||||
|
class File(object):
    """
    ASSUMES ALL FILE CONTENT IS UTF8 ENCODED STRINGS

    Thin OO wrapper over a path.  Paths are stored with "/" separators and
    converted to os.sep only at the OS boundary.  When constructed with
    {"path": p, "key": k}, content is encrypted/decrypted with `key`.
    """

    def __init__(self, filename, buffering=2 ** 14, suffix=None):
        """
        YOU MAY SET filename TO {"path":p, "key":k} FOR CRYPTO FILES

        :param filename: path string, or a {"path", "key"} object for crypto
        :param buffering: buffer-size hint (stored; not currently used)
        :param suffix: optional suffix inserted before the file extension
        """
        if filename == None:
            from pyLibrary.debugs.logs import Log

            Log.error("File must be given a filename")
        elif isinstance(filename, basestring):
            self.key = None
            self._filename = "/".join(filename.split(os.sep))  # USE UNIX STANDARD
        else:
            self.key = convert.base642bytearray(filename.key)
            self._filename = "/".join(filename.path.split(os.sep))  # USE UNIX STANDARD

        while self._filename.find(".../") >= 0:
            # LET ... REFER TO GRANDPARENT, .... REFER TO GREAT-GRAND-PARENT, etc...
            self._filename = self._filename.replace(".../", "../../")
        self.buffering = buffering

        if suffix:
            self._filename = File.add_suffix(self._filename, suffix)

    @classmethod
    def new_instance(cls, *path):
        """Join path parts (strings or Files) into one File."""
        def scrub(i, p):
            if isinstance(p, File):
                p = p.abspath
            p = p.replace(os.sep, "/")
            if p[-1] == '/':
                p = p[:-1]
            if i > 0 and p[0] == '/':
                p = p[1:]
            return p

        return File('/'.join(scrub(i, p) for i, p in enumerate(path)))

    @property
    def filename(self):
        # path with the OS-native separator
        return self._filename.replace("/", os.sep)

    @property
    def abspath(self):
        """Absolute path, with "~" expanded; "/" separators on Windows."""
        if self._filename.startswith("~"):
            home_path = os.path.expanduser("~")
            if os.sep == "\\":
                home_path = home_path.replace(os.sep, "/")
            if home_path.endswith("/"):
                home_path = home_path[:-1]

            return home_path + self._filename[1::]
        else:
            if os.sep == "\\":
                return os.path.abspath(self._filename).replace(os.sep, "/")
            else:
                return os.path.abspath(self._filename)

    @staticmethod
    def add_suffix(filename, suffix):
        """
        ADD suffix TO THE filename (NOT INCLUDING THE FILE EXTENSION)
        """
        path = filename.split("/")
        parts = path[-1].split(".")
        i = max(len(parts) - 2, 0)  # the component just before the extension
        parts[i] = parts[i] + suffix
        path[-1] = ".".join(parts)
        return "/".join(path)

    @property
    def extension(self):
        # file extension, without the dot; "" when there is none
        parts = self._filename.split("/")[-1].split(".")
        if len(parts) == 1:
            return ""
        else:
            return parts[-1]

    @property
    def name(self):
        # file name without its extension
        parts = self._filename.split("/")[-1].split(".")
        if len(parts) == 1:
            return parts[0]
        else:
            return ".".join(parts[0:-1])

    def set_extension(self, ext):
        """
        RETURN NEW FILE WITH GIVEN EXTENSION
        """
        path = self._filename.split("/")
        parts = path[-1].split(".")
        if len(parts) == 1:
            parts.append(ext)
        else:
            parts[-1] = ext

        path[-1] = ".".join(parts)
        return File("/".join(path))

    def set_name(self, name):
        """
        RETURN NEW FILE WITH GIVEN NAME (EXTENSION PRESERVED)
        """
        path = self._filename.split("/")
        parts = path[-1].split(".")
        if len(parts) == 1:
            path[-1] = name
        else:
            path[-1] = name + "." + parts[-1]
        return File("/".join(path))

    def backup_name(self, timestamp=None):
        """
        RETURN A FILENAME THAT CAN SERVE AS A BACKUP FOR THIS FILE
        """
        suffix = convert.datetime2string(coalesce(timestamp, datetime.now()), "%Y%m%d_%H%M%S")
        return File.add_suffix(self._filename, suffix)

    def read(self, encoding="utf8"):
        """Read the whole file as unicode; decrypt when a key is set."""
        with open(self._filename, "rb") as f:
            content = f.read().decode(encoding)
            if self.key:
                return crypto.decrypt(content, self.key)
            else:
                return content

    def read_json(self, encoding="utf8"):
        """Read the file as flexible JSON and expand any references in it."""
        from pyLibrary.jsons import ref

        content = self.read(encoding=encoding)
        value = convert.json2value(content, flexible=True, leaves=True)
        abspath = self.abspath
        if os.sep == "\\":
            abspath = "/" + abspath.replace(os.sep, "/")
        return ref.expand(value, "file://" + abspath)

    def is_directory(self):
        return os.path.isdir(self._filename)

    def read_bytes(self):
        """Read the whole file as raw bytes."""
        try:
            # NOTE(review): creating the parent directory before a READ looks
            # odd, but is kept to preserve the original behavior -- confirm.
            if not self.parent.exists:
                self.parent.create()
            with open(self._filename, "rb") as f:
                return f.read()
        except Exception as e:
            from pyLibrary.debugs.logs import Log

            # FIX: the template placeholder was garbled; name it and chain e
            Log.error("Problem reading file {{filename}}", filename=self.abspath, cause=e)

    def write_bytes(self, content):
        """Write raw bytes, creating the parent directory when needed."""
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "wb") as f:
            f.write(content)

    def write(self, data):
        """Write unicode content (string, or iterable of strings) as UTF-8."""
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "wb") as f:
            if isinstance(data, list) and self.key:
                from pyLibrary.debugs.logs import Log

                Log.error("list of data and keys are not supported, encrypt before sending to file")

            if isinstance(data, list):
                pass
            elif isinstance(data, basestring):
                data = [data]
            elif hasattr(data, "__iter__"):
                pass

            for d in data:
                if not isinstance(d, unicode):
                    from pyLibrary.debugs.logs import Log

                    Log.error("Expecting unicode data only")
                if self.key:
                    f.write(crypto.encrypt(d, self.key).encode("utf8"))
                else:
                    f.write(d.encode("utf8"))

    def __iter__(self):
        """Yield the file's lines as unicode."""
        # NOT SURE HOW TO MAXIMIZE FILE READ SPEED
        # http://stackoverflow.com/questions/8009882/how-to-read-large-file-line-by-line-in-python
        # http://effbot.org/zone/wide-finder.htm
        def output():
            try:
                path = self._filename
                if path.startswith("~"):
                    home_path = os.path.expanduser("~")
                    path = home_path + path[1::]

                with io.open(path, "rb") as f:
                    for line in f:
                        yield utf82unicode(line)
            except Exception as e:
                from pyLibrary.debugs.logs import Log

                # FIX: the template placeholder was garbled; name the file
                Log.error("Can not read line from {{filename}}", filename=self._filename, cause=e)

        return output()

    def append(self, content):
        """
        add a line to file
        """
        if not self.parent.exists:
            self.parent.create()
        with open(self._filename, "ab") as output_file:
            if isinstance(content, str):
                from pyLibrary.debugs.logs import Log

                Log.error("expecting to write unicode only")
            output_file.write(content.encode("utf-8"))
            output_file.write(b"\n")

    def add(self, content):
        # alias for append()
        return self.append(content)

    def extend(self, content):
        """Append many lines to the file."""
        try:
            if not self.parent.exists:
                self.parent.create()
            with open(self._filename, "ab") as output_file:
                for c in content:
                    if isinstance(c, str):
                        from pyLibrary.debugs.logs import Log

                        Log.error("expecting to write unicode only")

                    output_file.write(c.encode("utf-8"))
                    output_file.write(b"\n")
        except Exception as e:
            from pyLibrary.debugs.logs import Log

            Log.error("Could not write to file", e)

    def delete(self):
        """Remove the file or directory tree; missing paths are not errors."""
        try:
            if os.path.isdir(self._filename):
                shutil.rmtree(self._filename)
            elif os.path.isfile(self._filename):
                os.remove(self._filename)
            return self
        except Exception as e:
            # FIX: not every exception has .strerror; getattr avoids masking
            # the real error with an AttributeError
            if getattr(e, "strerror", None) == "The system cannot find the path specified":
                return
            from pyLibrary.debugs.logs import Log

            Log.error("Could not remove file", e)

    def backup(self):
        # NOTE(review): looks unfinished -- it only names a backup for
        # extension-less files and never copies any content.  TODO: confirm.
        names = self._filename.split("/")[-1].split(".")
        if len(names) == 1:
            # FIX: "%H%i%s" is not valid strftime; "%H%M%S" was clearly meant
            backup = File(self._filename + ".backup " + datetime.utcnow().strftime("%Y%m%d %H%M%S"))

    def create(self):
        """Create this path as a directory (including parents)."""
        try:
            os.makedirs(self._filename)
        except Exception as e:
            from pyLibrary.debugs.logs import Log

            Log.error("Could not make directory {{dir_name}}", dir_name=self._filename, cause=e)

    @property
    def children(self):
        # immediate children as File objects
        return [File(self._filename + "/" + c) for c in os.listdir(self.filename)]

    @property
    def parent(self):
        return File("/".join(self._filename.split("/")[:-1]))

    @property
    def exists(self):
        if self._filename in ["", "."]:
            return True
        try:
            return os.path.exists(self._filename)
        except Exception:
            return False

    def __bool__(self):
        # py3 name delegates to the py2 implementation below
        return self.__nonzero__()

    def __nonzero__(self):
        """
        USED FOR FILE EXISTENCE TESTING
        """
        if self._filename in ["", "."]:
            return True
        try:
            return os.path.exists(self._filename)
        except Exception:
            return False

    @classmethod
    def copy(cls, from_, to_):
        # byte-for-byte copy via read/write
        File.new_instance(to_).write_bytes(File.new_instance(from_).read_bytes())
|
|
@ -0,0 +1,60 @@
|
||||||
|
# encoding: utf-8
|
||||||
|
#
|
||||||
|
#
|
||||||
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||||
|
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
#
|
||||||
|
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
from pyLibrary.meta import cache
|
||||||
|
from pyLibrary.thread.multiprocess import Process
|
||||||
|
|
||||||
|
|
||||||
|
@cache
def get_git_revision():
    """
    GET THE CURRENT GIT REVISION

    Runs `git log -1` and returns the hash found on the "commit <hash>" line.
    """
    proc = Process("git log", ["git", "log", "-1"])

    COMMIT_PREFIX = "commit "
    try:
        while True:
            # NOTE(review): assumes proc.stdout.pop() always returns a string;
            # end-of-stream behavior depends on Process -- confirm.
            line = proc.stdout.pop().strip()
            if line.startswith(COMMIT_PREFIX):
                return line[len(COMMIT_PREFIX):]
    finally:
        try:
            proc.join()
        except Exception:
            pass
|
||||||
|
|
||||||
|
@cache
def get_remote_revision(url, branch):
    """
    GET REVISION OF A REMOTE BRANCH

    Runs `git ls-remote <url> refs/heads/<branch>` and returns the hash field
    of the first non-empty output line.
    """
    proc = Process("git remote revision", ["git", "ls-remote", url, "refs/heads/" + branch])

    try:
        while True:
            line = proc.stdout.pop().strip()
            if line:
                # output shape is "<sha>\t<ref>" -- keep the sha only
                return line.split("\t")[0]
    finally:
        try:
            proc.join()
        except Exception:
            pass

    return None
|
||||||
|
|
|
@ -0,0 +1,264 @@
|
||||||
|
# encoding: utf-8
|
||||||
|
#
|
||||||
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||||
|
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
#
|
||||||
|
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||||
|
#
|
||||||
|
|
||||||
|
# MIMICS THE requests API (http://docs.python-requests.org/en/latest/)
|
||||||
|
# DEMANDS data IS A JSON-SERIALIZABLE STRUCTURE
|
||||||
|
# WITH ADDED default_headers THAT CAN BE SET USING pyLibrary.debugs.settings
|
||||||
|
# EG
|
||||||
|
# {"debug.constants":{
|
||||||
|
# "pyLibrary.env.http.default_headers={
|
||||||
|
# "From":"klahnakoski@mozilla.com"
|
||||||
|
# }
|
||||||
|
# }}
|
||||||
|
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import absolute_import
|
||||||
|
from copy import copy
|
||||||
|
from numbers import Number
|
||||||
|
|
||||||
|
from requests import sessions, Response
|
||||||
|
|
||||||
|
from pyLibrary import convert
|
||||||
|
from pyLibrary.debugs.exceptions import Except
|
||||||
|
from pyLibrary.debugs.logs import Log
|
||||||
|
from pyLibrary.dot import Dict, coalesce, wrap, set_default
|
||||||
|
from pyLibrary.env.big_data import safe_size, CompressedLines, ZipfileLines, GzipLines
|
||||||
|
from pyLibrary.maths import Math
|
||||||
|
from pyLibrary.queries import qb
|
||||||
|
from pyLibrary.thread.threads import Thread
|
||||||
|
from pyLibrary.times.durations import SECOND
|
||||||
|
|
||||||
|
|
||||||
|
FILE_SIZE_LIMIT = 100 * 1024 * 1024
|
||||||
|
MIN_READ_SIZE = 8 * 1024
|
||||||
|
ZIP_REQUEST = False
|
||||||
|
default_headers = Dict() # TODO: MAKE THIS VARIABLE A SPECIAL TYPE OF EXPECTED MODULE PARAMETER SO IT COMPLAINS IF NOT SET
|
||||||
|
default_timeout = 600
|
||||||
|
|
||||||
|
_warning_sent = False
|
||||||
|
|
||||||
|
|
||||||
|
def request(method, url, zip=None, retry=None, **kwargs):
|
||||||
|
"""
|
||||||
|
JUST LIKE requests.request() BUT WITH DEFAULT HEADERS AND FIXES
|
||||||
|
DEMANDS data IS ONE OF:
|
||||||
|
* A JSON-SERIALIZABLE STRUCTURE, OR
|
||||||
|
* LIST OF JSON-SERIALIZABLE STRUCTURES, OR
|
||||||
|
* None
|
||||||
|
|
||||||
|
Parameters
|
||||||
|
* zip - ZIP THE REQUEST BODY, IF BIG ENOUGH
|
||||||
|
* json - JSON-SERIALIZABLE STRUCTURE
|
||||||
|
* retry - {"times": x, "sleep": y} STRUCTURE
|
||||||
|
|
||||||
|
THE BYTE_STRINGS (b"") ARE NECESSARY TO PREVENT httplib.py FROM **FREAKING OUT**
|
||||||
|
IT APPEARS requests AND httplib.py SIMPLY CONCATENATE STRINGS BLINDLY, WHICH
|
||||||
|
INCLUDES url AND headers
|
||||||
|
"""
|
||||||
|
global _warning_sent
|
||||||
|
if not default_headers and not _warning_sent:
|
||||||
|
_warning_sent = True
|
||||||
|
Log.warning(
|
||||||
|
"The pyLibrary.env.http module was meant to add extra "
|
||||||
|
"default headers to all requests, specifically the 'Referer' "
|
||||||
|
"header with a URL to the project. Use the `pyLibrary.debug.constants.set()` "
|
||||||
|
"function to set `pyLibrary.env.http.default_headers`"
|
||||||
|
)
|
||||||
|
|
||||||
|
if isinstance(url, list):
|
||||||
|
# TRY MANY URLS
|
||||||
|
failures = []
|
||||||
|
for remaining, u in qb.countdown(url):
|
||||||
|
try:
|
||||||
|
response = request(method, u, zip=zip, retry=retry, **kwargs)
|
||||||
|
if Math.round(response.status_code, decimal=-2) not in [400, 500]:
|
||||||
|
return response
|
||||||
|
if not remaining:
|
||||||
|
return response
|
||||||
|
except Exception, e:
|
||||||
|
e = Except.wrap(e)
|
||||||
|
failures.append(e)
|
||||||
|
Log.error("Tried {{num}} urls", num=len(url), cause=failures)
|
||||||
|
|
||||||
|
session = sessions.Session()
|
||||||
|
session.headers.update(default_headers)
|
||||||
|
|
||||||
|
if zip is None:
|
||||||
|
zip = ZIP_REQUEST
|
||||||
|
|
||||||
|
if isinstance(url, unicode):
|
||||||
|
# httplib.py WILL **FREAK OUT** IF IT SEES ANY UNICODE
|
||||||
|
url = url.encode("ascii")
|
||||||
|
|
||||||
|
_to_ascii_dict(kwargs)
|
||||||
|
timeout = kwargs[b'timeout'] = coalesce(kwargs.get(b'timeout'), default_timeout)
|
||||||
|
|
||||||
|
if retry is None:
|
||||||
|
retry = Dict(times=1, sleep=0)
|
||||||
|
elif isinstance(retry, Number):
|
||||||
|
retry = Dict(times=retry, sleep=SECOND)
|
||||||
|
else:
|
||||||
|
retry = wrap(retry)
|
||||||
|
set_default(retry.sleep, {"times": 1, "sleep": 0})
|
||||||
|
|
||||||
|
if b'json' in kwargs:
|
||||||
|
kwargs[b'data'] = convert.value2json(kwargs[b'json']).encode("utf8")
|
||||||
|
del kwargs[b'json']
|
||||||
|
|
||||||
|
try:
|
||||||
|
if zip and len(coalesce(kwargs.get(b"data"))) > 1000:
|
||||||
|
compressed = convert.bytes2zip(kwargs[b"data"])
|
||||||
|
if b"headers" not in kwargs:
|
||||||
|
kwargs[b"headers"] = {}
|
||||||
|
kwargs[b"headers"][b'content-encoding'] = b'gzip'
|
||||||
|
kwargs[b"data"] = compressed
|
||||||
|
|
||||||
|
_to_ascii_dict(kwargs[b"headers"])
|
||||||
|
else:
|
||||||
|
_to_ascii_dict(kwargs.get(b"headers"))
|
||||||
|
except Exception, e:
|
||||||
|
Log.error("Request setup failure on {{url}}", url=url, cause=e)
|
||||||
|
|
||||||
|
errors = []
|
||||||
|
for r in range(retry.times):
|
||||||
|
if r:
|
||||||
|
Thread.sleep(retry.sleep)
|
||||||
|
|
||||||
|
try:
|
||||||
|
return session.request(method=method, url=url, **kwargs)
|
||||||
|
except Exception, e:
|
||||||
|
errors.append(Except.wrap(e))
|
||||||
|
|
||||||
|
if " Read timed out." in errors[0]:
|
||||||
|
Log.error("Tried {{times}} times: Timeout failure (timeout was {{timeout}}", timeout=timeout, times=retry.times, cause=errors[0])
|
||||||
|
else:
|
||||||
|
Log.error("Tried {{times}} times: Request failure of {{url}}", url=url, times=retry.times, cause=errors[0])
|
||||||
|
|
||||||
|
|
||||||
|
def _to_ascii_dict(headers):
|
||||||
|
if headers is None:
|
||||||
|
return
|
||||||
|
for k, v in copy(headers).items():
|
||||||
|
if isinstance(k, unicode):
|
||||||
|
del headers[k]
|
||||||
|
if isinstance(v, unicode):
|
||||||
|
headers[k.encode("ascii")] = v.encode("ascii")
|
||||||
|
else:
|
||||||
|
headers[k.encode("ascii")] = v
|
||||||
|
elif isinstance(v, unicode):
|
||||||
|
headers[k] = v.encode("ascii")
|
||||||
|
|
||||||
|
|
||||||
|
def get(url, **kwargs):
|
||||||
|
kwargs.setdefault(b'allow_redirects', True)
|
||||||
|
kwargs[b"stream"] = True
|
||||||
|
return HttpResponse(request(b'get', url, **kwargs))
|
||||||
|
|
||||||
|
|
||||||
|
def get_json(url, **kwargs):
|
||||||
|
"""
|
||||||
|
ASSUME RESPONSE IN IN JSON
|
||||||
|
"""
|
||||||
|
response = get(url, **kwargs)
|
||||||
|
c = response.all_content
|
||||||
|
return convert.json2value(convert.utf82unicode(c))
|
||||||
|
|
||||||
|
def options(url, **kwargs):
|
||||||
|
kwargs.setdefault(b'allow_redirects', True)
|
||||||
|
kwargs[b"stream"] = True
|
||||||
|
return HttpResponse(request(b'options', url, **kwargs))
|
||||||
|
|
||||||
|
|
||||||
|
def head(url, **kwargs):
|
||||||
|
kwargs.setdefault(b'allow_redirects', False)
|
||||||
|
kwargs[b"stream"] = True
|
||||||
|
return HttpResponse(request(b'head', url, **kwargs))
|
||||||
|
|
||||||
|
|
||||||
|
def post(url, **kwargs):
|
||||||
|
kwargs[b"stream"] = True
|
||||||
|
return HttpResponse(request(b'post', url, **kwargs))
|
||||||
|
|
||||||
|
|
||||||
|
def post_json(url, **kwargs):
|
||||||
|
"""
|
||||||
|
ASSUME RESPONSE IN IN JSON
|
||||||
|
"""
|
||||||
|
kwargs["data"] = convert.unicode2utf8(convert.value2json(kwargs["data"]))
|
||||||
|
|
||||||
|
response = post(url, **kwargs)
|
||||||
|
c=response.content
|
||||||
|
return convert.json2value(convert.utf82unicode(c))
|
||||||
|
|
||||||
|
|
||||||
|
def put(url, **kwargs):
|
||||||
|
return HttpResponse(request(b'put', url, **kwargs))
|
||||||
|
|
||||||
|
|
||||||
|
def patch(url, **kwargs):
|
||||||
|
kwargs[b"stream"] = True
|
||||||
|
return HttpResponse(request(b'patch', url, **kwargs))
|
||||||
|
|
||||||
|
|
||||||
|
def delete(url, **kwargs):
|
||||||
|
kwargs[b"stream"] = True
|
||||||
|
return HttpResponse(request(b'delete', url, **kwargs))
|
||||||
|
|
||||||
|
|
||||||
|
class HttpResponse(Response):
|
||||||
|
def __new__(cls, resp):
|
||||||
|
resp.__class__ = HttpResponse
|
||||||
|
return resp
|
||||||
|
|
||||||
|
def __init__(self, resp):
|
||||||
|
pass
|
||||||
|
self._cached_content = None
|
||||||
|
|
||||||
|
@property
|
||||||
|
def all_content(self):
|
||||||
|
# response.content WILL LEAK MEMORY (?BECAUSE OF PYPY"S POOR HANDLING OF GENERATORS?)
|
||||||
|
# THE TIGHT, SIMPLE, LOOP TO FILL blocks PREVENTS THAT LEAK
|
||||||
|
if self._content is not False:
|
||||||
|
self._cached_content = self._content
|
||||||
|
elif self._cached_content is None:
|
||||||
|
def read(size):
|
||||||
|
if self.raw._fp.fp is not None:
|
||||||
|
return self.raw.read(amt=size, decode_content=True)
|
||||||
|
else:
|
||||||
|
self.close()
|
||||||
|
return None
|
||||||
|
|
||||||
|
self._cached_content = safe_size(Dict(read=read))
|
||||||
|
|
||||||
|
if hasattr(self._cached_content, "read"):
|
||||||
|
self._cached_content.seek(0)
|
||||||
|
|
||||||
|
return self._cached_content
|
||||||
|
|
||||||
|
@property
|
||||||
|
def all_lines(self):
|
||||||
|
return self._all_lines()
|
||||||
|
|
||||||
|
def _all_lines(self, encoding="utf8"):
|
||||||
|
try:
|
||||||
|
content = self.raw.read(decode_content=False)
|
||||||
|
if self.headers.get('content-encoding') == 'gzip':
|
||||||
|
return CompressedLines(content, encoding=encoding)
|
||||||
|
elif self.headers.get('content-type') == 'application/zip':
|
||||||
|
return ZipfileLines(content, encoding=encoding)
|
||||||
|
elif self.url.endswith(".gz"):
|
||||||
|
return GzipLines(content, encoding)
|
||||||
|
else:
|
||||||
|
return content.decode(encoding).split("\n")
|
||||||
|
except Exception, e:
|
||||||
|
Log.error("Can not read content", cause=e)
|
||||||
|
finally:
|
||||||
|
self.close()
|
|
@ -0,0 +1,53 @@
|
||||||
|
from pyLibrary.debugs.logs import Log
|
||||||
|
from pyLibrary.strings import expand_template
|
||||||
|
|
||||||
|
_using_mozlog = False
|
||||||
|
|
||||||
|
def use():
|
||||||
|
if _using_mozlog:
|
||||||
|
return
|
||||||
|
|
||||||
|
globals()["_using_mozlog"] = True
|
||||||
|
try:
|
||||||
|
from mozlog.structured import structuredlog
|
||||||
|
|
||||||
|
global logger
|
||||||
|
logger = structuredlog.get_default_logger()
|
||||||
|
ToMozLog.logger = logger
|
||||||
|
ToMozLog.old_class = Log
|
||||||
|
globals()["Log"] = ToMozLog
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
class ToMozLog(object):
|
||||||
|
"""
|
||||||
|
MAP CALLS pyLibrary.debugs.logs.Log TO mozlog.structured.structuredlog.StructuredLogger
|
||||||
|
"""
|
||||||
|
logger = None
|
||||||
|
old_class = None
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def debug(cls, template=None, params=None):
|
||||||
|
cls.logger.debug(expand_template(template, params))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def println(cls, template, params=None):
|
||||||
|
cls.logger.debug(expand_template(template, params))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def note(cls, template, params=None, stack_depth=0):
|
||||||
|
cls.logger.debug(expand_template(template, params))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def unexpected(cls, template, params=None, cause=None):
|
||||||
|
cls.logger.error(expand_template(template, params))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def warning(cls, template, params=None, *args, **kwargs):
|
||||||
|
cls.logger.warn(expand_template(template, params))
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def error(cls, template, params=None, cause=None, stack_depth=0):
|
||||||
|
cls.logger.error(expand_template(template, params))
|
||||||
|
cls.old_class.error(template, params, cause, stack_depth)
|
|
@ -0,0 +1,217 @@
|
||||||
|
# encoding: utf-8
|
||||||
|
#
|
||||||
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||||
|
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||||
|
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||||
|
#
|
||||||
|
# Author: Kyle Lahnakoski (kyle@lahnakoski.com)
|
||||||
|
#
|
||||||
|
|
||||||
|
from __future__ import unicode_literals
|
||||||
|
from __future__ import division
|
||||||
|
from __future__ import absolute_import
|
||||||
|
|
||||||
|
import datetime
|
||||||
|
from socket import timeout as socket_timeout
|
||||||
|
|
||||||
|
from kombu import Connection, Producer, Exchange
|
||||||
|
from pytz import timezone
|
||||||
|
from mozillapulse.utils import time_to_string
|
||||||
|
|
||||||
|
from pyLibrary.debugs import constants
|
||||||
|
from pyLibrary import jsons
|
||||||
|
from pyLibrary.debugs.exceptions import Except
|
||||||
|
from pyLibrary.debugs.logs import Log
|
||||||
|
from pyLibrary.dot import wrap, coalesce, Dict, set_default
|
||||||
|
from pyLibrary.meta import use_settings
|
||||||
|
from pyLibrary.thread.threads import Thread
|
||||||
|
from mozillapulse.consumers import GenericConsumer
|
||||||
|
|
||||||
|
|
||||||
|
class Consumer(Thread):
|
||||||
|
@use_settings
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
exchange, # name of the Pulse exchange
|
||||||
|
topic, # message name pattern to subscribe to ('#' is wildcard)
|
||||||
|
target=None, # WILL BE CALLED WITH PULSE PAYLOADS AND ack() IF COMPLETE$ED WITHOUT EXCEPTION
|
||||||
|
target_queue=None, # (aka self.queue) WILL BE FILLED WITH PULSE PAYLOADS
|
||||||
|
host='pulse.mozilla.org', # url to connect,
|
||||||
|
port=5671, # tcp port
|
||||||
|
user=None,
|
||||||
|
password=None,
|
||||||
|
vhost="/",
|
||||||
|
start=0, # USED AS STARTING POINT FOR ASSIGNING THE _meta.count ATTRIBUTE
|
||||||
|
ssl=True,
|
||||||
|
applabel=None,
|
||||||
|
heartbeat=False, # True to also get the Pulse heartbeat message
|
||||||
|
durable=False, # True to keep queue after shutdown
|
||||||
|
serializer='json',
|
||||||
|
broker_timezone='GMT',
|
||||||
|
settings=None
|
||||||
|
):
|
||||||
|
self.target_queue = target_queue
|
||||||
|
self.pulse_target = target
|
||||||
|
if (target_queue == None and target == None) or (target_queue != None and target != None):
|
||||||
|
Log.error("Expecting a queue (for fast digesters) or a target (for slow digesters)")
|
||||||
|
|
||||||
|
Thread.__init__(self, name="Pulse consumer for " + settings.exchange, target=self._worker)
|
||||||
|
self.settings = settings
|
||||||
|
settings.callback = self._got_result
|
||||||
|
settings.user = coalesce(settings.user, settings.username)
|
||||||
|
settings.applabel = coalesce(settings.applable, settings.queue, settings.queue_name)
|
||||||
|
settings.topic = topic
|
||||||
|
|
||||||
|
self.pulse = ModifiedGenericConsumer(settings, connect=True, **settings)
|
||||||
|
self.count = coalesce(start, 0)
|
||||||
|
self.start()
|
||||||
|
|
||||||
|
def _got_result(self, data, message):
|
||||||
|
data = wrap(data)
|
||||||
|
data._meta.count = self.count
|
||||||
|
self.count += 1
|
||||||
|
|
||||||
|
if self.settings.debug:
|
||||||
|
Log.note("{{data}}", data= data)
|
||||||
|
if self.target_queue != None:
|
||||||
|
try:
|
||||||
|
self.target_queue.add(data)
|
||||||
|
message.ack()
|
||||||
|
except Exception, e:
|
||||||
|
e = Except.wrap(e)
|
||||||
|
if not self.target_queue.closed: # EXPECTED TO HAPPEN, THIS THREAD MAY HAVE BEEN AWAY FOR A WHILE
|
||||||
|
raise e
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
self.pulse_target(data)
|
||||||
|
message.ack()
|
||||||
|
except Exception, e:
|
||||||
|
Log.warning("Problem processing pulse (see `data` in structured log)", data=data, cause=e)
|
||||||
|
|
||||||
|
def _worker(self, please_stop):
|
||||||
|
def disconnect():
|
||||||
|
try:
|
||||||
|
self.target_queue.close()
|
||||||
|
Log.note("stop put into queue")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
self.pulse.disconnect()
|
||||||
|
Log.note("pulse listener was given a disconnect()")
|
||||||
|
|
||||||
|
please_stop.on_go(disconnect)
|
||||||
|
|
||||||
|
while not please_stop:
|
||||||
|
try:
|
||||||
|
self.pulse.listen()
|
||||||
|
except Exception, e:
|
||||||
|
if not please_stop:
|
||||||
|
Log.warning("pulse had problem", e)
|
||||||
|
Log.note("pulse listener is done")
|
||||||
|
|
||||||
|
|
||||||
|
def __exit__(self, exc_type, exc_val, exc_tb):
|
||||||
|
Log.note("clean pulse exit")
|
||||||
|
self.please_stop.go()
|
||||||
|
try:
|
||||||
|
self.target_queue.close()
|
||||||
|
Log.note("stop put into queue")
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.pulse.disconnect()
|
||||||
|
except Exception, e:
|
||||||
|
Log.warning("Can not disconnect during pulse exit, ignoring", e)
|
||||||
|
Thread.__exit__(self, exc_type, exc_val, exc_tb)
|
||||||
|
|
||||||
|
|
||||||
|
class Publisher(object):
|
||||||
|
"""
|
||||||
|
Mimic GenericPublisher https://github.com/bhearsum/mozillapulse/blob/master/mozillapulse/publishers.py
|
||||||
|
"""
|
||||||
|
|
||||||
|
@use_settings
|
||||||
|
def __init__(
|
||||||
|
self,
|
||||||
|
exchange, # name of the Pulse exchange
|
||||||
|
host='pulse.mozilla.org', # url to connect,
|
||||||
|
port=5671, # tcp port
|
||||||
|
user=None,
|
||||||
|
password=None,
|
||||||
|
vhost="/",
|
||||||
|
start=0, # USED AS STARTING POINT FOR ASSIGNING THE _meta.count ATTRIBUTE
|
||||||
|
ssl=True,
|
||||||
|
applabel=None,
|
||||||
|
heartbeat=False, # True to also get the Pulse heartbeat message
|
||||||
|
durable=False, # True to keep queue after shutdown
|
||||||
|
serializer='json',
|
||||||
|
broker_timezone='GMT',
|
||||||
|
settings=None
|
||||||
|
):
|
||||||
|
self.settings = settings
|
||||||
|
self.connection = None
|
||||||
|
self.count = 0
|
||||||
|
|
||||||
|
def connect(self):
|
||||||
|
if not self.connection:
|
||||||
|
self.connection = Connection(
|
||||||
|
hostname=self.settings.host,
|
||||||
|
port=self.settings.port,
|
||||||
|
userid=self.settings.user,
|
||||||
|
password=self.settings.password,
|
||||||
|
virtual_host=self.settings.vhost,
|
||||||
|
ssl=self.settings.ssl
|
||||||
|
)
|
||||||
|
|
||||||
|
def disconnect(self):
|
||||||
|
if self.connection:
|
||||||
|
self.connection.release()
|
||||||
|
self.connection = None
|
||||||
|
|
||||||
|
def send(self, topic, message):
|
||||||
|
"""Publishes a pulse message to the proper exchange."""
|
||||||
|
|
||||||
|
if not message:
|
||||||
|
Log.error("Expecting a message")
|
||||||
|
|
||||||
|
message._prepare()
|
||||||
|
|
||||||
|
if not self.connection:
|
||||||
|
self.connect()
|
||||||
|
|
||||||
|
producer = Producer(
|
||||||
|
channel=self.connection,
|
||||||
|
exchange=Exchange(self.settings.exchange, type='topic'),
|
||||||
|
routing_key=topic
|
||||||
|
)
|
||||||
|
|
||||||
|
# The message is actually a simple envelope format with a payload and
|
||||||
|
# some metadata.
|
||||||
|
final_data = Dict(
|
||||||
|
payload=message.data,
|
||||||
|
_meta=set_default({
|
||||||
|
'exchange': self.settings.exchange,
|
||||||
|
'routing_key': message.routing_key,
|
||||||
|
'serializer': self.settings.serializer,
|
||||||
|
'sent': time_to_string(datetime.datetime.now(timezone(self.settings.broker_timezone))),
|
||||||
|
'count': self.count
|
||||||
|
}, message.metadata)
|
||||||
|
)
|
||||||
|
|
||||||
|
producer.publish(jsons.scrub(final_data), serializer=self.settings.serializer)
|
||||||
|
self.count += 1
|
||||||
|
|
||||||
|
|
||||||
|
class ModifiedGenericConsumer(GenericConsumer):
|
||||||
|
def _drain_events_loop(self):
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
self.connection.drain_events(timeout=self.timeout)
|
||||||
|
except socket_timeout, e:
|
||||||
|
Log.warning("timeout! Restarting pulse consumer.", cause=e)
|
||||||
|
try:
|
||||||
|
self.disconnect()
|
||||||
|
except Exception, f:
|
||||||
|
Log.warning("Problem with disconnect()", cause=f)
|
||||||
|
break
|
Загрузка…
Ссылка в новой задаче