Get inbound email and route it to py3 code for processing (#1561)

* wrote code

* Simplify and add tests

* Revert unneeded file

* Revert unneeded file

* Addressed review comments

* Made code more robust during unit testing

* Mock the needed response code
This commit is contained in:
Jason Robbins 2021-09-28 14:57:37 -07:00 коммит произвёл GitHub
Родитель 8e4ed4bc00
Коммит a450fd02a0
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
11 изменённых файлов: 382 добавлений и 14 удалений

Просмотреть файл

@ -15,16 +15,20 @@ automatic_scaling:
builtins:
- remote_api: on
# These handlers are in this file because they require GAE py2.
# TODO(jrobbins): phase out all of this when we redo email.
handlers:
# Note: This handler must remain in this file because it requires GAE py2.
- url: /tasks/outbound-email
script: internals.sendemail.app
# Header checks prevent raw access to this handler. Tasks have headers.
# Note: This handler must remain in this file because it requires GAE py2.
- url: /_ah/mail/.+
script: internals.sendemail.app
login: admin # Prevents raw access to this handler.
- url: /_ah/bounce
# TODO(jrobbins): phase out this handler when we redo email.
script: internals.sendemail.app
login: admin # Prevents raw access to this handler.

Просмотреть файл

@ -8,6 +8,9 @@ dispatch:
# - url: "*/tasks/outbound-email"
# service: app-py2
- url: "*/tasks/detect-intent"
service: notifier
- url: "*/tasks/email-subscribers"
service: notifier

Просмотреть файл

@ -373,10 +373,19 @@ class FlaskHandler(BaseHandler):
def require_task_header(self):
"""Abort if this is not a Google Cloud Tasks request."""
if settings.UNIT_TEST_MODE:
if settings.UNIT_TEST_MODE or settings.DEV_MODE:
return
if 'X-AppEngine-QueueName' not in self.request.headers:
self.abort(403, msg='Lacking X-AppEngine-QueueName header')
if 'X-AppEngine-QueueName' in self.request.headers:
return
if self.request.headers.get('X-Appengine-Inbound-Appid') == settings.APP_ID:
return
logging.info('headers lack needed header:')
for k, v in self.request.headers:
logging.info('%r: %r', k, v)
self.abort(403, msg=('Lacking X-AppEngine-QueueName or '
'incorrect X-Appengine-Inbound-Appid headers'))
def split_input(self, field_name, delim='\\r?\\n'):
"""Split the input lines, strip whitespace, and skip blank lines."""

Просмотреть файл

@ -594,6 +594,13 @@ class FlaskHandlerTests(testing_config.CustomTestCase):
with test_app.test_request_context('/test', headers=headers):
self.handler.require_task_header()
@mock.patch('settings.UNIT_TEST_MODE', False)
def test_require_task_header__same_app(self):
"""If the incoming request is from our own app, we allow it."""
headers = {'X-Appengine-Inbound-Appid': 'dev'}
with test_app.test_request_context('/test', headers=headers):
self.handler.require_task_header()
@mock.patch('settings.UNIT_TEST_MODE', False)
def test_require_task_header__missing(self):
"""If the incoming request is not from GCT, abort."""

Просмотреть файл

@ -0,0 +1,53 @@
# -*- coding: utf-8 -*-
# Copyright 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License")
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import division
from __future__ import print_function
import logging
import settings
from framework import basehandlers
class IntentEmailHandler(basehandlers.FlaskHandler):
"""This task handles an inbound email to detect intent threads."""
IS_INTERNAL_HANDLER = True
def process_post_data(self):
self.require_task_header()
from_addr = self.get_param('from_addr')
subject = self.get_param('subject')
in_reply_to = self.get_param('in_reply_to', required=False)
body = self.get_param('body')
logging.info('In IntentEmailHandler:\n'
'From addr: %r\n'
'Subject: %r\n'
'In reply to: %r\n'
'Body: %r\n',
from_addr, subject, in_reply_to, body)
# TODO(jrobbins): Write code to parse out:
# 1. The type of intent.
# 2. The feature ID number
# 3. Any clear "LGTM"s
# And retrieve model objects for the feature and the sending user.
# Set the thread URL on the feature.
# Set an LGTM if appropriate.
return {'message': 'Done'}

Просмотреть файл

@ -0,0 +1,55 @@
# Copyright 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License")
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import testing_config # Must be imported first
import flask
import werkzeug
from internals import models
from internals import detect_intent
test_app = flask.Flask(__name__)
class IntentEmailHandlerTest(testing_config.CustomTestCase):
def setUp(self):
self.feature_1 = models.Feature(
name='feature one', summary='detailed sum', category=1, visibility=1,
standardization=1, web_dev_views=1, impl_status_chrome=1,
intent_stage=models.INTENT_IMPLEMENT)
self.feature_1.put()
self.feature_id = self.feature_1.key.integer_id()
self.request_path = '/tasks/detect-intent'
self.json_data = {
'from_addr': 'user@example.com',
'subject': 'Intent to Ship: Featurename',
'body': 'Please review',
}
self.handler = detect_intent.IntentEmailHandler()
def tearDown(self):
self.feature_1.key.delete()
def test_process_post_data__normal(self):
"""When everything is perfect, we record the intent thread."""
with test_app.test_request_context(
self.request_path, json=self.json_data):
actual = self.handler.process_post_data()
# TODO(jrobbins): Add checks after detect_intent is written.
self.assertEqual(actual, {'message': 'Done'})

Просмотреть файл

@ -13,12 +13,13 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import flask
import json
import logging
import re
import urllib
import requests
import rfc822
from google.appengine.api import mail
from google.appengine.ext.webapp.mail_handlers import BounceNotification
@ -27,10 +28,13 @@ import settings
app = flask.Flask(__name__)
# Parsing very large messages could cause out-of-memory errors.
MAX_BODY_SIZE = 100 * 1024
def require_task_header():
"""Abort if this is not a Google Cloud Tasks request."""
if settings.UNIT_TEST_MODE:
if settings.UNIT_TEST_MODE or settings.DEV_MODE:
return
if 'X-AppEngine-QueueName' not in flask.request.headers:
flask.abort(403, msg='Lacking X-AppEngine-QueueName header')
@ -118,3 +122,99 @@ def receive(bounce_message):
message.check_initialized()
if settings.SEND_EMAIL:
message.send()
def _extract_addrs(header_value):
"""Given a message header value, return email address found there."""
friendly_addr_pairs = list(rfc822.AddressList(header_value))
return [addr for _friendly, addr in friendly_addr_pairs]
def call_py3_task_handler(handler_path, task_dict):
"""Request that our py3 code handle the rest of the work."""
handler_host = 'http://localhost:8080'
if settings.APP_ID == 'cr-status':
handler_host = 'https://chromestatus.com'
if settings.APP_ID == 'cr-status-staging':
handler_host = 'https://cr-status-staging.appspot.com'
handler_url = handler_host + handler_path
request_body = json.dumps(task_dict).encode()
logging.info('task_dict is %r', task_dict)
# AppEngine automatically sets header X-Appengine-Inbound-Appid,
# and that header is stripped from external requests. So,
# require_task_header() can check for it to authenticate.
handler_response = requests.request(
'POST', handler_url, data=request_body, allow_redirects=False)
logging.info('request_response is %r', handler_response)
return handler_response
def get_incoming_message():
"""Get an email message object from the request data."""
data = flask.request.get_data(as_text=True)
msg = mail.InboundEmailMessage(data).original
return msg
@app.route('/_ah/mail/<string:addr>', methods=['POST'])
def handle_incoming_mail(addr=None):
"""Handle an incoming email by making a task to examine it.
This code checks some basic properties of the incoming message
to make sure that it is worth examining. Then it puts all the
relevent fields into a dict and makes a new Cloud Task which
is futher processed in python 3 code.
"""
logging.info('Request Headers: %r', flask.request.headers)
logging.info('\n\n\nPOST for InboundEmail and addr is %r', addr)
if addr != settings.INBOUND_EMAIL_ADDR:
logging.info('Message not sent directly to our address')
return {'message': 'Wrong address'}
if flask.request.content_length > MAX_BODY_SIZE:
logging.info('Message too big, ignoring')
return {'message': 'Too big'}
msg = get_incoming_message()
precedence = msg.get('precedence', '')
if precedence.lower() in ['bulk', 'junk']:
logging.info('Precedence: %r indicates an autoresponder', precedence)
return {'message': 'Wrong precedence'}
from_addrs = _extract_addrs(msg.get('from', ''))
if from_addrs:
from_addr = from_addrs[0]
else:
logging.info('could not parse from addr')
return {'message': 'Missing From'}
in_reply_to = msg.get('in-reply-to', '')
body = u''
for part in msg.walk():
# We only process plain text emails.
if part.get_content_type() == 'text/plain':
body = part.get_payload(decode=True)
if not isinstance(body, unicode):
body = body.decode('utf-8')
break # Only consider the first text part.
to_addr = urllib.unquote(addr)
subject = msg.get('subject', '')
task_dict = {
'to_addr': to_addr,
'from_addr': from_addr,
'subject': subject,
'in_reply_to': in_reply_to,
'body': body,
}
response = call_py3_task_handler('/tasks/detect-intent', task_dict)
if response.status_code and response.status_code != 200:
logging.warning('Handoff to py3 failed.')
flask.abort(400)
return {'message': 'Done'}

Просмотреть файл

@ -1,6 +1,3 @@
# Copyright 2021 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License")
@ -15,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import email
import collections
import json
import testing_config_py2 # Must be imported before the module under test.
@ -179,3 +177,132 @@ class BouncedEmailHandlerTest(unittest.TestCase):
mock_message = mock_emailmessage_constructor.return_value
mock_message.check_initialized.assert_called_once_with()
mock_message.send.assert_called()
class FunctionTest(unittest.TestCase):
def test_extract_addrs(self):
"""We can parse email From: lines."""
header_val = ''
self.assertEqual(
[], sendemail._extract_addrs(header_val))
header_val = 'J. Robbins <a@b.com>, c@d.com,\n Nick "Name" Dude <e@f.com>'
self.assertEqual(
['a@b.com', 'c@d.com', 'e@f.com'],
sendemail._extract_addrs(header_val))
header_val = ('hot: J. O\'Robbins <a@b.com>; '
'cool: "friendly" <e.g-h@i-j.k-L.com>')
self.assertEqual(
['a@b.com', 'e.g-h@i-j.k-L.com'],
sendemail._extract_addrs(header_val))
@mock.patch('requests.request')
def test_call_py3_task_handler(self, mock_request):
"""Our py2 code can make a request to our py3 code."""
mock_request.return_value = 'mock response'
actual = sendemail.call_py3_task_handler('/path', {'a': 1})
self.assertEqual('mock response', actual)
mock_request.assert_called_once_with(
'POST', 'http://localhost:8080/path',
data=b'{"a": 1}', allow_redirects=False)
def MakeMessage(header_list, body):
"""Convenience function to make an email.message.Message."""
msg = email.message.Message()
for key, value in header_list:
msg[key] = value
msg.set_payload(body)
return msg
HEADER_LINES = [
('From', 'user@example.com'),
('To', settings.INBOUND_EMAIL_ADDR),
('Cc', 'other@chromium.org'),
('Subject', 'Intent to Ship: Featurename'),
('In-Reply-To', 'fake message id'),
]
class InboundEmailHandlerTest(unittest.TestCase):
def test_handle_incoming_mail__wrong_to_addr(self):
"""Reject the email if the app was not on the To: line."""
with sendemail.app.test_request_context('/_ah/mail/other@example.com'):
actual = sendemail.handle_incoming_mail('other@example.com')
self.assertEqual(
{'message': 'Wrong address'},
actual)
def test_handle_incoming_mail__too_big(self):
"""Reject the incoming email if it is huge."""
data = b'x' * sendemail.MAX_BODY_SIZE + b' is too big'
with sendemail.app.test_request_context(
'/_ah/mail/%s' % settings.INBOUND_EMAIL_ADDR, data=data):
actual = sendemail.handle_incoming_mail(settings.INBOUND_EMAIL_ADDR)
self.assertEqual(
{'message': 'Too big'},
actual)
@mock.patch('internals.sendemail.get_incoming_message')
def test_handle_incoming_mail__junk_mail(self, mock_get_incoming_message):
"""Reject the incoming email if it has the wrong precedence header."""
for precedence in ['Bulk', 'Junk']:
msg = MakeMessage(
HEADER_LINES + [('Precedence', precedence)],
'I am on vacation!')
mock_get_incoming_message.return_value = msg
with sendemail.app.test_request_context(
'/_ah/mail/%s' % settings.INBOUND_EMAIL_ADDR):
actual = sendemail.handle_incoming_mail(settings.INBOUND_EMAIL_ADDR)
self.assertEqual(
{'message': 'Wrong precedence'},
actual)
@mock.patch('internals.sendemail.get_incoming_message')
def test_handle_incoming_mail__unclear_from(self, mock_get_incoming_message):
"""Reject the incoming email if it we cannot parse the From: line."""
msg = MakeMessage([], 'Guess who this is')
mock_get_incoming_message.return_value = msg
with sendemail.app.test_request_context(
'/_ah/mail/%s' % settings.INBOUND_EMAIL_ADDR):
actual = sendemail.handle_incoming_mail(settings.INBOUND_EMAIL_ADDR)
self.assertEqual(
{'message': 'Missing From'},
actual)
@mock.patch('internals.sendemail.call_py3_task_handler')
@mock.patch('internals.sendemail.get_incoming_message')
def test_handle_incoming_mail__normal(
self, mock_get_incoming_message, mock_call_py3):
"""Reject the incoming email if it we cannot parse the From: line."""
msg = MakeMessage(HEADER_LINES, 'Please review')
mock_get_incoming_message.return_value = msg
mock_call_py3.return_value = testing_config_py2.Blank(status_code=200)
with sendemail.app.test_request_context(
'/_ah/mail/%s' % settings.INBOUND_EMAIL_ADDR):
actual = sendemail.handle_incoming_mail(settings.INBOUND_EMAIL_ADDR)
self.assertEqual({'message': 'Done'}, actual)
expected_task_dict = {
'to_addr': settings.INBOUND_EMAIL_ADDR,
'from_addr': 'user@example.com',
'subject': 'Intent to Ship: Featurename',
'in_reply_to': 'fake message id',
'body': 'Please review',
}
mock_call_py3.assert_called_once_with(
'/tasks/detect-intent', expected_task_dict)

Просмотреть файл

@ -29,6 +29,7 @@ from api import stars_api
from api import token_refresh_api
from framework import basehandlers
from framework import csp
from internals import detect_intent
from internals import fetchmetrics
from internals import notifier
from pages import blink_handler
@ -160,6 +161,8 @@ internals_routes = [
('/cron/update_blink_components', fetchmetrics.BlinkComponentHandler),
('/tasks/email-subscribers', notifier.FeatureChangeHandler),
('/tasks/detect-intent', detect_intent.IntentEmailHandler),
]

Просмотреть файл

@ -2,7 +2,11 @@ runtime: python39
service: notifier
handlers:
- url: /tasks/.*
- url: /tasks/detect-intent
script: auto
# Header checks prevent raw access to this handler. Tasks have headers.
- url: /tasks/email-subscribers
script: auto
# Header checks prevent raw access to this handler. Tasks have headers.

Просмотреть файл

@ -69,6 +69,8 @@ GOOGLE_SIGN_IN_CLIENT_ID = (
# page that requires being signed in.
LOGIN_PAGE_URL = '/features?loginStatus=False'
INBOUND_EMAIL_ADDR = 'chromestatus@cr-status-staging.appspotmail.com'
if UNIT_TEST_MODE:
APP_TITLE = 'Local testing'
@ -87,6 +89,7 @@ elif APP_ID == 'cr-status':
GOOGLE_SIGN_IN_CLIENT_ID = (
'999517574127-7ueh2a17bv1ave9thlgtap19pt5qjp4g.'
'apps.googleusercontent.com')
INBOUND_EMAIL_ADDR = 'chromestatus@cr-status.appspotmail.com'
elif APP_ID == 'cr-status-staging':
STAGING = True
SEND_EMAIL = True