Use exception logging to capture failures in app insights (#14)

* Use exception logging to capture failures in app insights

* Format

* move global exception middleware to pccommon

* format

* fix return type of middleware

* remove unused

* Update changelog

* wip -- log encountered exceptions

* include extra metadata with exception

* remove metrics reporting

* configure liveness paths

* remove erroneous middlewares

* format

* remove more erroneous middleware

* Remove unused metrics module

* remove unused

* Remove pctiler unused

* Remove more unused

* Add special cd exception for this branch

* run on pull requests instead 🤞🏻

* correct tiler liveness env variable name

* bail on LIVENESS_PATH checks

* Format (_really_ thought I did that already)

* Remove pr trigger from cd

* Remove unused import

Co-authored-by: Rob Emanuele <rdemanuele@gmail.com>
Co-authored-by: Matt McFarland <mmcfarland@microsoft.com>
This commit is contained in:
James Santucci 2021-12-15 11:30:00 -07:00 коммит произвёл GitHub
Родитель caa2f70370
Коммит 837df9e6fb
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
10 изменённых файлов: 61 добавлений и 172 удалений

Просмотреть файл

@ -8,5 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed ### Changed
- Organized and updated documentation markdown docs [#11](https://github.com/microsoft/planetary-computer-apis/pull/11) - Organized and updated documentation markdown docs [#11](https://github.com/microsoft/planetary-computer-apis/pull/11)
### Added
- Errors are logged using exceptions and including request metadata [#14](https://github.com/microsoft/planetary-computer-apis/pull/14)
### Fixed ### Fixed
- Updated search limit constraints to avoid 500s [#15](https://github.com/microsoft/planetary-computer-apis/pull/15) - Updated search limit constraints to avoid 500s [#15](https://github.com/microsoft/planetary-computer-apis/pull/15)

Просмотреть файл

@ -86,11 +86,11 @@ spec:
livenessProbe: livenessProbe:
httpGet: httpGet:
path: /_mgmt/ping path: "/_mgmt/ping"
port: http port: http
readinessProbe: readinessProbe:
httpGet: httpGet:
path: /_mgmt/ping path: "/_mgmt/ping"
port: http port: http
resources: resources:
{{- toYaml .Values.stac.deploy.resources | nindent 12 }} {{- toYaml .Values.stac.deploy.resources | nindent 12 }}

Просмотреть файл

@ -72,32 +72,6 @@ def init_logging(service_name: str) -> None:
logger.info("Not adding Azure log handler since no instrumentation key defined") logger.info("Not adding Azure log handler since no instrumentation key defined")
def log_collection_request(
domain: str,
logger: logging.Logger,
collection_id: Optional[str],
item_id: Optional[str],
request: Request,
) -> None:
logger.info(
f"{domain} request for collection {collection_id}",
extra={
"custom_dimensions": dict(
[
(k, v)
for k, v in {
"collection_id": collection_id,
"item_id": item_id,
"url": f"{request.url}",
"service_name": domain,
}.items()
if v is not None
]
)
},
)
def request_to_path(request: Request) -> str: def request_to_path(request: Request) -> str:
parsed_url = urlparse(f"{request.url}") parsed_url = urlparse(f"{request.url}")
return parsed_url.path return parsed_url.path

Просмотреть файл

@ -1,22 +0,0 @@
from opencensus.ext.azure import metrics_exporter
from opencensus.stats import stats as stats_module
from pccommon.config import CommonConfig
_config = CommonConfig.from_environment()
_instrumentation_key = _config.app_insights_instrumentation_key
if _instrumentation_key:
_exporter = metrics_exporter.new_metrics_exporter(
connection_string=f"InstrumentationKey={_instrumentation_key}"
)
stats = stats_module.stats
view_manager = stats.view_manager
view_manager.register_exporter(_exporter)
stats_recorder = stats.stats_recorder
else:
view_manager = None
stats_recorder = None

32
pccommon/middleware.py Normal file
Просмотреть файл

@ -0,0 +1,32 @@
import logging
from typing import Awaitable, Callable
from fastapi import HTTPException, Request, Response
from pccommon.logging import request_to_path
from pccommon.tracing import HTTP_METHOD, HTTP_PATH, HTTP_URL
logger = logging.getLogger(__name__)
async def handle_exceptions(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
try:
return await call_next(request)
except HTTPException:
raise
except Exception as e:
logger.exception(
"Exception when handling request",
extra={
"custom_dimensions": {
"stackTrace": f"{e}",
HTTP_URL: str(request.url),
HTTP_METHOD: str(request.method),
HTTP_PATH: request_to_path(request),
"service": "tiler",
}
},
)
raise

Просмотреть файл

@ -1,3 +1,5 @@
import os
from opencensus.ext.azure.trace_exporter import AzureExporter from opencensus.ext.azure.trace_exporter import AzureExporter
from opencensus.trace.attributes_helper import COMMON_ATTRIBUTES from opencensus.trace.attributes_helper import COMMON_ATTRIBUTES

Просмотреть файл

@ -15,12 +15,13 @@ from starlette.middleware.cors import CORSMiddleware
from starlette.responses import PlainTextResponse from starlette.responses import PlainTextResponse
from pccommon.logging import init_logging from pccommon.logging import init_logging
from pccommon.middleware import handle_exceptions
from pccommon.openapi import fixup_schema from pccommon.openapi import fixup_schema
from pcstac.api import PCStacApi from pcstac.api import PCStacApi
from pcstac.client import PCClient from pcstac.client import PCClient
from pcstac.config import API_DESCRIPTION, API_TITLE, API_VERSION, get_settings from pcstac.config import API_DESCRIPTION, API_TITLE, API_VERSION, get_settings
from pcstac.errors import PC_DEFAULT_STATUS_CODES from pcstac.errors import PC_DEFAULT_STATUS_CODES
from pcstac.middleware import count_collection_requests, trace_request from pcstac.middleware import trace_request
from pcstac.search import PCItemCollectionUri, PCSearch, PCSearchGetRequest from pcstac.search import PCItemCollectionUri, PCSearch, PCSearchGetRequest
DEBUG: bool = os.getenv("DEBUG") == "TRUE" or False DEBUG: bool = os.getenv("DEBUG") == "TRUE" or False
@ -90,10 +91,10 @@ app.add_middleware(
@app.middleware("http") @app.middleware("http")
async def _count_collection_requests( async def _handle_exceptions(
request: Request, call_next: Callable[[Request], Awaitable[Response]] request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response: ) -> Response:
return await count_collection_requests(request, call_next) return await handle_exceptions(request, call_next)
@app.middleware("http") @app.middleware("http")

Просмотреть файл

@ -3,16 +3,11 @@ import re
from typing import Awaitable, Callable, Optional, Tuple from typing import Awaitable, Callable, Optional, Tuple
from fastapi import Request, Response from fastapi import Request, Response
from opencensus.stats import aggregation as aggregation_module
from opencensus.stats import measure as measure_module
from opencensus.stats import view as view_module
from opencensus.tags import tag_map as tag_map_module
from opencensus.trace.samplers import ProbabilitySampler from opencensus.trace.samplers import ProbabilitySampler
from opencensus.trace.span import SpanKind from opencensus.trace.span import SpanKind
from opencensus.trace.tracer import Tracer from opencensus.trace.tracer import Tracer
from pccommon.logging import log_collection_request, request_to_path from pccommon.logging import request_to_path
from pccommon.metrics import stats_recorder, view_manager
from pccommon.tracing import ( from pccommon.tracing import (
HTTP_METHOD, HTTP_METHOD,
HTTP_PATH, HTTP_PATH,
@ -23,25 +18,7 @@ from pccommon.tracing import (
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
_log_metrics = exporter is not None
browse_request_measure = measure_module.MeasureInt(
"browse-request-count",
"Browsing requests under a given collection",
"browse-request",
)
browse_request_view = view_module.View(
"Browse request view",
browse_request_measure.description,
["collection", "item", "requestPath"],
browse_request_measure,
aggregation_module.CountAggregation(),
)
_log_metrics = view_manager and stats_recorder and exporter
if _log_metrics:
view_manager.register_view(browse_request_view)
mmap = stats_recorder.new_measurement_map()
collection_id_re = re.compile( collection_id_re = re.compile(
r".*/collections/?(?P<collection_id>[a-zA-Z0-9\-\%]+)?(/items/(?P<item_id>.*))?.*" # noqa r".*/collections/?(?P<collection_id>[a-zA-Z0-9\-\%]+)?(/items/(?P<item_id>.*))?.*" # noqa
@ -63,7 +40,8 @@ def _collection_item_from_request(
async def trace_request( async def trace_request(
request: Request, call_next: Callable[[Request], Awaitable[Response]] request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response: ) -> Response:
if _log_metrics: request_path = request_to_path(request).strip("/")
if _log_metrics and request.method.lower() != "head":
tracer = Tracer( tracer = Tracer(
exporter=exporter, exporter=exporter,
sampler=ProbabilitySampler(1.0), sampler=ProbabilitySampler(1.0),
@ -86,7 +64,7 @@ async def trace_request(
attribute_key=HTTP_URL, attribute_value=f"{request.url}" attribute_key=HTTP_URL, attribute_value=f"{request.url}"
) )
tracer.add_attribute_to_current_span( tracer.add_attribute_to_current_span(
attribute_key=HTTP_PATH, attribute_value=request_to_path(request) attribute_key=HTTP_PATH, attribute_value=request_path
) )
tracer.add_attribute_to_current_span( tracer.add_attribute_to_current_span(
attribute_key=HTTP_METHOD, attribute_value=str(request.method) attribute_key=HTTP_METHOD, attribute_value=str(request.method)
@ -109,20 +87,3 @@ async def trace_request(
return response return response
else: else:
return await call_next(request) return await call_next(request)
async def count_collection_requests(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
if _log_metrics:
tmap = tag_map_module.TagMap()
(collection_id, item_id) = _collection_item_from_request(request)
if collection_id:
tmap.insert("collection", collection_id)
if item_id:
tmap.insert("item", item_id)
log_collection_request("stac", logger, collection_id, item_id, request)
tmap.insert("requestPath", request_to_path(request))
mmap.measure_int_put(browse_request_measure, 1)
mmap.record(tmap)
return await call_next(request)

Просмотреть файл

@ -18,11 +18,12 @@ from titiler.application.middleware import (
from titiler.core.errors import DEFAULT_STATUS_CODES, add_exception_handlers from titiler.core.errors import DEFAULT_STATUS_CODES, add_exception_handlers
from pccommon.logging import init_logging from pccommon.logging import init_logging
from pccommon.middleware import handle_exceptions
from pccommon.openapi import fixup_schema from pccommon.openapi import fixup_schema
from pctiler.config import get_settings from pctiler.config import get_settings
from pctiler.db import close_db_connection, connect_to_db from pctiler.db import close_db_connection, connect_to_db
from pctiler.endpoints import item, legend, pg_mosaic from pctiler.endpoints import item, legend, pg_mosaic
from pctiler.middleware import count_data_requests, trace_request from pctiler.middleware import trace_request
# Initialize logging # Initialize logging
init_logging("tiler") init_logging("tiler")
@ -58,13 +59,6 @@ app.include_router(
) )
@app.middleware("http")
async def _count_data_requests(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
return await count_data_requests(request, call_next)
@app.middleware("http") @app.middleware("http")
async def _trace_requests( async def _trace_requests(
request: Request, call_next: Callable[[Request], Awaitable[Response]] request: Request, call_next: Callable[[Request], Awaitable[Response]]
@ -72,6 +66,13 @@ async def _trace_requests(
return await trace_request(request, call_next) return await trace_request(request, call_next)
@app.middleware("http")
async def _handle_exceptions(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
return await handle_exceptions(request, call_next)
add_exception_handlers( add_exception_handlers(
app, {**DEFAULT_STATUS_CODES, NoAssetFoundError: status.HTTP_404_NOT_FOUND} app, {**DEFAULT_STATUS_CODES, NoAssetFoundError: status.HTTP_404_NOT_FOUND}
) # type: ignore ) # type: ignore

Просмотреть файл

@ -1,18 +1,12 @@
import logging import logging
from typing import Awaitable, Callable from typing import Awaitable, Callable
from urllib.parse import urlparse
from fastapi import Request, Response from fastapi import Request, Response
from opencensus.stats import aggregation as aggregation_module
from opencensus.stats import measure as measure_module
from opencensus.stats import view as view_module
from opencensus.tags import tag_map as tag_map_module
from opencensus.trace.samplers import ProbabilitySampler from opencensus.trace.samplers import ProbabilitySampler
from opencensus.trace.span import SpanKind from opencensus.trace.span import SpanKind
from opencensus.trace.tracer import Tracer from opencensus.trace.tracer import Tracer
from pccommon.logging import log_collection_request, request_to_path from pccommon.logging import request_to_path
from pccommon.metrics import stats_recorder, view_manager
from pccommon.tracing import ( from pccommon.tracing import (
HTTP_METHOD, HTTP_METHOD,
HTTP_PATH, HTTP_PATH,
@ -23,51 +17,14 @@ from pccommon.tracing import (
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# This list was extracted from every `get` query param in the _log_metrics = exporter is not None
# OpenAPI specification at
# http https://planetarycomputer.microsoft.com/api/data/v1/openapi.json
# If a string doesn't appear in the list of attributes in a view, we can't
# send it as a tag to application insights.
# The Azure exporter for python opencensus doesn't seem to want to send more
# than 10 tags, so this list is a bespoke selection from the full list of
# query params, also with the requestPath.
all_query_params = [
"assets",
"collection",
"colormap_name",
"expression",
"format",
"height",
"item",
"items",
"width",
"requestPath",
]
data_request_count_measure = measure_module.MeasureInt(
"data-request-count",
"Requests for data (info, bounds, maps, etc.)",
"data-request-count",
)
data_request_count_view = view_module.View(
"Data request count view",
data_request_count_measure.description,
all_query_params,
data_request_count_measure,
aggregation_module.CountAggregation(),
)
_log_metrics = view_manager and stats_recorder and exporter
if _log_metrics:
view_manager.register_view(data_request_count_view)
mmap = stats_recorder.new_measurement_map()
async def trace_request( async def trace_request(
request: Request, call_next: Callable[[Request], Awaitable[Response]] request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response: ) -> Response:
if _log_metrics: request_path = request_to_path(request).strip("/")
if _log_metrics and request.method.lower() != "head":
tracer = Tracer( tracer = Tracer(
exporter=exporter, exporter=exporter,
sampler=ProbabilitySampler(1.0), sampler=ProbabilitySampler(1.0),
@ -85,7 +42,7 @@ async def trace_request(
response = await call_next(request) response = await call_next(request)
tracer.add_attribute_to_current_span( tracer.add_attribute_to_current_span(
attribute_key=HTTP_PATH, attribute_value=request_to_path(request) attribute_key=HTTP_PATH, attribute_value=request_path
) )
tracer.add_attribute_to_current_span( tracer.add_attribute_to_current_span(
attribute_key=HTTP_STATUS_CODE, attribute_value=response.status_code attribute_key=HTTP_STATUS_CODE, attribute_value=response.status_code
@ -114,22 +71,3 @@ async def trace_request(
return response return response
else: else:
return await call_next(request) return await call_next(request)
async def count_data_requests(request: Request, call_next): # type: ignore
if _log_metrics:
tmap = tag_map_module.TagMap()
for k, v in request.query_params.items():
tmap.insert(k, v)
parsed_url = urlparse(f"{request.url}")
tmap.insert("requestPath", parsed_url.path)
mmap.measure_int_put(data_request_count_measure, 1)
mmap.record(tmap)
log_collection_request(
"tiler",
logger,
request.query_params.get("collection"),
request.query_params.get("item"),
request,
)
return await call_next(request)