Use exception logging to capture failures in app insights (#14)

* Use exception logging to capture failures in app insights

* Format

* move global exception middleware to pccommon

* format

* fix return type of middleware

* remove unused

* Update changelog

* wip -- log encountered exceptions

* include extra metadata with exception

* remove metrics reporting

* configure liveness paths

* remove erroneous middlewares

* format

* remove more erroneous middleware

* Remove unused metrics module

* remove unused

* Remove pctiler unused

* Remove more unused

* Add special cd exception for this branch

* run on pull requests instead 🤞🏻

* correct tiler liveness env variable name

* bail on LIVENESS_PATH checks

* Format (_really_ thought I did that already)

* Remove pr trigger from cd

* Remove unused import

Co-authored-by: Rob Emanuele <rdemanuele@gmail.com>
Co-authored-by: Matt McFarland <mmcfarland@microsoft.com>
This commit is contained in:
James Santucci 2021-12-15 11:30:00 -07:00 коммит произвёл GitHub
Родитель caa2f70370
Коммит 837df9e6fb
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
10 изменённых файлов: 61 добавлений и 172 удалений

Просмотреть файл

@ -8,5 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Changed
- Organized and updated documentation markdown docs [#11](https://github.com/microsoft/planetary-computer-apis/pull/11)
### Added
- Errors are logged using exceptions and including request metadata [#14](https://github.com/microsoft/planetary-computer-apis/pull/14)
### Fixed
- Updated search limit constraints to avoid 500s [#15](https://github.com/microsoft/planetary-computer-apis/pull/15)

Просмотреть файл

@ -86,11 +86,11 @@ spec:
livenessProbe:
httpGet:
path: /_mgmt/ping
path: "/_mgmt/ping"
port: http
readinessProbe:
httpGet:
path: /_mgmt/ping
path: "/_mgmt/ping"
port: http
resources:
{{- toYaml .Values.stac.deploy.resources | nindent 12 }}

Просмотреть файл

@ -72,32 +72,6 @@ def init_logging(service_name: str) -> None:
logger.info("Not adding Azure log handler since no instrumentation key defined")
def log_collection_request(
domain: str,
logger: logging.Logger,
collection_id: Optional[str],
item_id: Optional[str],
request: Request,
) -> None:
logger.info(
f"{domain} request for collection {collection_id}",
extra={
"custom_dimensions": dict(
[
(k, v)
for k, v in {
"collection_id": collection_id,
"item_id": item_id,
"url": f"{request.url}",
"service_name": domain,
}.items()
if v is not None
]
)
},
)
def request_to_path(request: Request) -> str:
parsed_url = urlparse(f"{request.url}")
return parsed_url.path

Просмотреть файл

@ -1,22 +0,0 @@
from opencensus.ext.azure import metrics_exporter
from opencensus.stats import stats as stats_module
from pccommon.config import CommonConfig
_config = CommonConfig.from_environment()
_instrumentation_key = _config.app_insights_instrumentation_key
if _instrumentation_key:
_exporter = metrics_exporter.new_metrics_exporter(
connection_string=f"InstrumentationKey={_instrumentation_key}"
)
stats = stats_module.stats
view_manager = stats.view_manager
view_manager.register_exporter(_exporter)
stats_recorder = stats.stats_recorder
else:
view_manager = None
stats_recorder = None

32
pccommon/middleware.py Normal file
Просмотреть файл

@ -0,0 +1,32 @@
import logging
from typing import Awaitable, Callable
from fastapi import HTTPException, Request, Response
from pccommon.logging import request_to_path
from pccommon.tracing import HTTP_METHOD, HTTP_PATH, HTTP_URL
logger = logging.getLogger(__name__)
async def handle_exceptions(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
try:
return await call_next(request)
except HTTPException:
raise
except Exception as e:
logger.exception(
"Exception when handling request",
extra={
"custom_dimensions": {
"stackTrace": f"{e}",
HTTP_URL: str(request.url),
HTTP_METHOD: str(request.method),
HTTP_PATH: request_to_path(request),
"service": "tiler",
}
},
)
raise

Просмотреть файл

@ -1,3 +1,5 @@
import os
from opencensus.ext.azure.trace_exporter import AzureExporter
from opencensus.trace.attributes_helper import COMMON_ATTRIBUTES

Просмотреть файл

@ -15,12 +15,13 @@ from starlette.middleware.cors import CORSMiddleware
from starlette.responses import PlainTextResponse
from pccommon.logging import init_logging
from pccommon.middleware import handle_exceptions
from pccommon.openapi import fixup_schema
from pcstac.api import PCStacApi
from pcstac.client import PCClient
from pcstac.config import API_DESCRIPTION, API_TITLE, API_VERSION, get_settings
from pcstac.errors import PC_DEFAULT_STATUS_CODES
from pcstac.middleware import count_collection_requests, trace_request
from pcstac.middleware import trace_request
from pcstac.search import PCItemCollectionUri, PCSearch, PCSearchGetRequest
DEBUG: bool = os.getenv("DEBUG") == "TRUE" or False
@ -90,10 +91,10 @@ app.add_middleware(
@app.middleware("http")
async def _count_collection_requests(
async def _handle_exceptions(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
return await count_collection_requests(request, call_next)
return await handle_exceptions(request, call_next)
@app.middleware("http")

Просмотреть файл

@ -3,16 +3,11 @@ import re
from typing import Awaitable, Callable, Optional, Tuple
from fastapi import Request, Response
from opencensus.stats import aggregation as aggregation_module
from opencensus.stats import measure as measure_module
from opencensus.stats import view as view_module
from opencensus.tags import tag_map as tag_map_module
from opencensus.trace.samplers import ProbabilitySampler
from opencensus.trace.span import SpanKind
from opencensus.trace.tracer import Tracer
from pccommon.logging import log_collection_request, request_to_path
from pccommon.metrics import stats_recorder, view_manager
from pccommon.logging import request_to_path
from pccommon.tracing import (
HTTP_METHOD,
HTTP_PATH,
@ -23,25 +18,7 @@ from pccommon.tracing import (
logger = logging.getLogger(__name__)
browse_request_measure = measure_module.MeasureInt(
"browse-request-count",
"Browsing requests under a given collection",
"browse-request",
)
browse_request_view = view_module.View(
"Browse request view",
browse_request_measure.description,
["collection", "item", "requestPath"],
browse_request_measure,
aggregation_module.CountAggregation(),
)
_log_metrics = view_manager and stats_recorder and exporter
if _log_metrics:
view_manager.register_view(browse_request_view)
mmap = stats_recorder.new_measurement_map()
_log_metrics = exporter is not None
collection_id_re = re.compile(
r".*/collections/?(?P<collection_id>[a-zA-Z0-9\-\%]+)?(/items/(?P<item_id>.*))?.*" # noqa
@ -63,7 +40,8 @@ def _collection_item_from_request(
async def trace_request(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
if _log_metrics:
request_path = request_to_path(request).strip("/")
if _log_metrics and request.method.lower() != "head":
tracer = Tracer(
exporter=exporter,
sampler=ProbabilitySampler(1.0),
@ -86,7 +64,7 @@ async def trace_request(
attribute_key=HTTP_URL, attribute_value=f"{request.url}"
)
tracer.add_attribute_to_current_span(
attribute_key=HTTP_PATH, attribute_value=request_to_path(request)
attribute_key=HTTP_PATH, attribute_value=request_path
)
tracer.add_attribute_to_current_span(
attribute_key=HTTP_METHOD, attribute_value=str(request.method)
@ -109,20 +87,3 @@ async def trace_request(
return response
else:
return await call_next(request)
async def count_collection_requests(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
if _log_metrics:
tmap = tag_map_module.TagMap()
(collection_id, item_id) = _collection_item_from_request(request)
if collection_id:
tmap.insert("collection", collection_id)
if item_id:
tmap.insert("item", item_id)
log_collection_request("stac", logger, collection_id, item_id, request)
tmap.insert("requestPath", request_to_path(request))
mmap.measure_int_put(browse_request_measure, 1)
mmap.record(tmap)
return await call_next(request)

Просмотреть файл

@ -18,11 +18,12 @@ from titiler.application.middleware import (
from titiler.core.errors import DEFAULT_STATUS_CODES, add_exception_handlers
from pccommon.logging import init_logging
from pccommon.middleware import handle_exceptions
from pccommon.openapi import fixup_schema
from pctiler.config import get_settings
from pctiler.db import close_db_connection, connect_to_db
from pctiler.endpoints import item, legend, pg_mosaic
from pctiler.middleware import count_data_requests, trace_request
from pctiler.middleware import trace_request
# Initialize logging
init_logging("tiler")
@ -58,13 +59,6 @@ app.include_router(
)
@app.middleware("http")
async def _count_data_requests(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
return await count_data_requests(request, call_next)
@app.middleware("http")
async def _trace_requests(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
@ -72,6 +66,13 @@ async def _trace_requests(
return await trace_request(request, call_next)
@app.middleware("http")
async def _handle_exceptions(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
return await handle_exceptions(request, call_next)
add_exception_handlers(
app, {**DEFAULT_STATUS_CODES, NoAssetFoundError: status.HTTP_404_NOT_FOUND}
) # type: ignore

Просмотреть файл

@ -1,18 +1,12 @@
import logging
from typing import Awaitable, Callable
from urllib.parse import urlparse
from fastapi import Request, Response
from opencensus.stats import aggregation as aggregation_module
from opencensus.stats import measure as measure_module
from opencensus.stats import view as view_module
from opencensus.tags import tag_map as tag_map_module
from opencensus.trace.samplers import ProbabilitySampler
from opencensus.trace.span import SpanKind
from opencensus.trace.tracer import Tracer
from pccommon.logging import log_collection_request, request_to_path
from pccommon.metrics import stats_recorder, view_manager
from pccommon.logging import request_to_path
from pccommon.tracing import (
HTTP_METHOD,
HTTP_PATH,
@ -23,51 +17,14 @@ from pccommon.tracing import (
logger = logging.getLogger(__name__)
# This list was extracted from every `get` query param in the
# OpenAPI specification at
# http https://planetarycomputer.microsoft.com/api/data/v1/openapi.json
# If a string doesn't appear in the list of attributes in a view, we can't
# send it as a tag to application insights.
# The Azure exporter for python opencensus doesn't seem to want to send more
# than 10 tags, so this list is a bespoke selection from the full list of
# query params, also with the requestPath.
all_query_params = [
"assets",
"collection",
"colormap_name",
"expression",
"format",
"height",
"item",
"items",
"width",
"requestPath",
]
data_request_count_measure = measure_module.MeasureInt(
"data-request-count",
"Requests for data (info, bounds, maps, etc.)",
"data-request-count",
)
data_request_count_view = view_module.View(
"Data request count view",
data_request_count_measure.description,
all_query_params,
data_request_count_measure,
aggregation_module.CountAggregation(),
)
_log_metrics = view_manager and stats_recorder and exporter
if _log_metrics:
view_manager.register_view(data_request_count_view)
mmap = stats_recorder.new_measurement_map()
_log_metrics = exporter is not None
async def trace_request(
request: Request, call_next: Callable[[Request], Awaitable[Response]]
) -> Response:
if _log_metrics:
request_path = request_to_path(request).strip("/")
if _log_metrics and request.method.lower() != "head":
tracer = Tracer(
exporter=exporter,
sampler=ProbabilitySampler(1.0),
@ -85,7 +42,7 @@ async def trace_request(
response = await call_next(request)
tracer.add_attribute_to_current_span(
attribute_key=HTTP_PATH, attribute_value=request_to_path(request)
attribute_key=HTTP_PATH, attribute_value=request_path
)
tracer.add_attribute_to_current_span(
attribute_key=HTTP_STATUS_CODE, attribute_value=response.status_code
@ -114,22 +71,3 @@ async def trace_request(
return response
else:
return await call_next(request)
async def count_data_requests(request: Request, call_next): # type: ignore
if _log_metrics:
tmap = tag_map_module.TagMap()
for k, v in request.query_params.items():
tmap.insert(k, v)
parsed_url = urlparse(f"{request.url}")
tmap.insert("requestPath", parsed_url.path)
mmap.measure_int_put(data_request_count_measure, 1)
mmap.record(tmap)
log_collection_request(
"tiler",
logger,
request.query_params.get("collection"),
request.query_params.get("item"),
request,
)
return await call_next(request)