Use exception logging to capture failures in app insights (#14)
* Use exception logging to capture failures in app insights
* Format
* move global exception middleware to pccommon
* format
* fix return type of middleware
* remove unused
* Update changelog
* wip -- log encountered exceptions
* include extra metadata with exception
* remove metrics reporting
* configure liveness paths
* remove erroneous middlewares
* format
* remove more erroneous middleware
* Remove unused metrics module
* remove unused
* Remove pctiler unused
* Remove more unused
* Add special cd exception for this branch
* run on pull requests instead 🤞🏻
* correct tiler liveness env variable name
* bail on LIVENESS_PATH checks
* Format (_really_ thought I did that already)
* Remove pr trigger from cd
* Remove unused import
Co-authored-by: Rob Emanuele <rdemanuele@gmail.com>
Co-authored-by: Matt McFarland <mmcfarland@microsoft.com>
This commit is contained in:
Родитель
caa2f70370
Коммит
837df9e6fb
|
@ -8,5 +8,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
### Changed
|
### Changed
|
||||||
- Organized and updated documentation markdown docs [#11](https://github.com/microsoft/planetary-computer-apis/pull/11)
|
- Organized and updated documentation markdown docs [#11](https://github.com/microsoft/planetary-computer-apis/pull/11)
|
||||||
|
|
||||||
|
### Added
|
||||||
|
- Errors are logged using exceptions and including request metadata [#14](https://github.com/microsoft/planetary-computer-apis/pull/14)
|
||||||
### Fixed
|
### Fixed
|
||||||
- Updated search limit constraints to avoid 500s [#15](https://github.com/microsoft/planetary-computer-apis/pull/15)
|
- Updated search limit constraints to avoid 500s [#15](https://github.com/microsoft/planetary-computer-apis/pull/15)
|
||||||
|
|
|
@ -86,11 +86,11 @@ spec:
|
||||||
|
|
||||||
livenessProbe:
|
livenessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /_mgmt/ping
|
path: "/_mgmt/ping"
|
||||||
port: http
|
port: http
|
||||||
readinessProbe:
|
readinessProbe:
|
||||||
httpGet:
|
httpGet:
|
||||||
path: /_mgmt/ping
|
path: "/_mgmt/ping"
|
||||||
port: http
|
port: http
|
||||||
resources:
|
resources:
|
||||||
{{- toYaml .Values.stac.deploy.resources | nindent 12 }}
|
{{- toYaml .Values.stac.deploy.resources | nindent 12 }}
|
||||||
|
|
|
@ -72,32 +72,6 @@ def init_logging(service_name: str) -> None:
|
||||||
logger.info("Not adding Azure log handler since no instrumentation key defined")
|
logger.info("Not adding Azure log handler since no instrumentation key defined")
|
||||||
|
|
||||||
|
|
||||||
def log_collection_request(
|
|
||||||
domain: str,
|
|
||||||
logger: logging.Logger,
|
|
||||||
collection_id: Optional[str],
|
|
||||||
item_id: Optional[str],
|
|
||||||
request: Request,
|
|
||||||
) -> None:
|
|
||||||
logger.info(
|
|
||||||
f"{domain} request for collection {collection_id}",
|
|
||||||
extra={
|
|
||||||
"custom_dimensions": dict(
|
|
||||||
[
|
|
||||||
(k, v)
|
|
||||||
for k, v in {
|
|
||||||
"collection_id": collection_id,
|
|
||||||
"item_id": item_id,
|
|
||||||
"url": f"{request.url}",
|
|
||||||
"service_name": domain,
|
|
||||||
}.items()
|
|
||||||
if v is not None
|
|
||||||
]
|
|
||||||
)
|
|
||||||
},
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
def request_to_path(request: Request) -> str:
|
def request_to_path(request: Request) -> str:
|
||||||
parsed_url = urlparse(f"{request.url}")
|
parsed_url = urlparse(f"{request.url}")
|
||||||
return parsed_url.path
|
return parsed_url.path
|
||||||
|
|
|
@ -1,22 +0,0 @@
|
||||||
from opencensus.ext.azure import metrics_exporter
|
|
||||||
from opencensus.stats import stats as stats_module
|
|
||||||
|
|
||||||
from pccommon.config import CommonConfig
|
|
||||||
|
|
||||||
_config = CommonConfig.from_environment()
|
|
||||||
|
|
||||||
_instrumentation_key = _config.app_insights_instrumentation_key
|
|
||||||
|
|
||||||
if _instrumentation_key:
|
|
||||||
_exporter = metrics_exporter.new_metrics_exporter(
|
|
||||||
connection_string=f"InstrumentationKey={_instrumentation_key}"
|
|
||||||
)
|
|
||||||
|
|
||||||
stats = stats_module.stats
|
|
||||||
view_manager = stats.view_manager
|
|
||||||
view_manager.register_exporter(_exporter)
|
|
||||||
|
|
||||||
stats_recorder = stats.stats_recorder
|
|
||||||
else:
|
|
||||||
view_manager = None
|
|
||||||
stats_recorder = None
|
|
|
@ -0,0 +1,32 @@
|
||||||
|
import logging
|
||||||
|
from typing import Awaitable, Callable
|
||||||
|
|
||||||
|
from fastapi import HTTPException, Request, Response
|
||||||
|
|
||||||
|
from pccommon.logging import request_to_path
|
||||||
|
from pccommon.tracing import HTTP_METHOD, HTTP_PATH, HTTP_URL
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
async def handle_exceptions(
|
||||||
|
request: Request, call_next: Callable[[Request], Awaitable[Response]]
|
||||||
|
) -> Response:
|
||||||
|
try:
|
||||||
|
return await call_next(request)
|
||||||
|
except HTTPException:
|
||||||
|
raise
|
||||||
|
except Exception as e:
|
||||||
|
logger.exception(
|
||||||
|
"Exception when handling request",
|
||||||
|
extra={
|
||||||
|
"custom_dimensions": {
|
||||||
|
"stackTrace": f"{e}",
|
||||||
|
HTTP_URL: str(request.url),
|
||||||
|
HTTP_METHOD: str(request.method),
|
||||||
|
HTTP_PATH: request_to_path(request),
|
||||||
|
"service": "tiler",
|
||||||
|
}
|
||||||
|
},
|
||||||
|
)
|
||||||
|
raise
|
|
@ -1,3 +1,5 @@
|
||||||
|
import os
|
||||||
|
|
||||||
from opencensus.ext.azure.trace_exporter import AzureExporter
|
from opencensus.ext.azure.trace_exporter import AzureExporter
|
||||||
from opencensus.trace.attributes_helper import COMMON_ATTRIBUTES
|
from opencensus.trace.attributes_helper import COMMON_ATTRIBUTES
|
||||||
|
|
||||||
|
|
|
@ -15,12 +15,13 @@ from starlette.middleware.cors import CORSMiddleware
|
||||||
from starlette.responses import PlainTextResponse
|
from starlette.responses import PlainTextResponse
|
||||||
|
|
||||||
from pccommon.logging import init_logging
|
from pccommon.logging import init_logging
|
||||||
|
from pccommon.middleware import handle_exceptions
|
||||||
from pccommon.openapi import fixup_schema
|
from pccommon.openapi import fixup_schema
|
||||||
from pcstac.api import PCStacApi
|
from pcstac.api import PCStacApi
|
||||||
from pcstac.client import PCClient
|
from pcstac.client import PCClient
|
||||||
from pcstac.config import API_DESCRIPTION, API_TITLE, API_VERSION, get_settings
|
from pcstac.config import API_DESCRIPTION, API_TITLE, API_VERSION, get_settings
|
||||||
from pcstac.errors import PC_DEFAULT_STATUS_CODES
|
from pcstac.errors import PC_DEFAULT_STATUS_CODES
|
||||||
from pcstac.middleware import count_collection_requests, trace_request
|
from pcstac.middleware import trace_request
|
||||||
from pcstac.search import PCItemCollectionUri, PCSearch, PCSearchGetRequest
|
from pcstac.search import PCItemCollectionUri, PCSearch, PCSearchGetRequest
|
||||||
|
|
||||||
DEBUG: bool = os.getenv("DEBUG") == "TRUE" or False
|
DEBUG: bool = os.getenv("DEBUG") == "TRUE" or False
|
||||||
|
@ -90,10 +91,10 @@ app.add_middleware(
|
||||||
|
|
||||||
|
|
||||||
@app.middleware("http")
|
@app.middleware("http")
|
||||||
async def _count_collection_requests(
|
async def _handle_exceptions(
|
||||||
request: Request, call_next: Callable[[Request], Awaitable[Response]]
|
request: Request, call_next: Callable[[Request], Awaitable[Response]]
|
||||||
) -> Response:
|
) -> Response:
|
||||||
return await count_collection_requests(request, call_next)
|
return await handle_exceptions(request, call_next)
|
||||||
|
|
||||||
|
|
||||||
@app.middleware("http")
|
@app.middleware("http")
|
||||||
|
|
|
@ -3,16 +3,11 @@ import re
|
||||||
from typing import Awaitable, Callable, Optional, Tuple
|
from typing import Awaitable, Callable, Optional, Tuple
|
||||||
|
|
||||||
from fastapi import Request, Response
|
from fastapi import Request, Response
|
||||||
from opencensus.stats import aggregation as aggregation_module
|
|
||||||
from opencensus.stats import measure as measure_module
|
|
||||||
from opencensus.stats import view as view_module
|
|
||||||
from opencensus.tags import tag_map as tag_map_module
|
|
||||||
from opencensus.trace.samplers import ProbabilitySampler
|
from opencensus.trace.samplers import ProbabilitySampler
|
||||||
from opencensus.trace.span import SpanKind
|
from opencensus.trace.span import SpanKind
|
||||||
from opencensus.trace.tracer import Tracer
|
from opencensus.trace.tracer import Tracer
|
||||||
|
|
||||||
from pccommon.logging import log_collection_request, request_to_path
|
from pccommon.logging import request_to_path
|
||||||
from pccommon.metrics import stats_recorder, view_manager
|
|
||||||
from pccommon.tracing import (
|
from pccommon.tracing import (
|
||||||
HTTP_METHOD,
|
HTTP_METHOD,
|
||||||
HTTP_PATH,
|
HTTP_PATH,
|
||||||
|
@ -23,25 +18,7 @@ from pccommon.tracing import (
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
_log_metrics = exporter is not None
|
||||||
browse_request_measure = measure_module.MeasureInt(
|
|
||||||
"browse-request-count",
|
|
||||||
"Browsing requests under a given collection",
|
|
||||||
"browse-request",
|
|
||||||
)
|
|
||||||
browse_request_view = view_module.View(
|
|
||||||
"Browse request view",
|
|
||||||
browse_request_measure.description,
|
|
||||||
["collection", "item", "requestPath"],
|
|
||||||
browse_request_measure,
|
|
||||||
aggregation_module.CountAggregation(),
|
|
||||||
)
|
|
||||||
|
|
||||||
_log_metrics = view_manager and stats_recorder and exporter
|
|
||||||
|
|
||||||
if _log_metrics:
|
|
||||||
view_manager.register_view(browse_request_view)
|
|
||||||
mmap = stats_recorder.new_measurement_map()
|
|
||||||
|
|
||||||
collection_id_re = re.compile(
|
collection_id_re = re.compile(
|
||||||
r".*/collections/?(?P<collection_id>[a-zA-Z0-9\-\%]+)?(/items/(?P<item_id>.*))?.*" # noqa
|
r".*/collections/?(?P<collection_id>[a-zA-Z0-9\-\%]+)?(/items/(?P<item_id>.*))?.*" # noqa
|
||||||
|
@ -63,7 +40,8 @@ def _collection_item_from_request(
|
||||||
async def trace_request(
|
async def trace_request(
|
||||||
request: Request, call_next: Callable[[Request], Awaitable[Response]]
|
request: Request, call_next: Callable[[Request], Awaitable[Response]]
|
||||||
) -> Response:
|
) -> Response:
|
||||||
if _log_metrics:
|
request_path = request_to_path(request).strip("/")
|
||||||
|
if _log_metrics and request.method.lower() != "head":
|
||||||
tracer = Tracer(
|
tracer = Tracer(
|
||||||
exporter=exporter,
|
exporter=exporter,
|
||||||
sampler=ProbabilitySampler(1.0),
|
sampler=ProbabilitySampler(1.0),
|
||||||
|
@ -86,7 +64,7 @@ async def trace_request(
|
||||||
attribute_key=HTTP_URL, attribute_value=f"{request.url}"
|
attribute_key=HTTP_URL, attribute_value=f"{request.url}"
|
||||||
)
|
)
|
||||||
tracer.add_attribute_to_current_span(
|
tracer.add_attribute_to_current_span(
|
||||||
attribute_key=HTTP_PATH, attribute_value=request_to_path(request)
|
attribute_key=HTTP_PATH, attribute_value=request_path
|
||||||
)
|
)
|
||||||
tracer.add_attribute_to_current_span(
|
tracer.add_attribute_to_current_span(
|
||||||
attribute_key=HTTP_METHOD, attribute_value=str(request.method)
|
attribute_key=HTTP_METHOD, attribute_value=str(request.method)
|
||||||
|
@ -109,20 +87,3 @@ async def trace_request(
|
||||||
return response
|
return response
|
||||||
else:
|
else:
|
||||||
return await call_next(request)
|
return await call_next(request)
|
||||||
|
|
||||||
|
|
||||||
async def count_collection_requests(
|
|
||||||
request: Request, call_next: Callable[[Request], Awaitable[Response]]
|
|
||||||
) -> Response:
|
|
||||||
if _log_metrics:
|
|
||||||
tmap = tag_map_module.TagMap()
|
|
||||||
(collection_id, item_id) = _collection_item_from_request(request)
|
|
||||||
if collection_id:
|
|
||||||
tmap.insert("collection", collection_id)
|
|
||||||
if item_id:
|
|
||||||
tmap.insert("item", item_id)
|
|
||||||
log_collection_request("stac", logger, collection_id, item_id, request)
|
|
||||||
tmap.insert("requestPath", request_to_path(request))
|
|
||||||
mmap.measure_int_put(browse_request_measure, 1)
|
|
||||||
mmap.record(tmap)
|
|
||||||
return await call_next(request)
|
|
||||||
|
|
|
@ -18,11 +18,12 @@ from titiler.application.middleware import (
|
||||||
from titiler.core.errors import DEFAULT_STATUS_CODES, add_exception_handlers
|
from titiler.core.errors import DEFAULT_STATUS_CODES, add_exception_handlers
|
||||||
|
|
||||||
from pccommon.logging import init_logging
|
from pccommon.logging import init_logging
|
||||||
|
from pccommon.middleware import handle_exceptions
|
||||||
from pccommon.openapi import fixup_schema
|
from pccommon.openapi import fixup_schema
|
||||||
from pctiler.config import get_settings
|
from pctiler.config import get_settings
|
||||||
from pctiler.db import close_db_connection, connect_to_db
|
from pctiler.db import close_db_connection, connect_to_db
|
||||||
from pctiler.endpoints import item, legend, pg_mosaic
|
from pctiler.endpoints import item, legend, pg_mosaic
|
||||||
from pctiler.middleware import count_data_requests, trace_request
|
from pctiler.middleware import trace_request
|
||||||
|
|
||||||
# Initialize logging
|
# Initialize logging
|
||||||
init_logging("tiler")
|
init_logging("tiler")
|
||||||
|
@ -58,13 +59,6 @@ app.include_router(
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
@app.middleware("http")
|
|
||||||
async def _count_data_requests(
|
|
||||||
request: Request, call_next: Callable[[Request], Awaitable[Response]]
|
|
||||||
) -> Response:
|
|
||||||
return await count_data_requests(request, call_next)
|
|
||||||
|
|
||||||
|
|
||||||
@app.middleware("http")
|
@app.middleware("http")
|
||||||
async def _trace_requests(
|
async def _trace_requests(
|
||||||
request: Request, call_next: Callable[[Request], Awaitable[Response]]
|
request: Request, call_next: Callable[[Request], Awaitable[Response]]
|
||||||
|
@ -72,6 +66,13 @@ async def _trace_requests(
|
||||||
return await trace_request(request, call_next)
|
return await trace_request(request, call_next)
|
||||||
|
|
||||||
|
|
||||||
|
@app.middleware("http")
|
||||||
|
async def _handle_exceptions(
|
||||||
|
request: Request, call_next: Callable[[Request], Awaitable[Response]]
|
||||||
|
) -> Response:
|
||||||
|
return await handle_exceptions(request, call_next)
|
||||||
|
|
||||||
|
|
||||||
add_exception_handlers(
|
add_exception_handlers(
|
||||||
app, {**DEFAULT_STATUS_CODES, NoAssetFoundError: status.HTTP_404_NOT_FOUND}
|
app, {**DEFAULT_STATUS_CODES, NoAssetFoundError: status.HTTP_404_NOT_FOUND}
|
||||||
) # type: ignore
|
) # type: ignore
|
||||||
|
|
|
@ -1,18 +1,12 @@
|
||||||
import logging
|
import logging
|
||||||
from typing import Awaitable, Callable
|
from typing import Awaitable, Callable
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
from fastapi import Request, Response
|
from fastapi import Request, Response
|
||||||
from opencensus.stats import aggregation as aggregation_module
|
|
||||||
from opencensus.stats import measure as measure_module
|
|
||||||
from opencensus.stats import view as view_module
|
|
||||||
from opencensus.tags import tag_map as tag_map_module
|
|
||||||
from opencensus.trace.samplers import ProbabilitySampler
|
from opencensus.trace.samplers import ProbabilitySampler
|
||||||
from opencensus.trace.span import SpanKind
|
from opencensus.trace.span import SpanKind
|
||||||
from opencensus.trace.tracer import Tracer
|
from opencensus.trace.tracer import Tracer
|
||||||
|
|
||||||
from pccommon.logging import log_collection_request, request_to_path
|
from pccommon.logging import request_to_path
|
||||||
from pccommon.metrics import stats_recorder, view_manager
|
|
||||||
from pccommon.tracing import (
|
from pccommon.tracing import (
|
||||||
HTTP_METHOD,
|
HTTP_METHOD,
|
||||||
HTTP_PATH,
|
HTTP_PATH,
|
||||||
|
@ -23,51 +17,14 @@ from pccommon.tracing import (
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# This list was extracted from every `get` query param in the
|
_log_metrics = exporter is not None
|
||||||
# OpenAPI specification at
|
|
||||||
# http https://planetarycomputer.microsoft.com/api/data/v1/openapi.json
|
|
||||||
# If a string doesn't appear in the list of attributes in a view, we can't
|
|
||||||
# send it as a tag to application insights.
|
|
||||||
# The Azure exporter for python opencensus doesn't seem to want to send more
|
|
||||||
# than 10 tags, so this list is a bespoke selection from the full list of
|
|
||||||
# query params, also with the requestPath.
|
|
||||||
all_query_params = [
|
|
||||||
"assets",
|
|
||||||
"collection",
|
|
||||||
"colormap_name",
|
|
||||||
"expression",
|
|
||||||
"format",
|
|
||||||
"height",
|
|
||||||
"item",
|
|
||||||
"items",
|
|
||||||
"width",
|
|
||||||
"requestPath",
|
|
||||||
]
|
|
||||||
|
|
||||||
data_request_count_measure = measure_module.MeasureInt(
|
|
||||||
"data-request-count",
|
|
||||||
"Requests for data (info, bounds, maps, etc.)",
|
|
||||||
"data-request-count",
|
|
||||||
)
|
|
||||||
data_request_count_view = view_module.View(
|
|
||||||
"Data request count view",
|
|
||||||
data_request_count_measure.description,
|
|
||||||
all_query_params,
|
|
||||||
data_request_count_measure,
|
|
||||||
aggregation_module.CountAggregation(),
|
|
||||||
)
|
|
||||||
|
|
||||||
_log_metrics = view_manager and stats_recorder and exporter
|
|
||||||
|
|
||||||
if _log_metrics:
|
|
||||||
view_manager.register_view(data_request_count_view)
|
|
||||||
mmap = stats_recorder.new_measurement_map()
|
|
||||||
|
|
||||||
|
|
||||||
async def trace_request(
|
async def trace_request(
|
||||||
request: Request, call_next: Callable[[Request], Awaitable[Response]]
|
request: Request, call_next: Callable[[Request], Awaitable[Response]]
|
||||||
) -> Response:
|
) -> Response:
|
||||||
if _log_metrics:
|
request_path = request_to_path(request).strip("/")
|
||||||
|
if _log_metrics and request.method.lower() != "head":
|
||||||
tracer = Tracer(
|
tracer = Tracer(
|
||||||
exporter=exporter,
|
exporter=exporter,
|
||||||
sampler=ProbabilitySampler(1.0),
|
sampler=ProbabilitySampler(1.0),
|
||||||
|
@ -85,7 +42,7 @@ async def trace_request(
|
||||||
response = await call_next(request)
|
response = await call_next(request)
|
||||||
|
|
||||||
tracer.add_attribute_to_current_span(
|
tracer.add_attribute_to_current_span(
|
||||||
attribute_key=HTTP_PATH, attribute_value=request_to_path(request)
|
attribute_key=HTTP_PATH, attribute_value=request_path
|
||||||
)
|
)
|
||||||
tracer.add_attribute_to_current_span(
|
tracer.add_attribute_to_current_span(
|
||||||
attribute_key=HTTP_STATUS_CODE, attribute_value=response.status_code
|
attribute_key=HTTP_STATUS_CODE, attribute_value=response.status_code
|
||||||
|
@ -114,22 +71,3 @@ async def trace_request(
|
||||||
return response
|
return response
|
||||||
else:
|
else:
|
||||||
return await call_next(request)
|
return await call_next(request)
|
||||||
|
|
||||||
|
|
||||||
async def count_data_requests(request: Request, call_next): # type: ignore
|
|
||||||
if _log_metrics:
|
|
||||||
tmap = tag_map_module.TagMap()
|
|
||||||
for k, v in request.query_params.items():
|
|
||||||
tmap.insert(k, v)
|
|
||||||
parsed_url = urlparse(f"{request.url}")
|
|
||||||
tmap.insert("requestPath", parsed_url.path)
|
|
||||||
mmap.measure_int_put(data_request_count_measure, 1)
|
|
||||||
mmap.record(tmap)
|
|
||||||
log_collection_request(
|
|
||||||
"tiler",
|
|
||||||
logger,
|
|
||||||
request.query_params.get("collection"),
|
|
||||||
request.query_params.get("item"),
|
|
||||||
request,
|
|
||||||
)
|
|
||||||
return await call_next(request)
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче