148 строки
5.7 KiB
Python
148 строки
5.7 KiB
Python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

import logging
|
|
import os
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
|
|
from airflow import settings
|
|
from airflow.utils.helpers import parse_template_string
|
|
|
|
|
|
class FileProcessorHandler(logging.Handler):
    """
    FileProcessorHandler is a python log handler that handles dag processor logs.

    The handler writes nothing until :meth:`set_context` is called with the path of
    the DAG file being processed. From then on every record is delegated to a
    ``logging.FileHandler`` writing to
    ``<base_log_folder>/<YYYY-MM-DD, UTC>/<rendered filename template>``.
    A ``latest`` symlink inside ``base_log_folder`` is kept pointing at the
    current day's directory on a best-effort basis.

    :param base_log_folder: Base log folder to place logs.
    :param filename_template: template filename string
    """

    def __init__(self, base_log_folder, filename_template):
        super().__init__()
        # Delegate handler; stays None until set_context() is called.
        self.handler = None
        self.base_log_folder = base_log_folder
        self.dag_dir = os.path.expanduser(settings.DAGS_FOLDER)
        self.filename_template, self.filename_jinja_template = parse_template_string(filename_template)

        # UTC calendar date for which the "latest" symlink was last refreshed.
        # Kept in UTC because _get_log_directory() names directories by UTC date.
        self._cur_date = datetime.utcnow().date()
        Path(self._get_log_directory()).mkdir(parents=True, exist_ok=True)

        self._symlink_latest_log_directory()

    def set_context(self, filename):
        """
        Point the handler at the log file that belongs to the given DAG file.

        Any handler installed by an earlier call is closed so its file
        descriptor is not leaked.

        :param filename: filename in which the dag is located
        """
        local_loc = self._init_file(filename)
        new_handler = logging.FileHandler(local_loc)
        new_handler.setFormatter(self.formatter)
        new_handler.setLevel(self.level)

        # Install the new handler first and close the old one afterwards, so a
        # failure while opening the new file leaves the previous handler usable.
        previous_handler, self.handler = self.handler, new_handler
        if previous_handler is not None:
            previous_handler.close()

        # Refresh the "latest" symlink only when the UTC day has rolled over.
        today = datetime.utcnow().date()
        if self._cur_date < today:
            self._symlink_latest_log_directory()
            self._cur_date = today

    def emit(self, record):
        """Forward the record to the delegate handler; a no-op before set_context()."""
        if self.handler is not None:
            self.handler.emit(record)

    def flush(self):
        """Flush the delegate handler, if one has been created."""
        if self.handler is not None:
            self.handler.flush()

    def close(self):
        """Close the delegate handler (if any) and then this handler itself."""
        if self.handler is not None:
            self.handler.close()
        # logging.Handler.close() does the base-class bookkeeping and is meant
        # to be called from overriding close() methods.
        super().close()

    def _render_filename(self, filename):
        """
        Render the log file name (relative to the day's log directory) for a DAG file.

        :param filename: path of the DAG file
        :return: the filename template rendered with ``filename`` set to the DAG
            file's path relative to the DAG folder, or to ``native_dags/...``
            for files shipped inside the airflow package
        """
        # Airflow log path used to be generated by the relative path
        # `os.path.relpath(filename, self.dag_dir)`, however the DAG `smart_sensor_group` and
        # all DAGs in airflow source code are not located in the DAG dir as other DAGs.
        # That will create a log filepath which is not under control since it could be outside
        # of the log dir. The change here is to make sure the log path for DAGs in airflow code
        # is always inside the log dir as other DAGs. To differentiate them from regular DAGs,
        # their logs will be in the `log_dir/native_dags`.
        import airflow

        airflow_directory = airflow.__path__[0]
        # Compare against the directory *with* a trailing separator so that a
        # sibling such as ".../airflow_dags/x.py" is not mistaken for a file
        # inside ".../airflow".
        if filename.startswith(os.path.join(airflow_directory, "")):
            filename = os.path.join("native_dags", os.path.relpath(filename, airflow_directory))
        else:
            filename = os.path.relpath(filename, self.dag_dir)

        if self.filename_jinja_template:
            return self.filename_jinja_template.render(filename=filename)

        return self.filename_template.format(filename=filename)

    def _get_log_directory(self):
        """Return ``<base_log_folder>/<YYYY-MM-DD>`` for the current UTC date."""
        now = datetime.utcnow()

        return os.path.join(self.base_log_folder, now.strftime("%Y-%m-%d"))

    def _symlink_latest_log_directory(self):
        """
        Create symbolic link to the current day's log directory to
        allow easy access to the latest scheduler log files.

        Best effort: nothing happens when the day's directory does not exist,
        an existing real file/directory named ``latest`` is left alone, and any
        ``OSError`` is logged as a warning instead of being raised.

        :return: None
        """
        log_directory = self._get_log_directory()
        latest_log_directory_path = os.path.join(self.base_log_folder, "latest")
        if not os.path.isdir(log_directory):
            return

        try:
            if os.path.islink(latest_log_directory_path):
                # if symlink exists but is stale, update it
                if os.readlink(latest_log_directory_path) != log_directory:
                    os.unlink(latest_log_directory_path)
                    os.symlink(log_directory, latest_log_directory_path)
            elif os.path.isdir(latest_log_directory_path) or os.path.isfile(latest_log_directory_path):
                logging.warning(
                    "%s already exists as a dir/file. Skip creating symlink.", latest_log_directory_path
                )
            else:
                os.symlink(log_directory, latest_log_directory_path)
        except OSError:
            logging.warning("OSError while attempting to symlink the latest log directory")

    def _init_file(self, filename):
        """
        Create log file and directory if required.

        :param filename: path of the DAG file whose log file should be prepared
        :return: absolute path of the log file
        """
        log_file_path = os.path.abspath(
            os.path.join(self._get_log_directory(), self._render_filename(filename))
        )

        Path(log_file_path).parent.mkdir(parents=True, exist_ok=True)

        # Append mode creates the file when missing and leaves existing content
        # untouched, so no separate existence check is needed.
        with open(log_file_path, "a"):
            pass

        return log_file_path
|