Remove MySQL integration and dedicated code (#7814)

* Remove MySQL integration and dedicated code

* Nit

* Clean up a few more files

* Fix linting

* Propagate DATABASE_URL in other containers
This commit is contained in:
EvaBardou 2024-05-30 15:19:56 +02:00 коммит произвёл GitHub
Родитель 96e7651f06
Коммит 494ca3c615
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: B5690EEEBB952194
30 изменённых файлов: 554 добавлений и 921 удалений

Просмотреть файл

@ -49,24 +49,6 @@ jobs:
command: tox -e linters
name: Run linters
python-tests-mysql:
machine:
image: default
steps:
- checkout
- docker/install-docker-compose:
version: 1.29.2
- run:
command: sudo apt-get update && sudo apt-get install python3-venv -y
name: python for glean
- run:
name: Run tests and coverage within Docker container
command: |
pip install --upgrade pip
pip install tox
tox -e docker
- codecov/upload
python-tests-postgres:
machine:
image: default
@ -158,7 +140,6 @@ workflows:
jobs:
- javascript-tests
- builds
- python-tests-mysql
- python-tests-postgres
- test-docker-build
- deploy:

Просмотреть файл

@ -15,7 +15,7 @@ services:
# Development/CI-specific environment variables only.
# Those that do not vary across environments should go in `Dockerfile`.
- BROKER_URL=amqp://guest:guest@rabbitmq//
- DATABASE_URL=${DATABASE_URL:-mysql://root@mysql/treeherder}
- DATABASE_URL=${DATABASE_URL:-psql://postgres:mozilla1234@postgres:5432/treeherder}
- GITHUB_TOKEN=${GITHUB_TOKEN:-}
- UPSTREAM_DATABASE_URL=${UPSTREAM_DATABASE_URL:-}
- PERF_SHERIFF_BOT_CLIENT_ID=${PERF_SHERIFF_BOT_CLIENT_ID:-}
@ -43,7 +43,6 @@ services:
ports:
- '8000:8000'
depends_on:
- mysql
- redis
- postgres
- rabbitmq
@ -68,22 +67,6 @@ services:
- '5000:5000'
platform: linux/amd64
mysql:
container_name: mysql
# https://hub.docker.com/r/library/mysql/
image: mysql:5.7.44
platform: linux/amd64
environment:
- MYSQL_ALLOW_EMPTY_PASSWORD=true
- MYSQL_DATABASE=treeherder
- LANG=C.UTF_8
volumes:
- ./docker/mysql.cnf:/etc/mysql/conf.d/mysql.cnf
- mysql_data:/var/lib/mysql
ports:
- '3306:3306'
command: --character-set-server=utf8 --collation-server=utf8_bin
postgres:
container_name: postgres
# https://hub.docker.com/r/library/postgres/
@ -126,7 +109,7 @@ services:
- PULSE_URL=${PULSE_URL:-}
- LOGGING_LEVEL=INFO
- PULSE_AUTO_DELETE_QUEUES=True
- DATABASE_URL=mysql://root@mysql:3306/treeherder
- DATABASE_URL=${DATABASE_URL:-psql://postgres:mozilla1234@postgres:5432/treeherder}
- BROKER_URL=amqp://guest:guest@rabbitmq//
- SKIP_INGESTION=${SKIP_INGESTION:-False}
entrypoint: './docker/entrypoint.sh'
@ -134,7 +117,6 @@ services:
volumes:
- .:/app
depends_on:
- mysql
- postgres
- rabbitmq
platform: linux/amd64
@ -145,14 +127,13 @@ services:
dockerfile: docker/dev.Dockerfile
environment:
- BROKER_URL=amqp://guest:guest@rabbitmq:5672//
- DATABASE_URL=mysql://root@mysql:3306/treeherder
- DATABASE_URL=${DATABASE_URL:-psql://postgres:mozilla1234@postgres:5432/treeherder}
- PROJECTS_TO_INGEST=${PROJECTS_TO_INGEST:-autoland,try}
entrypoint: './docker/entrypoint.sh'
command: celery -A treeherder worker --uid=nobody --gid=nogroup --without-gossip --without-mingle --without-heartbeat -Q store_pulse_pushes,store_pulse_tasks,store_pulse_tasks_classification,statsd --concurrency=1 --loglevel=INFO
volumes:
- .:/app
depends_on:
- mysql
- postgres
- redis
- rabbitmq
@ -168,5 +149,4 @@ services:
volumes:
# TODO: Experiment with using tmpfs when testing, to speed up database-using Python tests.
mysql_data: {}
postgres_data: {}

Просмотреть файл

@ -17,12 +17,6 @@ RUN yarn build
## Backend stage
FROM python:3.9.19-slim-bullseye
# libmysqlclient-dev is required for the mysqlclient Python package.
RUN apt-get update && apt-get install -y --no-install-recommends \
pkg-config \
default-libmysqlclient-dev \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app
COPY requirements/ /app/requirements/

Просмотреть файл

@ -3,12 +3,8 @@ FROM python:3.9.19-bullseye
# Variables that are not specific to a particular environment.
ENV NEW_RELIC_CONFIG_FILE newrelic.ini
# libmysqlclient-dev and gcc are required for the mysqlclient Python package.
# netcat is used for the MySQL readiness check in entrypoint.sh.
# netcat is used for the Postgres readiness check in entrypoint.sh.
RUN apt-get update && apt-get install -y --no-install-recommends \
pkg-config \
default-libmysqlclient-dev \
gcc \
netcat \
&& rm -rf /var/lib/apt/lists/*

Просмотреть файл

@ -1,6 +1,6 @@
#!/usr/bin/env bash
# This file is the entrypoint for the backend container.
# It takes care of making sure to wait for the mysql and rabbitmq containers to be ready
# It takes care of making sure to wait for the postgres and rabbitmq containers to be ready
# Make non-zero exit codes & other errors fatal.
set -euo pipefail
@ -18,9 +18,6 @@ function check_service () {
# Keep these in sync with DATABASE_URL.
echo "Checking database status at $DATABASE_URL"
if [[ ${DATABASE_URL:0:8} == "mysql://" ]]; then
check_service "MySQL" "mysql" 3306;
fi
if [[ ${DATABASE_URL:0:7} == "psql://" ]]; then
check_service "PostgreSQL" "postgres" 5432;
fi

Просмотреть файл

@ -1,28 +0,0 @@
# Overrides the mysql defaults in /etc/mysql/my.cnf
# Any changes here will require restarting docker-compose to take effect.
# NB: This file is only used by docker-compose, and must be kept in sync with
# the RDS parameter group used by stage/prod:
# https://github.com/mozilla-platform-ops/devservices-aws/blob/master/treeherder/rds.tf
[mysqld]
character_set_server="utf8"
collation_server="utf8_bin"
max_connections=1000
# Ensure operations involving astral characters fail loudly,
# rather than mysql silently replacing each byte of the
# original character with a U+FFFD replacement character.
# See bug 1275425.
sql_mode="NO_ENGINE_SUBSTITUTION,STRICT_ALL_TABLES"
# Django advises using READ-COMMITTED instead of REPEATABLE-READ:
# https://docs.djangoproject.com/en/1.10/ref/models/querysets/#get-or-create
# Unhelpfully MySQL uses a different (undocumented) variable name if set via config file:
# https://bugs.mysql.com/bug.php?id=70008
transaction-isolation=READ-COMMITTED
# Development/CI only: Speed up I/O by reducing data-loss protection.
innodb_flush_log_at_trx_commit="0"
# Development/CI only: Reduce verbosity of log output (output errors only).
log_error_verbosity="1"

Просмотреть файл

@ -60,29 +60,29 @@ toolbar to your right.
## Connecting to Services Running inside Docker
Treeherder uses various services to function, eg MySQL, etc.
Treeherder uses various services to function, e.g. Postgres.
At times it can be useful to connect to them from outside the Docker environment.
The `docker-compose.yml` file defines how internal ports are mapped to the host OS' ports.
In the below example we're mapping the container's port 3306 (MySQL's default port) to host port 3306.
In the below example we're mapping the container's port 5432 (Postgres's default port) to host port 5432.
```yaml
# This is a line from the docker-compose.yml file
ports:
- '3306:3306'
- '5432:5432'
```
<!-- prettier-ignore -->
!!! note
Any forwarded ports will block usage of that port on the host OS even if there isn't a service running inside the VM talking to it.
With MySQL exposed at port 3306 you can connect to it from your host OS with the following credentials:
With Postgres exposed at port 5432 you can connect to it from your host OS with the following credentials:
- host: `localhost`
- port: `3306`
- user: `root`
- password: leave blank
- port: `5432`
- user: `postgres`
- password: `mozilla1234`
Other services running inside the Compose project, can be accessed in the same way.

Просмотреть файл

@ -25,7 +25,7 @@ For less urgent issues or general support, you can file a bug with [cloudOps](ht
[prototype](https://rpm.newrelic.com/accounts/677903/applications/7385291/transactions?type=other&show_browser=false) |
[stage](https://rpm.newrelic.com/accounts/677903/applications/14179733/transactions?type=other&show_browser=false) |
[prod](https://rpm.newrelic.com/accounts/677903/applications/14179757/transactions?type=other&show_browser=false)
- MySQL/Redis client request stats:
- Postgres/Redis client request stats:
[prototype](https://rpm.newrelic.com/accounts/677903/applications/7385291/datastores) |
[stage](https://rpm.newrelic.com/accounts/677903/applications/14179733/datastores) |
[prod](https://rpm.newrelic.com/accounts/677903/applications/14179757/datastores)

Просмотреть файл

@ -117,14 +117,14 @@ is what needs to be set. You can do this in a file in the root of `/treeherder`
`.env`:
```bash
DATABASE_URL=mysql://user:password@hostname/treeherder
DATABASE_URL=psql://user:password@hostname/treeherder
```
Alternatively, you can `export` that value in your terminal prior to executing
`docker-compose up` or just specify it on the command line as you execute:
```bash
DATABASE_URL=mysql://user:password@hostname/treeherder SKIP_INGESTION=True docker-compose up
DATABASE_URL=psql://user:password@hostname/treeherder SKIP_INGESTION=True docker-compose up
```
<!-- prettier-ignore -->
@ -134,14 +134,14 @@ DATABASE_URL=mysql://user:password@hostname/treeherder SKIP_INGESTION=True docke
even if `DATABASE_URL` is set. But it will use your system's resources unnecessarily.
To skip data ingestion, set the var `SKIP_INGESTION=True`
### Deleting the MySql database
### Deleting the Postgres database
The MySql database is kept locally and is not destroyed when the Docker containers are destroyed.
The Postgres database is kept locally and is not destroyed when the Docker containers are destroyed.
If you want to start from scratch type the following commands:
```bash
docker-compose down
docker volume rm treeherder_mysql_data
docker volume rm treeherder_postgres_data
```
### Running the ingestion tasks

Просмотреть файл

@ -2,10 +2,10 @@
# Make non-zero exit codes & other errors fatal.
set -euo pipefail
export DATABASE_URL=${DATABASE_URL:-mysql://root@127.0.0.1:3306/treeherder}
# Only execute if we're using the Mysql container
if [ "${DATABASE_URL}" == "mysql://root@mysql/treeherder" ] ||
[ "${DATABASE_URL}" == "mysql://root@127.0.0.1:3306/treeherder" ]; then
export DATABASE_URL=${DATABASE_URL:-psql://postgres:mozilla1234@127.0.0.1:5432/treeherder}
# Only execute if we're using the Postgres container
if [ "${DATABASE_URL}" == "psql://postgres:mozilla1234@postgres:5432/treeherder" ] ||
[ "${DATABASE_URL}" == "psql://postgres:mozilla1234@127.0.0.1:5432/treeherder" ]; then
# Initialize migrations
echo '-----> Running Django migrations and loading reference data'
./manage.py migrate --noinput

Просмотреть файл

@ -8,7 +8,6 @@ simplejson==3.19.2 # import simplejson
newrelic==9.10.0
certifi==2024.2.2
mysqlclient==2.2.4 # Required by Django
psycopg2-binary==2.9.9
jsonschema==4.22.0 # import jsonschema

Разница между файлами не показана из-за своего большого размера Загрузить разницу

Просмотреть файл

@ -7,13 +7,13 @@ The backend test suite can be run outside the Docker container.
We need all but the main `backend` service running. Since multiple `backend` instances are allowed, we will simply start everything up
# ENSURE THE IMAGES ARE CLEAN
docker rm "/mysql"
docker rm "/postgres"
docker rm "/frontend"
docker rm "/rabbitmq"
docker rm "/redis"
docker rm "/backend"
docker-compose down
docker volume rm treeherder_mysql_data
docker volume rm treeherder_postgres_data
# SETUP ALL IMAGES
docker-compose up --build
@ -42,7 +42,6 @@ Be sure you are in the `treeherder` main directory
python -m venv .venv # IMPORTANT: Notice the dot in the name
source .venv/bin/activate
export LDFLAGS=-L/usr/local/opt/openssl/lib # https://github.com/PyMySQL/mysqlclient-python/issues/131#issuecomment-503644579
pip install --no-deps -r requirements/dev.txt
pip install --no-deps -r requirements/common.txt
@ -56,7 +55,7 @@ For Windows, Treeherder requires a number of environment variables that point to
If you plan to use an IDE, here is the same, as a very long line
BROKER_URL=localhost//guest:guest@rabbitmq//;DATABASE_URL=mysql://root@localhost:3306/treeherder;REDIS_URL=redis://localhost:6379;SITE_URL=http://backend:8000/;TREEHERDER_DEBUG=True;TREEHERDER_DJANGO_SECRET_KEY=secret-key-of-at-least-50-characters-to-pass-check-deploy;NEW_RELIC_DEVELOPER_MODE=True
BROKER_URL=localhost//guest:guest@rabbitmq//;DATABASE_URL=psql://postgres:mozilla1234@postgres:5432/treeherder;REDIS_URL=redis://localhost:6379;SITE_URL=http://backend:8000/;TREEHERDER_DEBUG=True;TREEHERDER_DJANGO_SECRET_KEY=secret-key-of-at-least-50-characters-to-pass-check-deploy;NEW_RELIC_DEVELOPER_MODE=True
## Ensure everything is working

Просмотреть файл

@ -1,5 +1,5 @@
SET BROKER_URL=localhost//guest:guest@rabbitmq//
SET DATABASE_URL=mysql://root@localhost:3306/treeherder
SET DATABASE_URL=psql://postgres:mozilla1234@postgres:5432/treeherder
SET REDIS_URL=redis://localhost:6379
SET SITE_URL=http://backend:8000/
SET TREEHERDER_DEBUG=True

Просмотреть файл

@ -99,23 +99,12 @@ def test_store_error_summary_astral(activate_responses, test_repository, test_jo
assert failure.repository == test_repository
# Specific unicode chars cannot be inserted as MySQL pseudo-UTF8 and are replaced by a plain text representation
if settings.DATABASES["default"]["ENGINE"] == "django.db.backends.mysql":
assert (
failure.test
== "toolkit/content/tests/widgets/test_videocontrols_video_direction.html <U+01F346>"
)
assert failure.subtest == "Test timed out. <U+010081>"
assert failure.message == "<U+0F0151>"
assert failure.stack.endswith("<U+0F0151>")
else:
assert (
failure.test
== "toolkit/content/tests/widgets/test_videocontrols_video_direction.html 🍆"
)
assert failure.subtest == "Test timed out. 𐂁"
assert failure.message == "󰅑"
assert failure.stack.endswith("󰅑")
assert (
failure.test == "toolkit/content/tests/widgets/test_videocontrols_video_direction.html 🍆"
)
assert failure.subtest == "Test timed out. 𐂁"
assert failure.message == "󰅑"
assert failure.stack.endswith("󰅑")
assert failure.stackwalk_stdout is None
assert failure.stackwalk_stderr is None

Просмотреть файл

@ -4,7 +4,6 @@ from unittest.mock import MagicMock
from unittest.mock import patch
import pytest
from django.conf import settings
from django.core.management import call_command
from django.db import connection, IntegrityError
@ -810,16 +809,7 @@ def test_deleting_performance_data_cascades_to_perf_multicomit_data(test_perf_da
try:
cursor = connection.cursor()
if settings.DATABASES["default"]["ENGINE"] == "django.db.backends.mysql":
cursor.execute(
"""
DELETE FROM `performance_datum`
WHERE id = %s
""",
[perf_datum.id],
)
else:
PerformanceDatum.objects.filter(id=perf_datum.id).delete()
PerformanceDatum.objects.filter(id=perf_datum.id).delete()
except IntegrityError:
pytest.fail()
finally:

Просмотреть файл

@ -141,31 +141,6 @@ def test_bug_properties(transactional_db, sample_bugs):
assert set(suggestions["open_recent"][0].keys()) == expected_keys
SEARCH_TERMS = (
("(test_popup_preventdefault_chrome.xul+)", " test_popup_preventdefault_chrome.xul "),
(
"TEST-UNEXPECTED-TIMEOUT | /webrtc/promises-call.html | Can set up a basic WebRTC call with only data using promises. - Test timed out",
"TEST UNEXPECTED TIMEOUT | /webrtc/promises call.html | Can set up a basic WebRTC call with only data using promises. Test timed out",
),
(
"*command timed out: 3600 seconds without output running~",
" command timed out: 3600 seconds without output running ",
),
(
'"input password unmask.html#abc_def 0 7 7 7"',
" input password unmask.html#abc_def 0 7 7 7 ",
),
)
def test_sanitized_search_term():
"""Test that search terms are properly sanitized (this method is called in Bugscache.search before executing queries)."""
for case in SEARCH_TERMS:
sanitized_term = Bugscache.sanitized_search_term(case[0])
assert sanitized_term == case[1]
@pytest.mark.django_db(transaction=True)
def test_import(mock_bugscache_bugzilla_request):
"""

12
tox.ini
Просмотреть файл

@ -10,7 +10,7 @@ allowlist_externals =
sh
docker-compose
commands_pre =
docker-compose up --detach mysql postgres redis rabbitmq
docker-compose up --detach postgres redis rabbitmq
pip install --no-deps -r {toxinidir}/requirements/dev.txt
pip install --no-deps -r {toxinidir}/requirements/common.txt
commands =
@ -41,16 +41,12 @@ commands =
mkdocs build
commands_post =
[testenv:docker]
[testenv:docker-postgres]
commands_pre =
allowlist_externals=
docker-compose
commands =
docker-compose run -e TREEHERDER_DEBUG=False backend bash -c "pytest --cov --cov-report=xml tests/ --runslow -p no:unraisableexception"
[testenv:docker-postgres]
commands_pre =
allowlist_externals=
docker-compose
commands =
docker-compose run -e TREEHERDER_DEBUG=False -e DATABASE_URL=psql://postgres:mozilla1234@postgres:5432/treeherder backend bash -c "pytest --cov --cov-report=xml tests/ --runslow -p no:unraisableexception"
[flake8]
per-file-ignores = treeherder/model/models.py:E402

Просмотреть файл

@ -125,14 +125,14 @@ MIDDLEWARE = [
# Database
# The database config is defined using environment variables of form:
#
# 'mysql://username:password@host:optional_port/database_name'
# 'psql://username:password@host:optional_port/database_name'
#
# which django-environ converts into the Django DB settings dict format.
LOCALHOST_MYSQL_HOST = "mysql://root@{}:3306/treeherder".format(
LOCALHOST_PSQL_HOST = "psql://postgres:mozilla1234@{}:5432/treeherder".format(
"localhost" if IS_WINDOWS else "127.0.0.1"
)
DATABASES = {
"default": env.db_url("DATABASE_URL", default=LOCALHOST_MYSQL_HOST),
"default": env.db_url("DATABASE_URL", default=LOCALHOST_PSQL_HOST),
}
# Only used when syncing local database with production replicas
@ -147,27 +147,6 @@ for alias, db in DATABASES.items():
# Persist database connections for 5 minutes, to avoid expensive reconnects.
db["CONN_MAX_AGE"] = 300
# These options are only valid for mysql
if db["ENGINE"] != "django.db.backends.mysql":
continue
db["OPTIONS"] = {
# Override Django's default connection charset of 'utf8', otherwise it's
# still not possible to insert non-BMP unicode into utf8mb4 tables.
"charset": "utf8mb4",
# From MySQL 5.7 onwards and on fresh installs of MySQL 5.6, the default value of the sql_mode
# option contains STRICT_TRANS_TABLES. That option escalates warnings into errors when data are
# truncated upon insertion, so Django highly recommends activating a strict mode for MySQL to
# prevent data loss (either STRICT_TRANS_TABLES or STRICT_ALL_TABLES).
"init_command": "SET sql_mode='STRICT_TRANS_TABLES'",
}
# For use of the stage replica, use the 'deployment/gcp/ca-cert.pem' path for use in your local env file
# or pass the variable to docker-compose command; additional certs are in the deployment directory.
if connection_should_use_tls(db["HOST"]):
db["OPTIONS"]["ssl"] = {
"ca": env("TLS_CERT_PATH", default=None),
}
# Since Django 3.2, the default AutoField must be configured
DEFAULT_AUTO_FIELD = "django.db.models.AutoField"

Просмотреть файл

@ -4,4 +4,4 @@ from furl import furl
def connection_should_use_tls(url):
# Ensure use of celery workers for local development
host = furl(url).host or url # The url passed is already just the hostname.
return host not in ("127.0.0.1", "localhost", "mysql", "rabbitmq")
return host not in ("127.0.0.1", "localhost", "postgres", "rabbitmq")

Просмотреть файл

@ -5,7 +5,6 @@ from abc import ABC, abstractmethod
from datetime import timedelta, datetime
from itertools import cycle
from django.conf import settings
from django.db.backends.utils import CursorWrapper
from treeherder.model.models import Repository
@ -80,29 +79,12 @@ class MainRemovalStrategy(RemovalStrategy):
def remove(self, using: CursorWrapper):
chunk_size = self._find_ideal_chunk_size()
if settings.DATABASES["default"]["ENGINE"] == "django.db.backends.mysql":
# Django's queryset API doesn't support MySQL's
# DELETE statements with LIMIT constructs,
# even though this database is capable of doing that.
#
# If ever this support is added in Django, replace
# raw SQL bellow with equivalent queryset commands.
using.execute(
"""
DELETE FROM `performance_datum`
WHERE push_timestamp <= %s
LIMIT %s
""",
[self._max_timestamp, chunk_size],
)
else:
deleted, _ = PerformanceDatum.objects.filter(
id__in=PerformanceDatum.objects.filter(
push_timestamp__lte=self._max_timestamp
).values_list("id")[:chunk_size]
).delete()
using.rowcount = deleted
deleted, _ = PerformanceDatum.objects.filter(
id__in=PerformanceDatum.objects.filter(
push_timestamp__lte=self._max_timestamp
).values_list("id")[:chunk_size]
).delete()
using.rowcount = deleted
@property
def name(self) -> str:
@ -192,35 +174,14 @@ class TryDataRemoval(RemovalStrategy):
return "try data removal strategy"
def __attempt_remove(self, using):
if settings.DATABASES["default"]["ENGINE"] == "django.db.backends.mysql":
# Django's queryset API doesn't support MySQL's
# DELETE statements with LIMIT constructs,
# even though this database is capable of doing that.
#
# If ever this support is added in Django, replace
# raw SQL bellow with equivalent queryset commands.
total_signatures = len(self.target_signatures)
from_target_signatures = " OR ".join(["signature_id = %s"] * total_signatures)
delete_try_data = f"""
DELETE FROM `performance_datum`
WHERE repository_id = %s AND push_timestamp <= %s AND ({from_target_signatures})
LIMIT %s
"""
using.execute(
delete_try_data,
[self.try_repo, self._max_timestamp, *self.target_signatures, self._chunk_size],
)
else:
deleted, _ = PerformanceDatum.objects.filter(
id__in=PerformanceDatum.objects.filter(
repository_id=self.try_repo,
push_timestamp__lte=self._max_timestamp,
signature_id__in=self.target_signatures,
).values_list("id")[: self._chunk_size]
).delete()
using.rowcount = deleted
deleted, _ = PerformanceDatum.objects.filter(
id__in=PerformanceDatum.objects.filter(
repository_id=self.try_repo,
push_timestamp__lte=self._max_timestamp,
signature_id__in=self.target_signatures,
).values_list("id")[: self._chunk_size]
).delete()
using.rowcount = deleted
def __lookup_new_signature(self):
self.__target_signatures = self.__try_signatures[: self.SIGNATURE_BULK_SIZE]
@ -284,33 +245,12 @@ class IrrelevantDataRemoval(RemovalStrategy):
def remove(self, using: CursorWrapper):
chunk_size = self._find_ideal_chunk_size()
if settings.DATABASES["default"]["ENGINE"] == "django.db.backends.mysql":
# Django's queryset API doesn't support MySQL's
# DELETE statements with LIMIT constructs,
# even though this database is capable of doing that.
#
# If ever this support is added in Django, replace
# raw SQL bellow with equivalent queryset commands.
using.execute(
"""
DELETE FROM `performance_datum`
WHERE repository_id = %s AND push_timestamp <= %s
LIMIT %s
""",
[
self.irrelevant_repo,
self._max_timestamp,
chunk_size,
],
)
else:
deleted, _ = PerformanceDatum.objects.filter(
id__in=PerformanceDatum.objects.filter(
repository_id=self.irrelevant_repo, push_timestamp__lte=self._max_timestamp
).values_list("id")[:chunk_size]
).delete()
using.rowcount = deleted
deleted, _ = PerformanceDatum.objects.filter(
id__in=PerformanceDatum.objects.filter(
repository_id=self.irrelevant_repo, push_timestamp__lte=self._max_timestamp
).values_list("id")[:chunk_size]
).delete()
using.rowcount = deleted
def _find_ideal_chunk_size(self) -> int:
max_id_of_non_expired_row = (
@ -402,35 +342,14 @@ class StalledDataRemoval(RemovalStrategy):
return "stalled data removal strategy"
def __attempt_remove(self, using: CursorWrapper):
if settings.DATABASES["default"]["ENGINE"] == "django.db.backends.mysql":
# Django's queryset API doesn't support MySQL's
# DELETE statements with LIMIT constructs,
# even though this database is capable of doing that.
#
# If ever this support is added in Django, replace
# raw SQL bellow with equivalent queryset commands.
using.execute(
"""
DELETE FROM `performance_datum`
WHERE repository_id = %s AND signature_id = %s AND push_timestamp <= %s
LIMIT %s
""",
[
self.target_signature.repository_id,
self.target_signature.id,
self._max_timestamp,
self._chunk_size,
],
)
else:
deleted, _ = PerformanceDatum.objects.filter(
id__in=PerformanceDatum.objects.filter(
repository_id=self.target_signature.repository_id,
signature_id=self.target_signature.id,
push_timestamp__lte=self._max_timestamp,
).values_list("id")[: self._chunk_size]
).delete()
using.rowcount = deleted
deleted, _ = PerformanceDatum.objects.filter(
id__in=PerformanceDatum.objects.filter(
repository_id=self.target_signature.repository_id,
signature_id=self.target_signature.id,
push_timestamp__lte=self._max_timestamp,
).values_list("id")[: self._chunk_size]
).delete()
using.rowcount = deleted
def __lookup_new_signature(self):
try:

Просмотреть файл

@ -220,7 +220,7 @@ def get_error_search_term_and_path(error_line):
Generate a search term from the given error_line string.
Attempt to build a search term that will yield meaningful results when used
in a MySQL FTS query.
in a FTS query.
"""
if not error_line:
return None

Просмотреть файл

@ -6,72 +6,6 @@ from django.conf import settings
from django.db import migrations, models
if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql':
EXTRA_MIGRATIONS = [
# Manually created migrations.
# Since Django doesn't natively support creating FULLTEXT indices.
migrations.RunSQL(
[
# Suppress the MySQL warning "InnoDB rebuilding table to add column FTS_DOC_ID":
# https://dev.mysql.com/doc/refman/5.7/en/innodb-fulltext-index.html#innodb-fulltext-index-docid
# The table is empty when the index is added, so we don't care about it being rebuilt,
# and there isn't a better way to add the index without Django FULLTEXT support.
'SET @old_max_error_count=@@max_error_count, max_error_count=0;',
'CREATE FULLTEXT INDEX idx_summary ON bugscache (summary);',
'SET max_error_count=@old_max_error_count;',
],
reverse_sql=['ALTER TABLE bugscache DROP INDEX idx_summary;'],
state_operations=[
migrations.AddIndex(
model_name='bugscache',
index=models.Index(fields=['summary'], name='bugscache_summary_7f6b96_idx'),
)
],
),
# Since Django doesn't natively support creating composite prefix indicies for Mysql
migrations.RunSQL(
[
'CREATE INDEX failure_line_test_idx ON failure_line (test(50), subtest(25), status, expected, created);',
'CREATE INDEX failure_line_signature_test_idx ON failure_line (signature(25), test(50), created);',
],
reverse_sql=[
'DROP INDEX failure_line_test_idx ON failure_line;',
'DROP INDEX failure_line_signature_test_idx ON failure_line;',
],
state_operations=[
migrations.AlterIndexTogether(
name='failureline',
index_together=set(
[
('test', 'subtest', 'status', 'expected', 'created'),
('job_guid', 'repository'),
('signature', 'test', 'created'),
]
),
),
],
),
]
else:
# On postgres we can use standard migrations
EXTRA_MIGRATIONS = [
migrations.AlterIndexTogether(
name='failureline',
index_together=set(
[
('test', 'subtest', 'status', 'expected', 'created'),
('job_guid', 'repository'),
('signature', 'test', 'created'),
]
),
),
migrations.AddIndex(
model_name='bugscache',
index=models.Index(fields=['summary'], name='bugscache_summary_7f6b96_idx'),
),
]
class Migration(migrations.Migration):
initial = True
@ -1030,4 +964,18 @@ class Migration(migrations.Migration):
name='bugjobmap',
unique_together=set([('job', 'bug_id')]),
),
] + EXTRA_MIGRATIONS
migrations.AlterIndexTogether(
name='failureline',
index_together=set(
[
('test', 'subtest', 'status', 'expected', 'created'),
('job_guid', 'repository'),
('signature', 'test', 'created'),
]
),
),
migrations.AddIndex(
model_name='bugscache',
index=models.Index(fields=['summary'], name='bugscache_summary_7f6b96_idx'),
),
]

Просмотреть файл

@ -1,7 +1,6 @@
import datetime
import itertools
import logging
import re
import time
from hashlib import sha1
@ -10,7 +9,6 @@ import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning, module="newrelic")
import newrelic.agent
from django.conf import settings
from django.contrib.auth.models import User
from django.core.cache import cache
from django.core.exceptions import ObjectDoesNotExist
@ -233,58 +231,20 @@ class Bugscache(models.Model):
def __str__(self):
return f"{self.id}"
@classmethod
def sanitized_search_term(cls, search_term):
# MySQL Full Text Search operators, based on:
# https://dev.mysql.com/doc/refman/5.7/en/fulltext-boolean.html
# and other characters we want to remove
mysql_fts_operators_re = re.compile(r'[-+@<>()~*"\\]')
# Replace MySQL's Full Text Search Operators with spaces so searching
# for errors that have been pasted in still works.
return re.sub(mysql_fts_operators_re, " ", search_term)
@classmethod
def search(cls, search_term):
max_size = 50
if settings.DATABASES["default"]["ENGINE"] == "django.db.backends.mysql":
# Do not wrap a string in quotes to search as a phrase;
# see https://bugzilla.mozilla.org/show_bug.cgi?id=1704311
search_term_fulltext = cls.sanitized_search_term(search_term)
# Substitute escape and wildcard characters, so the search term is used
# literally in the LIKE statement.
search_term_like = (
search_term.replace("=", "==")
.replace("%", "=%")
.replace("_", "=_")
.replace('\\"', "")
)
recent_qs = cls.objects.raw(
"""
SELECT id, summary, crash_signature, keywords, resolution, status, dupe_of,
MATCH (`summary`) AGAINST (%s IN BOOLEAN MODE) AS relevance
FROM bugscache
WHERE 1
AND `summary` LIKE CONCAT ('%%%%', %s, '%%%%') ESCAPE '='
ORDER BY relevance DESC
LIMIT 0,%s
""",
[search_term_fulltext, search_term_like, max_size],
)
else:
# On PostgreSQL we can use the ORM directly, but NOT the full text search
# as the ranking algorithm expects english words, not paths
# So we use standard pattern matching AND trigram similarity to compare suite of characters
# instead of words
# Django already escapes special characters, so we do not need to handle that here
recent_qs = (
Bugscache.objects.filter(summary__icontains=search_term)
.annotate(similarity=TrigramSimilarity("summary", search_term))
.order_by("-similarity")[0:max_size]
)
# On PostgreSQL we can use the ORM directly, but NOT the full text search
# as the ranking algorithm expects english words, not paths
# So we use standard pattern matching AND trigram similarity to compare suite of characters
# instead of words
# Django already escapes special characters, so we do not need to handle that here
recent_qs = (
Bugscache.objects.filter(summary__icontains=search_term)
.annotate(similarity=TrigramSimilarity("summary", search_term))
.order_by("-similarity")[0:max_size]
)
exclude_fields = ["modified", "processed_update"]
try:

Просмотреть файл

@ -1,15 +1,9 @@
# Generated by Django 3.0.8 on 2020-12-11 14:42
from django.db import migrations
from django.conf import settings
MULTICOMMIT_CONSTRAINT_SYMBOL = 'perf_multicommitdatu_perf_datum_id_c2d7eb14_fk_performan'
if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql':
DROP_TYPE = 'FOREIGN KEY'
else:
DROP_TYPE = 'CONSTRAINT'
class Migration(migrations.Migration):
dependencies = [
@ -21,7 +15,7 @@ class Migration(migrations.Migration):
# add ON DELETE CASCADE at database level
[
f'ALTER TABLE perf_multicommitdatum '
f'DROP {DROP_TYPE} {MULTICOMMIT_CONSTRAINT_SYMBOL};',
f'DROP CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL};',
f'ALTER TABLE perf_multicommitdatum '
f'ADD CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL} '
f'FOREIGN KEY (perf_datum_id) REFERENCES performance_datum (ID) ON DELETE CASCADE;',
@ -29,7 +23,7 @@ class Migration(migrations.Migration):
# put back the non-CASCADE foreign key constraint
reverse_sql=[
f'ALTER TABLE perf_multicommitdatum '
f'DROP {DROP_TYPE} {MULTICOMMIT_CONSTRAINT_SYMBOL};',
f'DROP CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL};',
f'ALTER TABLE perf_multicommitdatum '
f'ADD CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL} '
f'FOREIGN KEY (perf_datum_id) REFERENCES performance_datum (ID);',

Просмотреть файл

@ -3,12 +3,6 @@ On large tables or production environments, it is recommended to use an external
to update the column and fake this migration. Migration perf.0045 will restore a valid django's schema.
"""
from django.db import migrations, connection
from django.conf import settings
if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql':
QUERY = "ALTER TABLE performance_datum MODIFY COLUMN id BIGINT(20) NOT NULL AUTO_INCREMENT"
else:
QUERY = "ALTER TABLE performance_datum ALTER COLUMN id TYPE BIGINT using id::bigint"
def alter_perfdatum_pk(apps, schema_editor):
@ -24,7 +18,7 @@ def alter_perfdatum_pk(apps, schema_editor):
if pursue.lower() not in ('', 'y', 'yes'):
raise Exception("Aborting…")
with connection.cursor() as cursor:
cursor.execute(QUERY)
cursor.execute("ALTER TABLE performance_datum ALTER COLUMN id TYPE BIGINT using id::bigint")
return

Просмотреть файл

@ -1,29 +1,5 @@
from django.db import migrations, models, connection
from django.conf import settings
from django.db import migrations, models
import django.db.models.deletion
from django.db.utils import DatabaseError
def check_perfdatum_pk(apps, schema_editor):
"""Ensure performance_datum FK has been updated to bigint type"""
# Not needed on postgresql
if settings.DATABASES['default']['ENGINE'] != 'django.db.backends.mysql':
return
with connection.cursor() as cursor:
cursor.execute(
"SELECT COLUMN_TYPE from INFORMATION_SCHEMA.COLUMNS WHERE "
f"""table_schema = '{connection.settings_dict["NAME"]}' and """
"table_name = 'performance_datum' and "
"COLUMN_NAME = 'id'"
)
column_type = cursor.fetchone()
if column_type != ("bigint(20)",):
raise DatabaseError(
f"PerformanceDatum PK column type is {column_type} but should be bigint(20)"
)
class Migration(migrations.Migration):
@ -36,10 +12,6 @@ class Migration(migrations.Migration):
]
operations = [
# Ensure the PK has been updated
migrations.RunPython(
check_perfdatum_pk,
),
# Empty SQL migration that update django state schema
migrations.RunSQL(
migrations.RunSQL.noop,

Просмотреть файл

@ -2,15 +2,9 @@
It restores the DB side CASCADE deletion behavior for perf_multicommitdatum table toward performance_datum
"""
from django.db import migrations
from django.conf import settings
MULTICOMMIT_CONSTRAINT_SYMBOL = 'perf_multicommitdatu_perf_datum_id_c2d7eb14_fk_performan'
if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql':
DROP_TYPE = 'FOREIGN KEY'
else:
DROP_TYPE = 'CONSTRAINT'
class Migration(migrations.Migration):
dependencies = [
@ -22,7 +16,7 @@ class Migration(migrations.Migration):
# add ON DELETE CASCADE at database level
[
f'ALTER TABLE perf_multicommitdatum '
f'DROP {DROP_TYPE} {MULTICOMMIT_CONSTRAINT_SYMBOL};',
f'DROP CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL};',
f'ALTER TABLE perf_multicommitdatum '
f'ADD CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL} '
f'FOREIGN KEY (perf_datum_id) REFERENCES performance_datum (ID) ON DELETE CASCADE;',
@ -30,7 +24,7 @@ class Migration(migrations.Migration):
# put back the non-CASCADE foreign key constraint
reverse_sql=[
f'ALTER TABLE perf_multicommitdatum '
f'DROP {DROP_TYPE} {MULTICOMMIT_CONSTRAINT_SYMBOL};',
f'DROP CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL};',
f'ALTER TABLE perf_multicommitdatum '
f'ADD CONSTRAINT {MULTICOMMIT_CONSTRAINT_SYMBOL} '
f'FOREIGN KEY (perf_datum_id) REFERENCES performance_datum (ID);',

Просмотреть файл

@ -1,17 +1,11 @@
# Generated by Django 3.0.8 on 2020-12-11 14:42
from django.db import migrations
from django.conf import settings
DATUM_REPLICATE_CONSTRAINT_SYMBOL = (
'performance_datum_re_performance_datum_id_fe2ed518_fk_performan'
)
if settings.DATABASES['default']['ENGINE'] == 'django.db.backends.mysql':
DROP_TYPE = 'FOREIGN KEY'
else:
DROP_TYPE = 'CONSTRAINT'
class Migration(migrations.Migration):
dependencies = [
@ -23,7 +17,7 @@ class Migration(migrations.Migration):
# add ON DELETE CASCADE at database level
[
f'ALTER TABLE performance_datum_replicate '
f'DROP {DROP_TYPE} {DATUM_REPLICATE_CONSTRAINT_SYMBOL};',
f'DROP CONSTRAINT {DATUM_REPLICATE_CONSTRAINT_SYMBOL};',
f'ALTER TABLE performance_datum_replicate '
f'ADD CONSTRAINT {DATUM_REPLICATE_CONSTRAINT_SYMBOL} '
f'FOREIGN KEY (performance_datum_id) REFERENCES performance_datum (ID) ON DELETE CASCADE;',
@ -31,7 +25,7 @@ class Migration(migrations.Migration):
# put back the non-CASCADE foreign key constraint
reverse_sql=[
f'ALTER TABLE performance_datum_replicate '
f'DROP {DROP_TYPE} {DATUM_REPLICATE_CONSTRAINT_SYMBOL};',
f'DROP CONSTRAINT {DATUM_REPLICATE_CONSTRAINT_SYMBOL};',
f'ALTER TABLE performance_datum_replicate '
f'ADD CONSTRAINT {DATUM_REPLICATE_CONSTRAINT_SYMBOL} '
f'FOREIGN KEY (performance_datum_id) REFERENCES performance_datum (ID);',

Просмотреть файл

@ -4,8 +4,7 @@ from django.db.models import Q
def get_build_failures(push):
# icontains doesn't work with mysql unless collation settings are adjusted: https://code.djangoproject.com/ticket/9682
build_types = JobType.objects.filter(Q(name__contains="Build") | Q(name__contains="build"))
build_types = JobType.objects.filter(Q(name__icontains="build"))
build_results = Job.objects.filter(
push=push,