From 59156f8674380b9e530c59609f6ac836daa19446 Mon Sep 17 00:00:00 2001 From: Kevin Meinhardt Date: Fri, 21 Jun 2024 14:22:42 +0200 Subject: [PATCH] Stability fixes for docker compose lifecycle (#22378) * Stability fixes for docker compose lifecycle * Update docs/topics/development/setup_and_configuration.md Co-authored-by: Andrew Williamson * TMP: response to comments * Update docs/topics/development/setup_and_configuration.md Co-authored-by: William Durand --------- Co-authored-by: Andrew Williamson Co-authored-by: William Durand --- Makefile-os | 30 +- docker-compose.yml | 14 +- docker/entrypoint.sh | 1 + docs/topics/development/index.rst | 1 + .../development/setup_and_configuration.md | 364 ++++++++++++++++++ scripts/setup.py | 5 + tests/make/make.spec.js | 34 +- 7 files changed, 441 insertions(+), 8 deletions(-) create mode 100644 docs/topics/development/setup_and_configuration.md diff --git a/Makefile-os b/Makefile-os index 8b96dce764..2922dfa2f6 100644 --- a/Makefile-os +++ b/Makefile-os @@ -17,6 +17,13 @@ override BACKUPS_DIR = $(shell pwd)/backups override EXPORT_DIR = $(BACKUPS_DIR)/$(shell date +%Y%m%d%H%M%S) RESTORE_DIR ?= $(BACKUPS_DIR)/$(shell ls -1 backups | sort -r | head -n 1) +# Paths should be cleaned before mounting .:/data/olympia +# These are files which should be sourced from the container +# or should be fresh on every run of the project +CLEAN_PATHS := \ + src/olympia.egg-info \ + supervisord.pid \ + version.json \ .PHONY: help_redirect help_redirect: @@ -31,6 +38,7 @@ help_submake: .PHONY: setup setup: ## create configuration files version.json and .env required to run this project + for path in $(CLEAN_PATHS); do rm -rf "$(PWD)/$$path" && echo "$$path removed"; done ./scripts/setup.py .PHONY: push_locales @@ -116,11 +124,25 @@ docker_mysqld_volume_remove: ## Remove the mysqld volume docker_compose_down: ## Stop the docker containers docker compose down --rmi local --remove-orphans --volumes -.PHONY: clean_docker -clean_docker: 
docker_compose_down docker_mysqld_volume_remove ## Clean up docker containers, images, caches, volumes and local cache directories. Use with caution. To restart the app run make initialize_docker after this command. +.PHONY: docker_clean_volumes +docker_clean_volumes: ## Remove dangling volumes + docker volume prune --force + +.PHONY: docker_clean_images +docker_clean_images: ## Remove dangling images + docker image prune --filter "dangling=true" --force + +.PHONY: docker_clean_build_cache +docker_clean_build_cache: ## Remove buildx build cache docker buildx prune -af + +.PHONY: clean_deps +clean_deps: ## Remove all files in the ./deps directory rm -rf ./deps/** +.PHONY: clean_docker +clean_docker: docker_compose_down docker_mysqld_volume_remove docker_clean_images docker_clean_volumes docker_clean_build_cache clean_deps ## Remove all docker resources taking space on the host machine + .PHONY: docker_compose_up docker_compose_up: docker_mysqld_volume_create ## Start the docker containers docker compose up $(DOCKER_SERVICES) -d --wait --remove-orphans --force-recreate --quiet-pull $(ARGS) @@ -141,10 +163,10 @@ docker_extract_deps: ## Extract dependencies from the docker image to a local vo docker compose run --rm --quiet-pull web make update_deps .PHONY: up -up: setup docker_mysqld_volume_create docker_compose_up ## Create and start docker compose +up: setup docker_mysqld_volume_create docker_compose_up docker_clean_images docker_clean_volumes ## Create and start docker compose .PHONY: down -down: docker_compose_down ## Stop the docker containers +down: docker_compose_down docker_clean_images docker_clean_volumes ## Stop the docker containers and clean up non-peristent dangling resources .PHONY: initialize_docker initialize_docker: up diff --git a/docker-compose.yml b/docker-compose.yml index e33247ab9e..6a50cd1bee 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -26,6 +26,8 @@ services: worker: &worker <<: *env image: ${DOCKER_TAG:-} + # Ignore any 
linting saying we have an invalid value. + pull_policy: ${DOCKER_PULL_POLICY:-} build: context: . dockerfile: Dockerfile @@ -43,6 +45,9 @@ services: - supervisord -n -c /data/olympia/docker/supervisor-celery.conf volumes: - .:/data/olympia + # Exclude contents in this directory from mounting + # Otherwise our `olympia` installation will be invalid + # Causing failures or requiring re-installation of dependencies - /data/olympia/src/olympia.egg-info - storage:/data/olympia/storage extra_hosts: @@ -56,9 +61,12 @@ services: "CMD-SHELL", "if [ -f /data/olympia/supervisord.pid ]; then echo 'File exists'; else echo 'File not found'; exit 1; fi" ] - interval: 5s - timeout: 10s - retries: 30 + # Checking every 3 seconds for 100 tries is 5 minutes + # With a timeout of 45 seconds gives a total of 5 minutes 45 seconds + # For entrypoint.sh to complete + interval: 3s + timeout: 45s + retries: 100 web: <<: *worker diff --git a/docker/entrypoint.sh b/docker/entrypoint.sh index b966e082d2..e42bd3e7ce 100755 --- a/docker/entrypoint.sh +++ b/docker/entrypoint.sh @@ -23,6 +23,7 @@ if [[ -n "${HOST_UID:-}" ]]; then echo "${OLYMPIA_USER} UID: ${OLYMPIA_UID} -> ${HOST_UID}" # Ensure the olympia user has access to the /deps directory + echo "Updating file ownership for ${OLYMPIA_USER}" time chown -R "$(get_olympia_uid):$(get_olympia_gid)" /deps fi diff --git a/docs/topics/development/index.rst b/docs/topics/development/index.rst index d3a427c06a..fb5860e2f5 100644 --- a/docs/topics/development/index.rst +++ b/docs/topics/development/index.rst @@ -5,6 +5,7 @@ Development .. 
toctree:: :maxdepth: 2 + setup_and_configuration tests debugging dependencies diff --git a/docs/topics/development/setup_and_configuration.md b/docs/topics/development/setup_and_configuration.md new file mode 100644 index 0000000000..c54c967bc4 --- /dev/null +++ b/docs/topics/development/setup_and_configuration.md @@ -0,0 +1,364 @@ +# Setup and Configuration + +This section covers how to run `addons-server` both locally and in CI environments. This should be where you start +if you are running `addons-server` for the first time. + +## Local Development Environment + +Setting up the local development environment for **addons-server** involves configuring Docker Compose to run the necessary services. Follow these steps to get started: + +### Prerequisites + +- Ensure Docker and Docker Compose are installed on your system. +- Clone the **addons-server** repository from GitHub: + + ```sh + git clone https://github.com/mozilla/addons-server + cd addons-server + ``` + +(running-for-the-first-time)= +### Running for the first time + +When running the project for the first time, execute: + +```sh +make initialize_docker +``` + +This command will run: + +- `make up` to start the Docker containers. +- `make initialize` to set up the initial Docker environment, including database initialization and data population. +Detailed steps for `make initialize` will be covered in Section 6 on Data Management. + +If you run `make up` without running `make initialize` the docker compose services will be running, but you will not have a database +and the app might crash or otherwise be unusable. + +Similarly, you can run `make initialize` even after you have an up and running environment, but this will totally reset your database +as if you were running the application fresh. + +### Updating your environment + +> TLDR; Just run `make up`. 
+ +The `make up` command ensures all necessary files are created on the host and starts the Docker Compose project, +including volumes, containers, and networks. It is meant to be run frequently whenever you want to bring your environment "up". + +Here’s a high-level overview of what `make up` does: + +```yaml +up: setup docker_mysqld_volume_create docker_extract_deps docker_compose_up +``` + +- **setup**: Creates configuration files such as `.env`. +- **docker_mysqld_volume_create**: Ensures the MySQL volume is created. +- **docker_extract_deps**: Installs dependencies inside the Docker container. +- **docker_compose_up**: Starts the Docker containers defined in [docker-compose.yml][docker-compose]. + +What happens if you run `make up` when your environment is already running? +This will result in all services and volumes being recreated as if starting them for the first time, +and will clear any local state from the containers. The `make up` command is {ref}`idempotent <idempotence>` so you can run it over and over. + +### Shutting down your environment + +> TLDR; just run `make down` + +The `make down` command does almost the complete opposite of `make up`. +It stops all docker services and removes locally built images and any used volumes. + +Running `make down` will free up resources on your machine and can help if your environment gets stuck in a difficult-to-debug state. + +A common solution to many problems is to run `make down && make up`. + +### Accessing the Development App + +- Add the following entry to your `/etc/hosts` file to access **addons-server** via a local domain: + + ```sh + 127.0.0.1 olympia.test + ``` + +- The web application should now be accessible at `http://olympia.test`.
+- You can access the web container for debugging and development: + + ```sh + make shell + ``` + +- To access the Django shell within the container: + + ```sh + make djshell + ``` + +## Configuring your environment + +Addons-server runs via docker-compose and can be run in a local environment or on CI. It is highly configurable to meet +the requirements for different environments and use cases. Here are some practical ways you can configure how `addons-server` runs. + +### Build vs Pull + +By default, `addons-server` builds a [docker image](./docker.md) tagged `local` before running the containers as a part of `make up`. +To run `addons-server` with the `local` image, just run `make up` like you normally would. It is the default. + +Instead of building, you can configure your environment to run a pulled image instead. To run a pulled image, +specify a {ref}`version or digest <version-vs-digest>` when calling `make up`. E.g. `make up DOCKER_VERSION=latest` to run +the latest published version of `addons-server`. + +For typical development it is recommended to use the default built image. It is aggressively cached and most closely +reflects the current state of your local repository. Pulling a published image can be useful if you have limited CPU +or if you want to run a very specific version of addons-server for testing a Pull request +or debugging a currently deployed version. + +(version-vs-digest)= +### Version vs Digest + +The default behavior is to build the docker image locally, but if you want to run addons-server with a remote image +you can specify a docker image version to pull with: + +```bash +make up DOCKER_VERSION=<version> +``` + +Version is the published tag of addons-server and corresponds to `mozilla/addons-server:<version>` in [dockerhub][addons-server-tags]. + +Specifying a version will configure docker compose to set the [pull policy] to `always` and specify the `image` property +in the docker compose config to pull the latest build of the specified `version`.
Once you've specified a version, +subsequent calls to `make up` will pull the same version consistently; {ref}`see idempotence <idempotence>` for more details. + +What if you want to run an exact build of `addons-server`, +without fetching later versions that might subsequently get published to the same tag? + +You can specify a `DOCKER_DIGEST` to pull a specific build of addons-server. This can be very useful if you want +to guarantee the exact state of the image you are running. This is used in our own CI environments to ensure each job +runs with the exact same image built in the run. + +```bash +make up DOCKER_DIGEST=sha256:abc123 +``` + +A docker [build digest][docker-image-digest] corresponds to the precise state of a docker image. +Think of it like a content hash, though it's a bit more complicated than that. +Specifying a build digest means you will always run the exact same version +of the image and it will not change the contents of the image. + +Our [CI][ci-workflow] workflow builds and pushes a docker image on each run. To run the exact image built during a CI run, +copy the image digest from the `build` job logs. Look for a log line like this: + +```shell +#36 pushing manifest for docker.io/mozilla/addons-server:pr-22395-ci@sha256:8464804ed645e429ccb3585a50c6003fafd81bd43407d8d4ab575adb8391537d +``` + +The version for the above image is `pr-22395-ci` and the digest is `sha256:8464804ed645e429ccb3585a50c6003fafd81bd43407d8d4ab575adb8391537d`. +To run the specific build of the exact run for `pr-22395` you would run: + +```bash + make up DOCKER_VERSION=pr-22395-ci +``` + +And to run exactly the version built in this run, even if it is not the latest version, you would run: + +```bash + make up DOCKER_DIGEST=sha256:8464804ed645e429ccb3585a50c6003fafd81bd43407d8d4ab575adb8391537d +``` + +If you specify both a version and digest, digest as the more specific attribute takes precedence.
+ +(idempotence)= +### Idempotence + +The `make up` command and all of its sub-commands are idempotent. +That means if the command is repeated with the same inputs you will always get the same result. +If you run + +```bash + make up DOCKER_VERSION=banana +``` + +and then run make up again, the .env file will have a docker tag specifying the version `banana`. +This prevents you from needing to constantly specify parameters over and over. +But it also means you have to remember what values you have set for different properties as they can have huge +impacts on what is actually running in your environment. + +`make up` logs the current environment specifications to the terminal as it is running so you should always know +what exactly is happening in your environment at any given time. + +Additionally, by defining all of the critical docker compose variables in a .env file, it means that the behaviour +of running commands via `make` or running the same command directly via the docker CLI should produce the same result. + +Though it is **highly recommended to use the make commands** instead of directly calling docker in your terminal. + +## Continuous Integration Environment + +The **addons-server** project uses GitHub Actions to automate testing and building processes in the CI environment. Here’s an overview of the existing CI workflows and their architecture: + +1. **Existing Workflows**: + - The CI pipeline is defined in the `.github/workflows` directory. The main workflow file, typically named `ci.yml`, orchestrates the build and test processes for the project. + +2. **Reusable Actions**: + The project leverages reusable actions: + - [build-docker](../../../.github/actions/build-docker/action.yml) + - [run-docker](../../../.github/actions/run-docker/action.yml) + + These actions simplify the workflow definitions and ensure consistency across different jobs. + +3. 
**Workflow Example**: + - A typical workflow file includes steps such as checking out the repository, setting up Docker Buildx, building the Docker image, and running the tests: + + ```yaml + name: CI + on: [push, pull_request] + jobs: + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v2 + + - name: Build Docker Image + uses: ./.github/actions/build-docker + - name: Run Docker Container + uses: ./.github/actions/run-docker + ``` + + It is important to note that reusable actions cannot check out code, so code is always checked out in the workflow. + +4. **Docker Compose Files**: + - **[docker-compose.yml][docker-compose]**: The primary Docker Compose file defining services, networks, and volumes for local and CI environments. + - **[docker-compose.ci.yml][docker-compose-ci]**: Overrides certain configurations for CI-specific needs, ensuring the environment is optimized for automated testing and builds. + - **[docker-compose.deps.yml][docker-compose-deps]**: Attaches a mount at ./deps to /deps in the container, exposing the contents to the host + - **[docker-compose.private.yml][docker-compose-private]**: Runs addons-server with the `customs` service that is only available to Mozilla employees + +Our docker compose files rely on substituted values, all of which are included in our .env file for direct CLI compatibility. +Any referenced `${VARIABLE}` in the docker-compose files will be replaced with the value from the .env file. We have tests +that ensure any references are included in the .env file with valid values. + +This means when you run `make docker_compose_up`, the output on your machine will be exactly the same as if you ran +`docker compose up -d --wait --remove-orphans --force-recreate --quiet-pull` directly. You **should** use make commands, +but sometimes you need to debug further what a command is running on the terminal and this architecture allows you to do that.
+ +By following these steps, you can set up your local development environment and understand the existing CI workflows for the **addons-server** project. For more details on specific commands and configurations, refer to the upcoming sections in this documentation. + +## Gotchas + +Here's a list of a few of the issues you might face when setting up your development environment. + +### Can't access the web server? + +Check you've created a hosts file entry pointing `olympia.test` to the relevant IP address. + +If containers are failing to start, use `docker compose ps` to check their running status. + +Another way to find out what's wrong is to run `docker compose logs`. + +### Getting "Programming error [table] doesn't exist"? + +Make sure you've run the `make initialize_docker` step as {ref}`detailed <running-for-the-first-time>` in the initial setup instructions. + +### ConnectionError during initialize (elasticsearch container fails to start) + +When running `make initialize_docker` without a working elasticsearch container, you'll get a ConnectionError. Check the logs with `docker compose logs`. If elasticsearch is complaining about `vm.max_map_count`, run this command on your computer or your docker-machine VM: + +```sh + sudo sysctl -w vm.max_map_count=262144 +``` + +This allows processes to allocate more [memory map areas](https://stackoverflow.com/a/11685165/4496684). + +### Connection to elasticsearch timed out (elasticsearch container exits with code 137) + +`docker compose up -d` brings up all containers, but running `make initialize_docker` causes the elasticsearch container to go down. Running `docker compose ps` shows `Exited (137)` against it. + +Update default settings in Docker Desktop - we suggest increasing RAM limit to at least 4 GB in the Resources/Advanced section and click on "Apply and Restart". + +### Port collisions (nginx container fails to start) + +If you're already running a service on port 80 or 8000 on your host machine, the `nginx` container will fail to start.
This is because the `docker-compose.override.yml` file tells `nginx` to listen on port 80 and the web service to listen on port 8000 by default. + +This problem will manifest itself by the services failing to start. Here's an example for the most common case of `nginx` not starting due to a collision on port 80: + +```shell + ERROR: for nginx Cannot start service nginx:..... + ...Error starting userland proxy: Bind for 0.0.0.0:80: unexpected error (Failure EADDRINUSE) + ERROR: Encountered errors while bringing up the project. +``` + +You can check what's running on that port by using (sudo is required if you're looking at port < 1024): + +```sh + sudo lsof -i :80 +``` + +We specify the ports `nginx` listens on in the `docker-compose.override.yml` file. If you wish to override the ports you can do so by creating a new `docker-compose` config and starting the containers using that config alongside the default config. + +For example, if you create a file called `docker-compose-ports.yml`: + +```yaml + nginx: + ports: + - 8880:80 +``` + +Next, you would stop and start the containers with the following: + +```sh + docker compose stop # only needed if running + docker compose -f docker-compose.yml -f docker-compose-ports.yml up -d +``` + +Now the container `nginx` is listening on 8880 on the host. You can now proxy to the container `nginx` from the host `nginx` with the following `nginx` config: + +```nginx + server { + listen 80; + server_name olympia.test; + location / { + proxy_pass http://olympia.test:8880; + } + } +``` + +### returned Internal Server Error for API route and version + +This can occur if the docker daemon has crashed. Running docker commands will return errors as the CLI cannot communicate +with the daemon. The best thing to do is to restart docker and to check your docker memory usage. The most likely cause +is limited memory. You can check the make commands to see how you can free up space on your machine. 
+ +```bash + docker volume create addons-server_data_mysqld + request returned Internal Server Error for API route and version http://%2FUsers%2Fwilliam%2F.docker%2Frun%2Fdocker.sock/v1.45/volumes/create, check if the server supports the requested API version + make: *** [docker_mysqld_volume_create] Error 1 +``` + +### Mysqld failing to start + +Our MYSQLD service relies on a persistent data volume in order to save the database even after containers are removed. +It is possible that the volume is in an incorrect state during startup which can lead to errors like the following: + +```bash + mysqld-1 | 2024-06-14T13:50:33.169411Z 0 [ERROR] [MY-010457] [Server] --initialize specified but the data directory has files in it. Aborting. + mysqld-1 | 2024-06-14T13:50:33.169416Z 0 [ERROR] [MY-013236] [Server] The designated data directory /var/lib/mysql/ is unusable. You can remove all files that the server added to it. +``` + +The best way around this is to run `make down && make up`. This will prune volumes and restart addons-server. + +### stat /Users/kmeinhardt/src/mozilla/addons-server/env: no such file or directory + +If you run into this issue, it is likely due to an invalid .env, probably created by running the tests for our Makefile +and docker-compose.yml file locally. + +```bash + docker compose up -d --wait --remove-orphans --force-recreate --quiet-pull + stat /Users/kmeinhardt/src/mozilla/addons-server/env: no such file or directory + make: *** [docker_compose_up] Error 14 +``` + +To fix this error, run `rm -f .env` to remove your .env and `make up` to restart the containers.
+ +[docker-compose]: ../../../docker-compose.yml +[docker-compose-ci]: ../../../docker-compose.ci.yml +[docker-compose-deps]: ../../../docker-compose.deps.yml +[docker-compose-private]: ../../../docker-compose.private.yml +[docker-image-digest]: https://github.com/opencontainers/.github/blob/main/docs/docs/introduction/digests.md +[addons-server-tags]: https://hub.docker.com/r/mozilla/addons-server/tags +[ci-workflow]: https://github.com/mozilla/addons-server/actions/workflows/ci.yml diff --git a/scripts/setup.py b/scripts/setup.py index d5c9cc8349..87e6f2c6ce 100755 --- a/scripts/setup.py +++ b/scripts/setup.py @@ -87,11 +87,16 @@ def get_docker_tag(): docker_tag, docker_version, docker_digest = get_docker_tag() +# set pull_policy of web/worker containers based on the specified tag +# for digest or non `local` versions, we should avoid building and pull aggressively +docker_pull_policy = 'always' if docker_digest or docker_version != 'local' else 'build' + set_env_file( { 'COMPOSE_FILE': get_value('COMPOSE_FILE', ('docker-compose.yml')), 'DOCKER_TAG': docker_tag, 'DOCKER_TARGET': get_value('DOCKER_TARGET', 'development'), + 'DOCKER_PULL_POLICY': docker_pull_policy, 'HOST_UID': get_value('HOST_UID', os.getuid()), } ) diff --git a/tests/make/make.spec.js b/tests/make/make.spec.js index 44f24bcf01..fd86df218c 100644 --- a/tests/make/make.spec.js +++ b/tests/make/make.spec.js @@ -161,8 +161,40 @@ describe.each(testCases)('.env file', ({ name, file, env, expected }) => { }); }); +describe.each([ + { + version: 'local', + digest: undefined, + expected: 'build', + }, + { + version: 'local', + digest: 'sha256:123', + expected: 'always', + }, + { + version: 'latest', + digest: undefined, + expected: 'always', + }, +])('DOCKER_PULL_POLICY', ({ version, digest, expected }) => { + it(`is set to ${expected} when version is ${version} and digest is ${digest}`, () => { + fs.writeFileSync(envPath, ''); + runSetup({ + DOCKER_VERSION: version, + DOCKER_DIGEST: digest, + }); + + 
const actual = readEnvFile('DOCKER_PULL_POLICY'); + expect(actual).toStrictEqual(expected); + }); +}); + const testedKeys = new Set(testCases.map(({ name }) => name)); +// Keys tested outside the scope of testCases +const skippedKeys = ['DOCKER_PULL_POLICY']; + test('All dynamic properties in any docker compose file are referenced in the test', () => { const composeFiles = globSync('docker-compose*.yml', { cwd: rootPath }); const variableDefinitions = []; @@ -177,7 +209,7 @@ test('All dynamic properties in any docker compose file are referenced in the te let match; while ((match = regex.exec(line)) !== null) { const variable = match[1]; - variableDefinitions.push(variable); + if (!skippedKeys.includes(variable)) variableDefinitions.push(variable); } } }