This commit is contained in:
Yuge Zhang 2022-07-08 11:13:41 +08:00 committed by GitHub
Parent 570448eab8
Commit 4e71ed6211
No key found matching this signature
GPG key ID: 4AEE18F83AFDEB23
23 changed files: 279 additions and 87 deletions

View file

@ -171,7 +171,8 @@ def load_training_service_config(config) -> TrainingServiceConfig:
cls = _get_ts_config_class(config['platform'])
if cls is not None:
return cls(**config)
return config # not valid json, don't touch
# not valid json, don't touch
return config # type: ignore
def _get_ts_config_class(platform: str) -> type[TrainingServiceConfig] | None:
from ..training_service import TrainingServiceConfig # avoid circular import
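For illustration, a minimal sketch of what the platform lookup could look like; the subclass scan below is an assumption made for this example, not necessarily NNI's actual implementation:

# Hypothetical sketch: resolve a platform string to a TrainingServiceConfig subclass.
def _get_ts_config_class(platform: str):
    from ..training_service import TrainingServiceConfig  # avoid circular import
    # Assumption: each concrete config class exposes a `platform` field.
    for cls in TrainingServiceConfig.__subclasses__():
        if getattr(cls, 'platform', None) == platform:
            return cls
    return None  # unknown platform; the caller leaves the raw config untouched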

View file

@ -10,6 +10,7 @@ import string
from typing import Any, Dict, Iterable, List
from nni.experiment import rest
from nni.retiarii.integration import RetiariiAdvisor
from .interface import AbstractExecutionEngine, AbstractGraphListener
from .utils import get_mutation_summary
@ -75,20 +76,21 @@ class BaseExecutionEngine(AbstractExecutionEngine):
self.url_prefix = rest_url_prefix
self._listeners: List[AbstractGraphListener] = []
# register advisor callbacks
advisor = get_advisor()
advisor.send_trial_callback = self._send_trial_callback
advisor.request_trial_jobs_callback = self._request_trial_jobs_callback
advisor.trial_end_callback = self._trial_end_callback
advisor.intermediate_metric_callback = self._intermediate_metric_callback
advisor.final_metric_callback = self._final_metric_callback
self._running_models: Dict[int, Model] = dict()
self._history: List[Model] = []
self.resources = 0
# register advisor callbacks
advisor: RetiariiAdvisor = get_advisor()
advisor.register_callbacks({
'send_trial': self._send_trial_callback,
'request_trial_jobs': self._request_trial_jobs_callback,
'trial_end': self._trial_end_callback,
'intermediate_metric': self._intermediate_metric_callback,
'final_metric': self._final_metric_callback
})
def submit_models(self, *models: Model) -> None:
for model in models:
data = self.pack_model_data(model)

View file

@ -14,6 +14,7 @@ from dataclasses import dataclass
from nni.common.device import GPUDevice, Device
from nni.experiment.config.training_services import RemoteConfig
from nni.retiarii.integration import RetiariiAdvisor
from .interface import AbstractExecutionEngine, AbstractGraphListener, WorkerInfo
from .. import codegen, utils
from ..graph import Model, ModelStatus, MetricData, Node
@ -28,6 +29,10 @@ from .base import BaseGraphData
_logger = logging.getLogger(__name__)
def _noop(*args, **kwargs):
pass
@dataclass
class TrialSubmission:
model: Model
@ -90,12 +95,14 @@ class CGOExecutionEngine(AbstractExecutionEngine):
self._queue_lock = threading.Lock()
# register advisor callbacks
advisor = get_advisor()
# advisor.send_trial_callback = self._send_trial_callback
# advisor.request_trial_jobs_callback = self._request_trial_jobs_callback
advisor.trial_end_callback = self._trial_end_callback
advisor.intermediate_metric_callback = self._intermediate_metric_callback
advisor.final_metric_callback = self._final_metric_callback
advisor: RetiariiAdvisor = get_advisor()
advisor.register_callbacks({
'send_trial': _noop,
'request_trial_jobs': _noop,
'trial_end': self._trial_end_callback,
'intermediate_metric': self._intermediate_metric_callback,
'final_metric': self._final_metric_callback
})
self._stopped = False
self._consumer_thread = threading.Thread(target=self._consume_models)

View file

@ -3,7 +3,7 @@
import logging
import os
from typing import Any, Callable, Optional
from typing import Any, Callable, Optional, Dict, List, Tuple
import nni
from nni.common.serializer import PayloadTooLarge
@ -21,6 +21,7 @@ _logger = logging.getLogger(__name__)
class RetiariiAdvisor(MsgDispatcherBase):
"""
This class connects Retiarii components to the NNI backend.
It can be considered a Python wrapper of the NNI manager.
It functions as the main thread when running a Retiarii experiment through NNI.
The strategy is launched in its own thread, which calls APIs of the execution engine. Execution
@ -32,9 +33,6 @@ class RetiariiAdvisor(MsgDispatcherBase):
The conversions the advisor provides are minimal. It is only a send/receive module, and the execution engine
needs to handle all the rest.
FIXME
How does the advisor exit when the strategy exits?
Attributes
----------
send_trial_callback
@ -61,6 +59,63 @@ class RetiariiAdvisor(MsgDispatcherBase):
self.parameters_count = 0
# Sometimes messages arrive before the callbacks are registered,
# or the engine may be allowed to be absent during the experiment.
# Store the messages here and invoke them later.
self.call_queue: List[Tuple[str, list]] = []
def register_callbacks(self, callbacks: Dict[str, Callable[..., None]]):
"""
Register callbacks for the NNI backend.
Parameters
----------
callbacks
A dictionary of callbacks.
The key is the name of the callback; the value is the callback function.
"""
self.send_trial_callback = callbacks.get('send_trial')
self.request_trial_jobs_callback = callbacks.get('request_trial_jobs')
self.trial_end_callback = callbacks.get('trial_end')
self.intermediate_metric_callback = callbacks.get('intermediate_metric')
self.final_metric_callback = callbacks.get('final_metric')
self.process_queued_callbacks()
def process_queued_callbacks(self) -> None:
"""
Process the callbacks in the queue.
Consume messages that have not been handled previously.
"""
processed_idx = []
for queue_idx, (call_name, call_args) in enumerate(self.call_queue):
if call_name == 'send_trial' and self.send_trial_callback is not None:
self.send_trial_callback(*call_args) # pylint: disable=not-callable
processed_idx.append(queue_idx)
if call_name == 'request_trial_jobs' and self.request_trial_jobs_callback is not None:
self.request_trial_jobs_callback(*call_args) # pylint: disable=not-callable
processed_idx.append(queue_idx)
if call_name == 'trial_end' and self.trial_end_callback is not None:
self.trial_end_callback(*call_args) # pylint: disable=not-callable
processed_idx.append(queue_idx)
if call_name == 'intermediate_metric' and self.intermediate_metric_callback is not None:
self.intermediate_metric_callback(*call_args) # pylint: disable=not-callable
processed_idx.append(queue_idx)
if call_name == 'final_metric' and self.final_metric_callback is not None:
self.final_metric_callback(*call_args) # pylint: disable=not-callable
processed_idx.append(queue_idx)
# Remove processed messages
for idx in reversed(processed_idx):
self.call_queue.pop(idx)
def invoke_callback(self, name: str, *args: Any) -> None:
"""
Invoke a callback by name. The call is queued first, so it can be replayed later
if the corresponding callback has not been registered yet.
"""
self.call_queue.append((name, list(args)))
self.process_queued_callbacks()
def handle_initialize(self, data):
"""callback for initializing the advisor
Parameters
@ -140,8 +195,7 @@ class RetiariiAdvisor(MsgDispatcherBase):
# nevertheless, there could still be blocked by pipe / nni-manager
self.send(CommandType.NewTrialJob, send_payload)
if self.send_trial_callback is not None:
self.send_trial_callback(parameters) # pylint: disable=not-callable
self.invoke_callback('send_trial', parameters)
return self.parameters_count
def mark_experiment_as_ending(self):
@ -149,8 +203,7 @@ class RetiariiAdvisor(MsgDispatcherBase):
def handle_request_trial_jobs(self, num_trials):
_logger.debug('Request trial jobs: %s', num_trials)
if self.request_trial_jobs_callback is not None:
self.request_trial_jobs_callback(num_trials) # pylint: disable=not-callable
self.invoke_callback('request_trial_jobs', num_trials)
def handle_update_search_space(self, data):
_logger.debug('Received search space: %s', data)
@ -158,22 +211,16 @@ class RetiariiAdvisor(MsgDispatcherBase):
def handle_trial_end(self, data):
_logger.debug('Trial end: %s', data)
if self.trial_end_callback is not None:
self.trial_end_callback(nni.load(data['hyper_params'])['parameter_id'], # pylint: disable=not-callable
data['event'] == 'SUCCEEDED')
self.invoke_callback('trial_end', nni.load(data['hyper_params'])['parameter_id'], data['event'] == 'SUCCEEDED')
def handle_report_metric_data(self, data):
_logger.debug('Metric reported: %s', data)
if data['type'] == MetricType.REQUEST_PARAMETER:
raise ValueError('Request parameter not supported')
elif data['type'] == MetricType.PERIODICAL:
if self.intermediate_metric_callback is not None:
self.intermediate_metric_callback(data['parameter_id'], # pylint: disable=not-callable
self._process_value(data['value']))
self.invoke_callback('intermediate_metric', data['parameter_id'], self._process_value(data['value']))
elif data['type'] == MetricType.FINAL:
if self.final_metric_callback is not None:
self.final_metric_callback(data['parameter_id'], # pylint: disable=not-callable
self._process_value(data['value']))
self.invoke_callback('final_metric', data['parameter_id'], self._process_value(data['value']))
@staticmethod
def _process_value(value) -> Any: # hopefully a float
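Taken together, register_callbacks, invoke_callback, and process_queued_callbacks implement a queue-and-replay pattern: every call is buffered first, and the buffer is drained whenever callbacks are (re)registered. Below is a minimal, self-contained sketch of that pattern; it is a simplified stand-in, not the actual RetiariiAdvisor.

from typing import Any, Callable, Dict, List, Optional, Tuple

class CallbackQueue:
    """Simplified stand-in for the queue-and-replay behavior described above."""

    def __init__(self) -> None:
        # Messages may arrive before callbacks are registered; buffer them here.
        self._callbacks: Dict[str, Optional[Callable[..., None]]] = {}
        self._queue: List[Tuple[str, list]] = []

    def register_callbacks(self, callbacks: Dict[str, Callable[..., None]]) -> None:
        # Register (or replace) callbacks, then replay anything still buffered.
        self._callbacks.update(callbacks)
        self._drain()

    def invoke_callback(self, name: str, *args: Any) -> None:
        # Enqueue first, then try to drain, so ordering stays consistent
        # whether or not the callback is registered yet.
        self._queue.append((name, list(args)))
        self._drain()

    def _drain(self) -> None:
        remaining: List[Tuple[str, list]] = []
        for name, args in self._queue:
            callback = self._callbacks.get(name)
            if callback is not None:
                callback(*args)
            else:
                remaining.append((name, args))
        self._queue = remaining

# Usage: a metric that arrives before registration is replayed on registration.
queue = CallbackQueue()
queue.invoke_callback('final_metric', 1, 0.97)      # buffered, no callback yet
queue.register_callbacks({'final_metric': print})   # prints "1 0.97" now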

View file

@ -127,9 +127,11 @@ class Random(BaseStrategy):
if budget_exhausted():
return
time.sleep(self._polling_interval)
_logger.debug('Still waiting for resource.')
try:
model = get_targeted_model(base_model, applied_mutators, sample)
if filter_model(self.filter, model):
_logger.debug('Submitting model: %s', model)
submit_models(model)
except InvalidMutation as e:
_logger.warning(f'Invalid mutation: {e}. Skip.')

View file

@ -15,14 +15,19 @@ def main(argv):
metrics_output_dir = os.environ['METRIC_OUTPUT_DIR']
cmd = 'nvidia-smi -q -x'.split()
while(True):
try:
smi_output = subprocess.check_output(cmd)
except Exception:
traceback.print_exc()
retry = 0
while True:
smi = subprocess.run(cmd, timeout=20, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
if smi.returncode != 0:
retry += 1
print(f'gpu_metrics_collector error: nvidia-smi return code is {smi.returncode}', file=sys.stderr)
print('=' * 20 + f'\nCaptured stdout: {smi.stdout}', file=sys.stderr)
print('=' * 20 + f'\nCaptured stderr: {smi.stderr}', file=sys.stderr)
gen_empty_gpu_metric(metrics_output_dir)
break
parse_nvidia_smi_result(smi_output, metrics_output_dir)
if retry >= 5:
break
else:
parse_nvidia_smi_result(smi.stdout, metrics_output_dir)
# TODO: make the sleep time configurable via arguments
time.sleep(5)
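The rewritten collector above follows a retry-with-fallback pattern; here is a minimal standalone sketch of that pattern, where the parse/fallback callables are placeholders standing in for parse_nvidia_smi_result and gen_empty_gpu_metric:

import subprocess
import sys
import time

def collect_gpu_metrics(cmd, output_dir, parse, fallback, max_retries=5, interval=5):
    # Sketch only: poll `cmd` in a loop, falling back to an empty metric on failure
    # and giving up after `max_retries` failed attempts.
    retry = 0
    while True:
        result = subprocess.run(cmd, timeout=20, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if result.returncode != 0:
            retry += 1
            print(f'collector error: return code {result.returncode}', file=sys.stderr)
            fallback(output_dir)      # write an empty metric so consumers don't block
            if retry >= max_retries:
                break                 # stop retrying after repeated failures
        else:
            parse(result.stdout, output_dir)
        time.sleep(interval)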

View file

@ -1,3 +1,5 @@
# FIXME: This pipeline is broken due to resource group location limitation.
trigger: none
pr: none
@ -11,6 +13,7 @@ variables:
jobs:
- job: linux
pool: nni-it
pool:
vmImage: ubuntu-latest
steps:
- template: templates/build-vm-image-template.yml

View file

@ -1,3 +1,5 @@
# FIXME: This pipeline is broken due to resource group location limitation.
trigger: none
pr: none
@ -11,7 +13,7 @@ variables:
jobs:
- job: windows
pool: nni-it
pool: nni-it-1es-11
timeoutInMinutes: 90
steps:
- template: templates/build-vm-image-template.yml

View file

@ -31,15 +31,18 @@ stages:
condition: and(succeeded(), ne(dependencies.filter.outputs['check.execution.skipsubsequent'], 'true'))
jobs:
- job: linux
# move back after we complete the 1ES pool...
pool:
vmImage: ubuntu-latest
pool: nni-it-1es-11
timeoutInMinutes: 60
steps:
- template: templates/fix-apt-1es.yml
parameters:
check_gpu: true
- template: templates/install-dependencies.yml
parameters:
platform: ubuntu-latest
platform: ubuntu-latest-gpu
python_env: venv
- template: templates/install-nni.yml
@ -48,10 +51,9 @@ stages:
- script: |
cd test/algo
python -m pytest compression
displayName: compression unit test
displayName: Compression unit test
# add back after we complete the 1ES pool...
# - script: |
# cd test
# source scripts/model_compression.sh
# displayName: Model compression test
- script: |
cd test
source scripts/model_compression.sh
displayName: Model compression test

View file

@ -31,15 +31,18 @@ stages:
condition: and(succeeded(), ne(dependencies.filter.outputs['check.execution.skipsubsequent'], 'true'))
jobs:
- job: linux
# move back after we complete the 1ES pool...
pool:
vmImage: ubuntu-latest
pool: nni-it-1es-11
timeoutInMinutes: 60
steps:
- template: templates/fix-apt-1es.yml
parameters:
check_gpu: true
- template: templates/install-dependencies.yml
parameters:
platform: ubuntu-latest
platform: ubuntu-latest-gpu
python_env: venv
- template: templates/install-nni.yml
@ -57,10 +60,7 @@ stages:
- script: |
cd test
python training_service/nnitest/run_tests.py \
--config training_service/config/integration_tests.yml \
--ts local \
--exclude mnist-pytorch-local-gpu
python training_service/nnitest/run_tests.py --config training_service/config/integration_tests.yml --ts local
displayName: Integration test
# TODO: should add a test on platforms other than linux

View file

@ -31,15 +31,18 @@ stages:
condition: and(succeeded(), ne(dependencies.filter.outputs['check.execution.skipsubsequent'], 'true'))
jobs:
- job: linux
# move back after we complete the 1ES pool...
pool:
vmImage: ubuntu-latest
pool: nni-it-1es-11
timeoutInMinutes: 60
steps:
- template: templates/fix-apt-1es.yml
parameters:
check_gpu: true
- template: templates/install-dependencies.yml
parameters:
platform: ubuntu-latest
platform: ubuntu-latest-gpu
python_env: venv
- template: templates/install-nni.yml
@ -51,15 +54,17 @@ stages:
displayName: NAS test
- job: windows
# move back after we complete the 1ES pool...
pool:
vmImage: windows-latest
pool: nni-it-1es-windows
timeoutInMinutes: 60
steps:
# FIXME: Windows should use GPU,
# but it's not used now because the driver is not installed in the image.
- template: templates/install-dependencies.yml
parameters:
platform: windows
python_env: noop
- template: templates/install-nni.yml
parameters:

View file

@ -7,11 +7,12 @@ schedules:
jobs:
- job: hybrid
pool: nni-it
pool: nni-it-1es-11
timeoutInMinutes: 90
steps:
# FIXME: should use GPU here
- template: templates/fix-apt-1es.yml
- template: templates/install-dependencies.yml
parameters:

View file

@ -7,10 +7,14 @@ schedules:
jobs:
- job: linux
pool: nni-it
pool: nni-it-1es-11
timeoutInMinutes: 60
steps:
- template: templates/fix-apt-1es.yml
parameters:
check_gpu: true
- template: templates/install-dependencies.yml
parameters:
platform: ubuntu-latest-gpu

View file

@ -7,7 +7,7 @@ schedules:
jobs:
- job: windows
pool: nni-it-windows
pool: nni-it-1es-windows
timeoutInMinutes: 120
steps:
@ -43,3 +43,5 @@ jobs:
displayName: Integration test
- template: templates/save-crashed-info.yml
parameters:
training_service: local

View file

@ -12,10 +12,11 @@ schedules:
jobs:
- job: remote_linux2linux
pool: nni-it
pool: nni-it-1es-11
timeoutInMinutes: 120
steps:
- template: templates/fix-apt-1es.yml
# FIXME: GPU is not supported yet.
# Change to ubuntu-latest-gpu when it's done.
@ -97,4 +98,4 @@ jobs:
- template: templates/save-crashed-info.yml
parameters:
remote: true
training_service: remote

View file

@ -11,7 +11,7 @@ variables:
jobs:
- job: remote_windows2windows
pool: nni-it-windows
pool: nni-it-1es-windows
timeoutInMinutes: 120
steps:
@ -49,4 +49,4 @@ jobs:
- template: templates/save-crashed-info.yml
parameters:
remote: true
training_service: remote

View file

@ -8,8 +8,11 @@ steps:
# 1. Assign the role following the instruction.
# 2. Assign contributor role of the resource group to the identity.
# 3. Add the identity to VMSS.
#
# Update 2022/7 (running on Microsoft-hosted agents).
# Use a service principal. This service principal must be assigned contributor access to the resource group.
- script: |
az login --identity --allow-no-subscriptions --username $(identity_id)
az login --service-principal -u $(client_id) -p $(client_secret) --tenant $(tenant_id)
displayName: Login to Azure
# Make sure all these are registered.
@ -65,7 +68,8 @@ steps:
export IP_ADDRESS=$(curl -s ifconfig.me)
export VERSION=$(date "+%Y").$(date "+%m%d").$(date "+%H%M%S")
export CONFIG_PATH=$(packer_config).json
sed -i -e "s/<client_id>/$(identity_id)/g" $CONFIG_PATH
sed -i -e "s/<client_id>/$(client_id)/g" $CONFIG_PATH
sed -i -e "s/<client_secret>/$(client_secret)/g" $CONFIG_PATH
sed -i -e "s/<subscription_id>/$(subscription_id)/g" $CONFIG_PATH
sed -i -e "s/<managed_image_name>/$(managed_image_name)/g" $CONFIG_PATH
sed -i -e "s/<resource_group>/$(resource_group)/g" $CONFIG_PATH
@ -113,3 +117,6 @@ steps:
# az vmss update -n nni-it -g nni --set virtualMachineProfile.storageProfile.osDisk.diskSizeGb=50
#
# No need to update the image every time, because it's already set to latest.
#
# NOTE: After switching to the 1ES pool, the pool image has to be updated manually to the latest version.
# However, no successful build has been performed yet, because of a resource shortage in Southeast Asia.

View file

@ -0,0 +1,37 @@
# Fix apt-related issues on the 1ES Linux pipeline.
# 1ES images run an automatic apt-get upgrade periodically in the background.
# This has two bad consequences:
# 1) apt is locked when an install is actually needed;
# 2) the unattended upgrade could break the GPU driver version and crash nvidia-smi.
#
# The ultimate solution is to upgrade the VM image correctly,
# but that is currently infeasible because of a resource group limitation.
# The workaround here is to force-disable the auto-upgrade and
# fix the broken dependencies if the upgrade has already run accidentally.
#
# This file can be removed after the image is updated to the latest version.
parameters:
- name: check_gpu
type: boolean
default: false
steps:
# Don't set -e
# Always make sure the lock is released.
- script: |
set -x
sudo bash test/vso_tools/build_vm/disable_apt_daily.sh
sudo apt-get -o DPkg::Lock::Timeout=120 --fix-broken -y install
displayName: (1ES) Disable apt upgrade
# Make sure the GPU isn't broken.
# Sometimes we can't save the GPU because the upgrade ran too early;
# in that unlucky case the pipeline has to be rerun. It doesn't matter if we don't intend to use the GPU at all.
- script: |
echo "There can be unlucky cases when we can't save the GPU. If nvidia-smi fails, try to rerun the failed jobs."
nvidia-smi
displayName: (1ES) Check GPU status
condition: and(succeeded(), ${{ parameters.check_gpu }})

View file

@ -2,9 +2,9 @@
# so that further offline investigations are possible.
parameters:
- name: remote
type: boolean
default: false
- name: training_service
type: string
default: unknown
steps:
@ -16,11 +16,16 @@ steps:
condition: and(failed(), not(contains(variables['Agent.OS'], 'Windows')))
displayName: (failed) (POSIX) Latest experiment directory
- script: |
cp -r /tmp/$USER/nni ${EXPERIMENT_DIR}/local && echo "Copy successful" || echo "Copy failed"
condition: and(variables['experiment_dir'], eq('${{ parameters.training_service }}', 'local'), not(contains(variables['Agent.OS'], 'Windows')))
displayName: (failed) (POSIX) Harvest GPU scheduler logs
- script: |
set -e
export EXPERIMENT_ID=$(echo ${EXPERIMENT_DIR} | sed -e 's/\/.*\///g')
sudo docker cp $(Build.BuildId):/tmp/nni-experiments/${EXPERIMENT_ID} ${EXPERIMENT_DIR}/remote && echo "Copy successful" || echo "Copy failed"
condition: and(variables['experiment_dir'], ${{ parameters.remote }}, not(contains(variables['Agent.OS'], 'Windows')))
condition: and(variables['experiment_dir'], eq('${{ parameters.training_service }}', 'remote'), not(contains(variables['Agent.OS'], 'Windows')))
displayName: (failed) (POSIX) Harvest remote trial logs
- powershell: |
@ -30,6 +35,21 @@ steps:
condition: and(failed(), contains(variables['Agent.OS'], 'Windows'))
displayName: (failed) (Windows) Latest experiment directory
- powershell: |
$latestDir = Get-Item $(experiment_dir)
$tmpPath = "${env:Temp}\${env:UserName}\nni"
$destPath = "${latestDir}\local"
if (Test-Path $tmpPath) {
Write-Host "Copying $tmpPath to $destPath"
Copy-Item $tmpPath -Destination $destPath -Recurse
}
else {
Write-host "$tmpPath doesn't exist"
}
condition: and(variables['experiment_dir'], eq('${{ parameters.training_service }}', 'local'), contains(variables['Agent.OS'], 'Windows'))
displayName: (failed) (Windows) Harvest GPU scheduler logs
- powershell: |
$latestDir = Get-Item $(experiment_dir)
$experimentId = $latestDir.name
@ -43,7 +63,7 @@ steps:
else {
Write-host "$remotePath doesn't exist"
}
condition: and(variables['experiment_dir'], ${{ parameters.remote }}, contains(variables['Agent.OS'], 'Windows'))
condition: and(variables['experiment_dir'], eq('${{ parameters.training_service }}', 'remote'), contains(variables['Agent.OS'], 'Windows'))
displayName: (failed) (Windows) Harvest remote trial logs
- publish: $(experiment_dir)

View file

@ -53,8 +53,9 @@ def test_kill_process_slow_no_patience():
start_time = time.time()
kill_command(process.pid, timeout=1) # didn't wait long enough
end_time = time.time()
if sys.platform == 'linux': # FIXME: on non-linux, seems that the time of termination can't be controlled
assert 0.5 < end_time - start_time < 2
if sys.platform == 'linux':
# There used to be an assertion that 0.5 < end_time - start_time, but it was not stable.
assert end_time - start_time < 2
assert process.poll() is None
assert _check_pid_running(process.pid)
else:
@ -73,8 +74,7 @@ def test_kill_process_slow_patiently():
kill_command(process.pid, timeout=3) # wait long enough
end_time = time.time()
assert end_time - start_time < 5
if sys.platform == 'linux':
assert end_time - start_time > 1 # I don't know why windows is super fast
# assert end_time - start_time > 1 # This check is disabled because it's not stable
@pytest.mark.skipif(sys.platform != 'linux', reason='Signal issues on non-linux.')

View file

@ -3,6 +3,8 @@
"type": "azure-arm",
"client_id": "<client_id>",
"client_secret": "<client_secret>",
"subscription_id": "<subscription_id>",
"managed_image_name": "<managed_image_name>",
"managed_image_resource_group_name": "<resource_group>",
@ -20,7 +22,7 @@
"gallery_name": "<gallery_name>",
"image_name": "<image_name>",
"image_version": "<image_version>",
"replication_regions": ["southeastasia", "westus2"],
"replication_regions": ["southeastasia", "westus2", "eastus"],
"storage_account_type": "Standard_LRS"
},

View file

@ -3,6 +3,8 @@
"type": "azure-arm",
"client_id": "<client_id>",
"client_secret": "<client_secret>",
"subscription_id": "<subscription_id>",
"managed_image_name": "<managed_image_name>",
"managed_image_resource_group_name": "<resource_group>",
@ -18,7 +20,7 @@
"gallery_name": "<gallery_name>",
"image_name": "<image_name>",
"image_version": "<image_version>",
"replication_regions": ["southeastasia", "westus2"],
"replication_regions": ["southeastasia", "westus2", "eastus"],
"storage_account_type": "Standard_LRS"
},

View file

@ -0,0 +1,40 @@
#!/bin/bash
# Disable the periodic apt-get upgrade, as it can break the GPU driver.
sed -i -e "s/Update-Package-Lists \"1\"/Update-Package-Lists \"0\"/g" /etc/apt/apt.conf.d/10periodic
sed -i -e "s/Update-Package-Lists \"1\"/Update-Package-Lists \"0\"/g" /etc/apt/apt.conf.d/20auto-upgrades
sed -i -e "s/Unattended-Upgrade \"1\"/Unattended-Upgrade \"0\"/g" /etc/apt/apt.conf.d/20auto-upgrades
systemctl disable apt-daily.timer
systemctl disable apt-daily.service
systemctl disable apt-daily-upgrade.timer
systemctl disable apt-daily-upgrade.service
# If the trick above doesn't work, try uncommenting the following lines.
# References: https://gist.github.com/posilva/1cefb5bf1eeccf9382920e5d57a4b3fe
# apt-get -y purge update-notifier-common ubuntu-release-upgrader-core landscape-common unattended-upgrades
# systemctl kill --kill-who=all apt-daily.service
# systemctl kill --kill-who=all apt-daily-upgrade.service
# systemctl stop apt-daily.timer
# systemctl disable apt-daily.timer
# systemctl stop apt-daily.service
# systemctl disable apt-daily.service
# systemctl stop apt-daily-upgrade.timer
# systemctl disable apt-daily-upgrade.timer
# systemctl stop apt-daily-upgrade.service
# systemctl disable apt-daily-upgrade.service
# systemctl daemon-reload
# systemctl reset-failed
# rm /etc/systemd/system/timers.target.wants/apt-daily.timer
# rm /etc/systemd/system/timers.target.wants/apt-daily-upgrade.timer
# mv /usr/lib/apt/apt.systemd.daily /usr/lib/apt/apt.systemd.daily.DISABLED
# mv /lib/systemd/system/apt-daily.service /lib/systemd/system/apt-daily.service.DISABLED
# mv /lib/systemd/system/apt-daily.timer /lib/systemd/system/apt-daily.timer.DISABLED
# mv /lib/systemd/system/apt-daily-upgrade.service /lib/systemd/system/apt-daily-upgrade.service.DISABLED
# mv /lib/systemd/system/apt-daily-upgrade.timer /lib/systemd/system/apt-daily-upgrade.timer.DISABLED