hibernation-setup-tool/hibernation-setup-tool.c

1800 строки
56 KiB
C
Исходник Постоянная ссылка Обычный вид История

2021-07-30 19:13:00 +03:00
// Hibernation Setup Tool
// Sets up a swap area suitable to hibernate a Linux system.
2021-03-24 04:51:10 +03:00
//
// Copyright (c) 2021 Microsoft Corp.
// Licensed under the terms of the MIT license.
#define _GNU_SOURCE
/* sys/mount.h has to be included before linux/fs.h
* https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=898743
*/
#include <sys/mount.h>
2021-03-24 04:51:10 +03:00
#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <ftw.h>
2021-03-24 04:51:10 +03:00
#include <linux/falloc.h>
#include <linux/fs.h>
#include <linux/ioctl.h>
#include <linux/magic.h>
2021-04-14 19:27:42 +03:00
#include <linux/suspend_ioctls.h>
2021-03-24 04:51:10 +03:00
#include <mntent.h>
#include <netdb.h>
#include <netinet/in.h>
2021-03-24 04:51:10 +03:00
#include <spawn.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/auxv.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/statfs.h>
2021-03-24 04:51:10 +03:00
#include <sys/swap.h>
#include <sys/types.h>
2021-04-14 19:27:42 +03:00
#include <sys/utsname.h>
2021-03-24 04:51:10 +03:00
#include <sys/vfs.h>
2021-04-14 19:27:42 +03:00
#include <sys/wait.h>
#include <syscall.h>
#include <syslog.h>
#include <sys/socket.h>
2021-03-24 04:51:10 +03:00
#include <unistd.h>
#define MEGA_BYTES (1ul << 20)
#define GIGA_BYTES (1ul << 30)
2021-03-24 04:51:10 +03:00
#ifndef SEEK_HOLE
#define SEEK_HOLE 4
#endif
#ifndef IOPRIO_WHO_PROCESS
#define IOPRIO_WHO_PROCESS 1
#endif
#ifndef IOPRIO_CLASS_IDLE
#define IOPRIO_CLASS_IDLE 3
#endif
#ifndef IOPRIO_PRIO_VALUE
#define IOPRIO_PRIO_VALUE(klass, data) (((klass) << 13) | (data))
#endif
#ifndef XFS_SUPER_MAGIC
#define XFS_SUPER_MAGIC ('X' << 24 | 'F' << 16 | 'S' << 8 | 'B')
#endif
2021-03-24 04:51:10 +03:00
static const char swap_file_name[] = "/hibfile.sys";
/* Prefixes are needed when running services. This makes it easier to grep for
* code run via hibernate, resume hooks and hibernation tool. */
static bool log_needs_tool_prefix = false;
static bool log_needs_pre_hook_prefix = false;
static bool log_needs_post_hook_prefix = false;
/* We don't always want to spam syslog: spamming stdout is fine as this tool
* and its hooks are supposed to be executed as daemons in systemd and their
* output will be stored in their journal files. */
static bool log_needs_syslog = false;
/* This is a link pointing to a file in a tmpfs filesystem and is mostly used to detect
* if we got a cold boot or not. */
2021-07-30 19:13:00 +03:00
static const char hibernate_lock_file_name[] = "/etc/hibernation-setup-tool.last_hibernation";
enum host_vm_notification {
HOST_VM_NOTIFY_COLD_BOOT, /* Sent every time system cold boots */
HOST_VM_NOTIFY_HIBERNATING, /* Sent right before hibernation */
HOST_VM_NOTIFY_RESUMED_FROM_HIBERNATION, /* Sent right after hibernation */
HOST_VM_NOTIFY_PRE_HIBERNATION_FAILED, /* Sent on errors when hibernating or resuming */
};
2021-03-24 04:51:10 +03:00
struct swap_file {
size_t capacity;
char path[];
};
static int ioprio_set(int which, int who, int ioprio) { return (int)syscall(SYS_ioprio_set, which, who, ioprio); }
2021-03-24 04:51:10 +03:00
static void log_impl(int log_level, const char *fmt, va_list ap)
2021-03-24 04:51:10 +03:00
{
if (log_needs_syslog) {
va_list ap_cpy;
va_copy(ap_cpy, ap);
vsyslog(log_level, fmt, ap_cpy);
va_end(ap_cpy);
}
2021-03-24 04:51:10 +03:00
flockfile(stdout);
if (log_needs_tool_prefix)
2021-07-30 19:13:00 +03:00
printf("hibernation-setup-tool: ");
else if (log_needs_pre_hook_prefix)
printf("hibernate-hook: ");
else if (log_needs_post_hook_prefix)
printf("resume-hook: ");
2022-01-15 07:09:41 +03:00
if (log_level == LOG_INFO)
printf("INFO: ");
2022-01-15 07:09:41 +03:00
else if (log_level == LOG_ERR)
printf("ERROR: ");
else if (log_level == LOG_NOTICE)
printf("NOTICE: ");
2021-03-24 04:51:10 +03:00
vprintf(fmt, ap);
printf("\n");
2021-03-24 04:51:10 +03:00
funlockfile(stdout);
}
__attribute__((format(printf, 1, 2))) static void log_info(const char *fmt, ...)
2021-03-24 04:51:10 +03:00
{
va_list ap;
va_start(ap, fmt);
log_impl(LOG_INFO, fmt, ap);
2021-03-24 04:51:10 +03:00
va_end(ap);
}
__attribute__((format(printf, 1, 2))) static void log_notice(const char *fmt, ...)
{
va_list ap;
va_start(ap, fmt);
log_impl(LOG_NOTICE, fmt, ap);
va_end(ap);
}
__attribute__((format(printf, 1, 2))) __attribute__((noreturn)) static void log_fatal(const char *fmt, ...)
2021-03-24 04:51:10 +03:00
{
va_list ap;
va_start(ap, fmt);
log_impl(LOG_ERR, fmt, ap);
2021-03-24 04:51:10 +03:00
va_end(ap);
exit(1);
__builtin_unreachable();
}
static char *next_field(char *current)
{
if (!current)
return NULL;
if (*current == '\0')
return NULL;
while (!isspace(*current))
current++;
*current = '\0'; /* 0-terminate the previous call to next_field() */
current++; /* skip the NUL terminator */
2021-03-24 04:51:10 +03:00
while (isspace(*current))
current++;
return current;
}
static size_t parse_size_or_die(const char *ptr, const char expected_end, char **endptr)
{
size_t parsed;
errno = 0;
if (sizeof(size_t) == sizeof(unsigned long long)) {
parsed = strtoull(ptr, endptr, 10);
} else if (sizeof(size_t) == sizeof(unsigned long)) {
parsed = strtoul(ptr, endptr, 10);
} else {
log_fatal("Invalid size of size_t: %zu", sizeof(size_t));
}
if (errno || (endptr && **endptr != expected_end))
log_fatal("Could not parse size: %s", strerror(errno));
return parsed;
}
static const char *find_executable_in_path(const char *name, const char *path_env, char path_buf[static PATH_MAX])
2021-03-24 04:51:10 +03:00
{
if (!path_env)
path_env = "/bin:/sbin:/usr/bin:/usr/sbin";
2021-03-24 04:51:10 +03:00
while (*path_env) {
const char *p = strchr(path_env, ':');
int ret;
if (!p) {
/* Last segment of $PATH */
ret = snprintf(path_buf, PATH_MAX, "%s/%s", path_env, name);
2021-03-24 04:51:10 +03:00
path_env = "";
} else if (p - path_env) {
2021-03-24 04:51:10 +03:00
/* Middle segments */
ret = snprintf(path_buf, PATH_MAX, "%.*s/%s", (int)(p - path_env), path_env, name);
2021-03-24 04:51:10 +03:00
path_env = p + 1;
} else {
/* Empty segment or non-root directory? Skip. */
path_env = p + 1;
continue;
2021-03-24 04:51:10 +03:00
}
if (ret < 0 || ret >= PATH_MAX)
log_fatal("Building path to determine if %s exists would overflow buffer", name);
if (path_buf[0] != '/')
continue;
if (!access(path_buf, X_OK))
return path_buf;
2021-03-24 04:51:10 +03:00
}
return NULL;
}
static bool is_exec_in_path(const char *name)
{
char path_buf[PATH_MAX];
return find_executable_in_path(name, getenv("PATH"), path_buf) != NULL;
2021-03-24 04:51:10 +03:00
}
static struct swap_file *new_swap_file(const char *path, size_t capacity)
{
struct swap_file *out;
out = malloc(sizeof(*out) + strlen(path) + 1);
if (!out)
log_fatal("Could not allocate memory for swap file information");
out->capacity = capacity;
memcpy(out->path, path, strlen(path) + 1);
return out;
}
static struct swap_file *find_swap_file(size_t needed_size)
{
char buffer[1024];
FILE *swaps;
struct swap_file *out = NULL;
swaps = fopen("/proc/swaps", "re");
if (!swaps)
log_fatal("Could not open /proc/swaps: is /proc mounted?");
/* Skip first line (header) */
if (!fgets(buffer, sizeof(buffer), swaps))
log_fatal("Could not skip first line from /proc/swaps");
while (fgets(buffer, sizeof(buffer), swaps)) {
char *filename = buffer;
char *type = next_field(filename);
if (!type) {
log_info("Couldn't get the second field while parsing /proc/swaps");
break;
}
2021-03-24 04:51:10 +03:00
if (!strcmp(type, "file")) {
2021-04-14 00:40:46 +03:00
char *size = next_field(type);
if (!size)
log_fatal("Malformed line in /proc/swaps: can't find size column");
2021-03-24 04:51:10 +03:00
2021-04-14 00:40:46 +03:00
size_t size_as_int = parse_size_or_die(size, ' ', NULL);
2021-03-24 04:51:10 +03:00
if (size_as_int < needed_size)
continue;
out = new_swap_file(filename, size_as_int);
break;
}
}
fclose(swaps);
if (!out) {
struct stat st;
if (stat(swap_file_name, &st) < 0)
return NULL;
if (!S_ISREG(st.st_mode))
return NULL;
return new_swap_file(swap_file_name, st.st_size);
}
return out;
}
static size_t physical_memory(void)
{
FILE *meminfo;
char buffer[256];
size_t total = 0;
meminfo = fopen("/proc/meminfo", "re");
if (!meminfo)
log_fatal("Could not determine physical memory size. Is /proc mounted?");
while (fgets(buffer, sizeof(buffer), meminfo)) {
static const size_t mem_total_len = sizeof("MemTotal: ") - 1;
if (!strncmp(buffer, "MemTotal: ", mem_total_len)) {
char *endptr;
total = parse_size_or_die(buffer + mem_total_len, ' ', &endptr);
if (!strcmp(endptr, " kB\n"))
total *= 1024;
else if (!strcmp(endptr, " MB\n"))
total *= MEGA_BYTES;
else if (!strcmp(endptr, " GB\n"))
total *= 1024 * MEGA_BYTES;
else if (!strcmp(endptr, " TB\n"))
total *= (size_t)MEGA_BYTES * (size_t)MEGA_BYTES;
else
log_fatal("Could not determine unit for physical memory information");
break;
}
}
fclose(meminfo);
return total;
}
static size_t swap_needed_size(size_t phys_mem)
{
/* This is using the recommendation from the Fedora project documentation. */
if (phys_mem <= 2 * GIGA_BYTES)
return 3 * phys_mem;
if (phys_mem <= 8 * GIGA_BYTES)
return 2 * phys_mem;
if (phys_mem <= 64 * GIGA_BYTES)
return (3 * phys_mem) / 2;
/* Note: Fedora documentation doesn't recommend hibernation for machines over 64GB of RAM,
* but we're extending this for a bit. */
if (phys_mem <= 256 * GIGA_BYTES)
return (5 * phys_mem) / 4;
log_fatal("Hibernation not recommended for a machine with more than 256GB of RAM");
2021-03-24 04:51:10 +03:00
}
static size_t free_device_space()
{
struct statfs buffer;
size_t block_size;
size_t free_bytes;
if (statfs("/", &buffer) < 0)
log_fatal("Could not determine free device space: %s", strerror(errno));
block_size = buffer.f_bsize;
free_bytes = block_size * buffer.f_bfree;
return free_bytes;
}
2021-03-24 04:51:10 +03:00
static char *get_uuid_for_dev_path(const char *path)
{
struct stat dev_st;
struct dirent *ent;
char *uuid = NULL;
DIR *uuid_dir;
if (stat(path, &dev_st) < 0) {
log_info("Could not stat(%s): %s", path, strerror(errno));
return NULL;
2021-04-23 22:50:09 +03:00
}
2021-03-24 04:51:10 +03:00
uuid_dir = opendir("/dev/disk/by-uuid/");
if (!uuid_dir) {
log_info("Could not open directory /dev/disk/by-uuid/: %s", strerror(errno));
return NULL;
}
while ((ent = readdir(uuid_dir))) {
struct stat ent_st;
if (fstatat(dirfd(uuid_dir), ent->d_name, &ent_st, 0) < 0)
continue;
/* Shouldn't happen, but just in case */
if ((ent_st.st_mode & S_IFMT) != S_IFBLK)
continue;
if (ent_st.st_rdev == dev_st.st_rdev) {
uuid = strdup(ent->d_name);
break;
}
}
closedir(uuid_dir);
if (uuid)
log_info("UUID for device %s is %s", path, uuid);
return uuid;
}
static char *get_disk_uuid_for_file_path(const char *path)
{
FILE *mounts = setmntent("/proc/mounts", "re");
struct mntent *ent;
struct stat st;
if (!mounts)
return NULL;
if (stat(path, &st) < 0)
log_fatal("Could not stat(%s): %s", path, strerror(errno));
while ((ent = getmntent(mounts))) {
struct stat ent_st;
if (stat(ent->mnt_dir, &ent_st) < 0)
continue;
if (ent_st.st_dev == st.st_dev)
break;
}
endmntent(mounts);
if (!ent) {
log_info("Could not determine device for file in path %s", path);
return NULL;
}
return get_uuid_for_dev_path(ent->mnt_fsname);
}
static long determine_block_size_for_root_fs(void)
{
FILE *mtab = setmntent("/proc/mounts", "re");
struct mntent *mntent;
long sector_size = 0;
if (!mtab)
log_fatal("Could not determine mounted partitions. Is /proc mounted?");
while ((mntent = getmntent(mtab))) {
if (!strcmp(mntent->mnt_dir, "/")) {
int fd = open(mntent->mnt_fsname, O_RDONLY | O_CLOEXEC);
2021-03-24 04:51:10 +03:00
if (fd < 0) {
log_fatal("Could not open %s to determine block size: %s", mntent->mnt_fsname, strerror(errno));
2021-03-24 04:51:10 +03:00
}
if (ioctl(fd, BLKSSZGET, &sector_size) < 0)
sector_size = 0;
2021-03-24 04:51:10 +03:00
close(fd);
break;
}
}
endmntent(mtab);
if (sector_size) {
struct statfs sfs;
if (statfs("/", &sfs) < 0)
log_fatal("Could not determine optimal block size for root filesystem: %s", strerror(errno));
2021-03-24 04:51:10 +03:00
return sfs.f_bsize > sector_size ? sfs.f_bsize : sector_size;
}
log_fatal("Could not obtain sector size for root partition: %s", strerror(errno));
2021-03-24 04:51:10 +03:00
}
static char *read_first_line_from_file(const char *path, char buffer[static 1024])
{
FILE *f = fopen(path, "re");
if (!f)
return NULL;
2021-03-24 04:51:10 +03:00
bool did_read = fgets(buffer, 1024, f) != NULL;
fclose(f);
2021-03-24 04:51:10 +03:00
if (!did_read)
return NULL;
2021-03-24 04:51:10 +03:00
char *lf = strchr(buffer, '\n');
if (lf)
*lf = '\0';
2021-03-24 04:51:10 +03:00
return buffer;
2021-03-24 04:51:10 +03:00
}
static bool is_hyperv(void) { return !access("/sys/bus/vmbus", F_OK); }
2021-05-19 02:02:22 +03:00
static bool is_running_in_container(void)
{
FILE *cgroup;
char buffer[1024];
bool ret = false;
cgroup = fopen("/proc/1/cgroup", "re");
if (!cgroup)
log_fatal("Could not read /proc/1/cgroup to determine if we're running in a container");
while (fgets(buffer, sizeof(buffer), cgroup)) {
const char *first_colon = strchr(buffer, ':');
if (!first_colon)
continue;
/* When running in a container, PID 1 will have a line in
* /proc/1/cgroup with "0::/" as a content; whereas, when running in
* the host, it might have something like "0::/init.slice".
*
* Things might be different with anything other than systemd, but
* since we're supporting systemd-only distros at this point, it's
* safer to use this method rather than rely on things like the
* presence of /.dockerenv or something like that as the container
* runtime not be docker. */
if (!strcmp(first_colon, "::/\n")) {
ret = true;
break;
}
}
fclose(cgroup);
return ret;
}
2021-03-24 04:51:10 +03:00
static bool is_hibernation_enabled_for_vm(void)
{
char buffer[1024];
char *entry;
if (is_running_in_container()) {
log_info("We're running in a container; this isn't supported.");
return false;
}
2021-03-24 04:51:10 +03:00
if (access("/dev/snapshot", F_OK) != 0) {
log_info("Kernel does not support hibernation or /dev/snapshot has not been found.");
return false;
}
/* First, check if the kernel can hibernate. Don't even bother if the
* interface to hibernate it isn't available. */
2021-03-24 04:51:10 +03:00
entry = read_first_line_from_file("/sys/power/disk", buffer);
if (!entry) {
log_info("Kernel does not support hibernation (/sys/power/disk does not exist or can't be read).");
return false;
}
if (strstr(entry, "platform")) {
log_info("VM supports hibernation with platform-supported events.");
return true;
}
if (strstr(entry, "shutdown")) {
log_info("VM supports hibernation only with the shutdown method. This is not ideal.");
} else if (strstr(entry, "suspend")) {
log_info("VM supports hibernation only with the suspend method. This is not ideal.");
} else {
log_info("Unknown VM hibernation support mode found: %s", entry);
return false;
}
2021-03-24 04:51:10 +03:00
2021-05-19 02:02:22 +03:00
if (is_hyperv()) {
log_info("This is a Hyper-V VM, checking if hibernation is enabled through VMBus events.");
entry = read_first_line_from_file("/sys/bus/vmbus/hibernation", buffer);
if (entry) {
if (!strcmp(entry, "1")) {
log_info("Hibernation is enabled according to VMBus. This is ideal.");
return true;
}
log_info("Hibernation is disabled according to VMBus.");
}
}
log_info("Even though VM is capable of hibernation, it seems to be disabled.");
return false;
2021-03-24 04:51:10 +03:00
}
static int open_and_get_socket(const char *host, int portno)
{
struct hostent *server;
struct sockaddr_in serv_addr;
int sockfd;
/* create the socket */
sockfd = socket(AF_INET, SOCK_STREAM, 0);
if (sockfd < 0)
{
log_info("Error opening socket: %s", strerror(errno));
return -1;
}
struct timeval timeout;
timeout.tv_sec = 10;
timeout.tv_usec = 0;
if (setsockopt(sockfd, SOL_SOCKET, SO_RCVTIMEO, &timeout, sizeof timeout) < 0 ||
setsockopt(sockfd, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof timeout) < 0)
{
log_info("Unable to set timeouts for socket: %s", strerror(errno));
close(sockfd);
return -1;
}
/* lookup the ip address */
server = gethostbyname(host);
if (server == NULL)
{
log_info("Unable to fetch host info %s: %s", host, strerror(h_errno));
close(sockfd);
return -1;
}
/* fill in the structure */
memset(&serv_addr, 0, sizeof(serv_addr));
serv_addr.sin_family = AF_INET;
serv_addr.sin_port = htons(portno);
memcpy(&serv_addr.sin_addr.s_addr, server->h_addr, server->h_length);
/* connect the socket */
if (connect(sockfd, (struct sockaddr *)&serv_addr, sizeof(serv_addr)) < 0)
{
log_info("Unable to connect to host %s: %s", host, strerror(errno));
close(sockfd);
return -1;
}
return sockfd;
}
static bool is_hibernation_allowed_for_vm(void)
{
int portno = 80;
char response[4096];
int sockfd, read_bytes;
const char *imds_host = "169.254.169.254";
const char *imds_req = "GET /metadata/instance/compute/additionalCapabilities/hibernationEnabled?api-version=2021-11-01&format=text HTTP/1.1\r\nHost: %s\r\nMetadata:true\r\n\r\n";
size_t req_size = strlen(imds_req) + strlen(imds_host) + 1;
char request[req_size];
snprintf(request, req_size, imds_req, imds_host);
sockfd = open_and_get_socket(imds_host, portno);
if(sockfd < 0)
return false;
log_info("IMDS Request:\n%s\n", request);
/*
Sample request -
GET /metadata/instance/compute/additionalCapabilities/hibernationEnabled?api-version=2021-11-01&format=text HTTP/1.1
Host: 169.254.169.254
Metadata:true
*/
if (write(sockfd, request, strlen(request)) < 0)
{
log_info("Failed to write to socket: %s", strerror(errno));
close(sockfd);
return false;
}
/*
Sample response -
HTTP/1.1 200 OK
Content-Type: text/plain; charset=utf-8
Server: IMDS/150.870.65.597
Date: Tue, 22 Mar 2022 00:34:37 GMT
Content-Length: 4
true (or false)
*/
read_bytes = read(sockfd, response, sizeof(response) - 1);
if (read_bytes < 0)
{
log_info("Failed to read from socket: %s", strerror(errno));
close(sockfd);
return false;
}
else if (read_bytes == 0)
{
log_info("IMDS connection closed prematurely without returning any response");
close(sockfd);
return false;
}
response[read_bytes] = '\0';
/* close the socket */
close(sockfd);
/* process response */
log_info("IMDS Response:\n%s\n", response);
if(strstr(response, "true"))
{
log_info("Hibernation is allowed for this VM");
return true;
}
return false;
}
2021-03-24 04:51:10 +03:00
static uint32_t get_swap_file_offset(int fd)
{
uint32_t blksize;
if (ioctl(fd, FIGETBSZ, &blksize) < 0)
log_fatal("Could not get file block size: %s", strerror(errno));
2021-04-23 22:50:09 +03:00
2021-03-24 04:51:10 +03:00
uint32_t last = 0, first = 0, num_contiguous_blocks = 0;
uint32_t blocks_per_page = sysconf(_SC_PAGE_SIZE) / blksize;
uint32_t first_blknum = ~0;
for (uint32_t i = 0; i < blocks_per_page; i++) {
uint32_t blknum = i;
if (ioctl(fd, FIBMAP, &blknum) < 0)
log_fatal("Could not get filesystem block number for block #%d: %s", i, strerror(errno));
2021-03-24 04:51:10 +03:00
if (i == 0)
first_blknum = blknum;
if (last && blknum - last != 1) {
/* If we find a block that's not contiguous, bail out. We
* check below if we have enough contiguous blocks for hibernation
* to work. */
break;
}
if (!first)
first = blknum;
last = blknum;
num_contiguous_blocks++;
}
log_info("First %d blocks of %d bytes are contiguous", num_contiguous_blocks, blksize);
2021-03-24 04:51:10 +03:00
if (num_contiguous_blocks * blksize >= sysconf(_SC_PAGE_SIZE))
return first_blknum;
return ~0;
}
static bool try_zero_out_with_write(const char *path, off_t needed_size)
2021-03-24 04:51:10 +03:00
{
int fd = open(path, O_WRONLY | O_CLOEXEC);
int ret = true;
2021-03-24 04:51:10 +03:00
if (fd < 0) {
log_info("Could not open %s: %s", path, strerror(errno));
return false;
}
long block_size = determine_block_size_for_root_fs();
const uint32_t pattern = 'T' << 24 | 'F' << 16 | 'S' << 8 | 'M';
for (off_t offset = 0; offset < needed_size; offset += block_size) {
ssize_t written = pwrite(fd, &pattern, sizeof(pattern), offset);
if (written < 0) {
2021-03-24 04:51:10 +03:00
log_info("Could not write pattern to %s: %s", path, strerror(errno));
ret = false;
goto out;
2021-03-24 04:51:10 +03:00
}
}
2021-06-23 20:30:20 +03:00
2021-03-24 04:51:10 +03:00
fdatasync(fd);
out:
2021-03-24 04:51:10 +03:00
close(fd);
return ret;
2021-03-24 04:51:10 +03:00
}
static bool fs_set_flags(int fd, int flags_to_set, int flags_to_reset)
{
int current_flags;
if (ioctl(fd, FS_IOC_GETFLAGS, &current_flags) < 0)
return false;
current_flags |= flags_to_set;
current_flags &= ~flags_to_reset;
if (ioctl(fd, FS_IOC_SETFLAGS, &current_flags) < 0)
return false;
return true;
}
static bool is_file_on_fs(const char *path, __fsword_t magic)
{
struct statfs stfs;
if (!statfs(path, &stfs))
return stfs.f_type == magic;
return false;
}
static bool create_swap_file_with_size(const char *path, off_t size)
2021-03-24 04:51:10 +03:00
{
int fd = open(path, O_CLOEXEC | O_WRONLY | O_CREAT, 0600);
int rc;
2021-03-24 04:51:10 +03:00
if (fd < 0)
return false;
/* Disabling CoW is necessary on btrfs filesystems, but issue the
* ioctl regardless of the filesystem just in case.
* More information: https://wiki.archlinux.org/index.php/btrfs#Swap_file
*/
if (!fs_set_flags(fd, FS_NOCOW_FL, 0)) {
/* Some filesystems don't support CoW (EXT4 for instance), so don't bother
* giving an error message in those cases. */
if (errno != EOPNOTSUPP)
log_info("Could not disable CoW for %s: %s. Will try setting up swap anyway.", path, strerror(errno));
}
/* Disable compression, too. */
if (!fs_set_flags(fd, FS_NOCOMP_FL, FS_COMPR_FL)) {
/* Compression is optional, too, so don't bother giving an error message in
* case the filesystem doesn't support it. */
if (errno != EOPNOTSUPP)
log_info("Could not disable compression for %s: %s. Will try setting up swap anyway.", path, strerror(errno));
}
if (is_file_on_fs(path, XFS_SUPER_MAGIC)) {
rc = 0;
} else {
rc = ftruncate(fd, size);
if (rc < 0) {
if (errno == EPERM) {
log_info("Not enough disk space to create %s with %zu MB.", path, size / MEGA_BYTES);
} else {
log_info("Could not resize %s to %ld MB: %s", path, size / MEGA_BYTES, strerror(errno));
}
2021-03-24 04:51:10 +03:00
}
}
close(fd);
return rc == 0;
}
static bool try_zeroing_out_with_fallocate(const char *path, off_t size)
{
int fd = open(path, O_CLOEXEC | O_WRONLY);
if (fd < 0) {
log_info("Could not open %s for writing: %s", path, strerror(errno));
return false;
}
if (fallocate(fd, 0, 0, size) < 0) {
2022-07-01 21:41:45 +03:00
int fallocate_errno = errno;
if (unlink(path) < 0)
log_info("Couldn't remove incomplete hibernation file %s: %s", path, strerror(errno));
2022-07-01 21:41:45 +03:00
if (fallocate_errno == ENOSPC) {
log_fatal("System ran out of disk space while allocating hibernation file. It needs %zd MiB", size / MEGA_BYTES);
2021-03-24 04:51:10 +03:00
} else {
2022-07-01 23:49:11 +03:00
log_fatal("Could not allocate %s: %s", path, strerror(fallocate_errno));
2021-03-24 04:51:10 +03:00
}
}
close(fd);
return true;
}
static bool try_vspawn_and_wait(const char *program, int n_args, va_list ap)
2021-03-24 04:51:10 +03:00
{
pid_t pid;
int rc;
char **argv;
argv = calloc(n_args + 2, sizeof(char *));
if (!argv) {
log_info("Couldn't allocate memory for argument array");
return false;
}
argv[0] = (char *)program;
for (int i = 1; i <= n_args; i++)
argv[i] = va_arg(ap, char *);
2021-03-24 04:51:10 +03:00
rc = posix_spawnp(&pid, program, NULL, NULL, argv, NULL);
free(argv);
if (rc != 0) {
log_info("Could not spawn %s: %s", program, strerror(rc));
return false;
}
log_info("Waiting for %s (pid %d) to finish.", program, pid);
int wstatus;
if (waitpid(pid, &wstatus, 0) != pid) {
log_info("Couldn't wait for %s: %s", program, strerror(errno));
return false;
}
if (!WIFEXITED(wstatus)) {
log_info("%s ended abnormally: %s", program, strerror(errno));
return false;
}
if (WEXITSTATUS(wstatus) == 127) {
log_info("Failed to spawn %s", program);
return false;
}
if (WEXITSTATUS(wstatus) != 0) {
log_info("%s ended with unexpected exit code %d", program, WEXITSTATUS(wstatus));
return false;
}
log_info("%s finished successfully.", program);
return true;
}
static bool try_spawn_and_wait(const char *program, int n_args, ...)
{
va_list ap;
bool spawned;
2021-03-24 04:51:10 +03:00
va_start(ap, n_args);
spawned = try_vspawn_and_wait(program, n_args, ap);
2021-03-24 04:51:10 +03:00
va_end(ap);
return spawned;
}
2021-03-24 04:51:10 +03:00
static void spawn_and_wait(const char *program, int n_args, ...)
{
va_list ap;
bool spawned;
2021-03-24 04:51:10 +03:00
va_start(ap, n_args);
spawned = try_vspawn_and_wait(program, n_args, ap);
va_end(ap);
if (!spawned)
log_fatal("Aborting program due to error condition when spawning %s", program);
2021-03-24 04:51:10 +03:00
}
static void perform_fs_specific_checks(const char *path)
2021-03-24 04:51:10 +03:00
{
/* Not performing defragmentation for xfs file systems as xfs_fsr process is taking time for SKUs with larger RAM.
While it is good to have optimization, not ideal to have performance hit on the tool */
if (is_file_on_fs(path, EXT4_SUPER_MAGIC) && is_exec_in_path("e4defrag")) {
try_spawn_and_wait("e4defrag", 1, path);
return;
}
if (is_file_on_fs(path, BTRFS_SUPER_MAGIC)) {
struct utsname utsbuf;
if (uname(&utsbuf) < 0)
log_fatal("Could not determine Linux kernel version: %s", strerror(errno));
if (utsbuf.release[1] != '.')
log_fatal("Could not parse Linux kernel version");
2021-04-14 19:27:42 +03:00
if (utsbuf.release[0] < '5')
log_fatal("Swap files are not supported on Btrfs running on kernel %s", utsbuf.release);
if (is_exec_in_path("btrfs"))
try_spawn_and_wait("btrfs", 3, "filesystem", "defragment", path);
return;
}
2021-03-24 04:51:10 +03:00
}
bool is_kernel_version_at_least(const char *version)
{
2022-01-15 07:09:41 +03:00
struct utsname my_uname;
if (uname(&my_uname) == -1) {
log_info("uname call failed.");
2022-01-15 07:09:41 +03:00
return true;
}
unsigned sys_major_version = 0, sys_minor_version = 0;
unsigned req_major_version = 0, req_minor_version = 0;
sscanf(my_uname.release, "%u.%u", &sys_major_version, &sys_minor_version);
sscanf(version, "%u.%u", &req_major_version, &req_minor_version);
uint64_t sys_version = sys_major_version * 10000 + sys_minor_version;
uint64_t req_version = req_major_version * 10000 + req_minor_version;
return sys_version >= req_version;
2022-01-15 07:09:41 +03:00
}
2021-03-24 04:51:10 +03:00
static struct swap_file *create_swap_file(size_t needed_size)
{
log_info("Creating hibernation file at %s with %zu MB.", swap_file_name, needed_size / MEGA_BYTES);
if (!create_swap_file_with_size(swap_file_name, needed_size))
2021-03-24 04:51:10 +03:00
log_fatal("Could not create swap file, aborting.");
/* Allocate the swap file with the lowest I/O priority possible to not thrash workload */
ioprio_set(IOPRIO_WHO_PROCESS, 0, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 7));
log_info("Ensuring %s has no holes in it.", swap_file_name);
2022-01-15 13:17:55 +03:00
/* Preallocated swap files are supported on xfs since Linux 4.18. So using fallocate for XFS if system version is higher than 4.18 */
bool swap_on_xfs = is_file_on_fs(swap_file_name, XFS_SUPER_MAGIC) && !is_kernel_version_at_least("4.18");
if (swap_on_xfs || !try_zeroing_out_with_fallocate(swap_file_name, needed_size)) {
if (swap_on_xfs)
log_info("Root partition is in a XFS filesystem and kernel version is older than 4.18. Need to use slower method to allocate swap file");
else
log_info("Fast method failed; trying a slower method.");
if (!try_zero_out_with_write(swap_file_name, needed_size))
2021-03-24 04:51:10 +03:00
log_fatal("Could not create swap file.");
}
perform_fs_specific_checks(swap_file_name);
2021-03-24 04:51:10 +03:00
spawn_and_wait("mkswap", 1, swap_file_name);
struct swap_file *ret = malloc(sizeof(*ret) + sizeof(swap_file_name));
2021-03-25 00:50:04 +03:00
if (!ret)
log_fatal("Could not allocate memory to represent a swap file: %s", strerror(errno));
2021-03-24 04:51:10 +03:00
ret->capacity = needed_size;
memcpy(ret->path, swap_file_name, sizeof(swap_file_name));
return ret;
}
static bool is_kernel_cmdline_correct(const char *dev_uuid, off_t resume_offset)
{
char buffer[1024];
char *line = read_first_line_from_file("/proc/cmdline", buffer);
if (!line) {
log_info("Could not read /proc/cmdline; is /proc mounted? Assuming information is incorrect.");
return false;
}
char *resume_field = NULL;
char *resume_offset_field = NULL;
char *no_console_suspend_field = NULL;
for (char *field = line; field; field = next_field(field)) {
if (!strncmp(field, "resume=", sizeof("resume=") - 1))
resume_field = field + sizeof("resume=") - 1;
else if (!strncmp(field, "resume_offset=", sizeof("resume_offset=") - 1))
resume_offset_field = field + sizeof("resume_offset=") - 1;
else if (!strncmp(field, "no_console_suspend=", sizeof("no_console_suspend=") - 1))
no_console_suspend_field = field + sizeof("no_console_suspend=") - 1;
}
if (!resume_field)
return false;
if (!resume_offset_field)
return false;
if (!no_console_suspend_field)
return false;
char full_dev_path[PATH_MAX];
int r = snprintf(full_dev_path, PATH_MAX, "/dev/disk/by-uuid/%s", dev_uuid);
if (r < 0 || r >= PATH_MAX)
return false;
if (strcmp(resume_field, full_dev_path) != 0)
return false;
char offset_buf[3 * sizeof(size_t)];
snprintf(offset_buf, sizeof(offset_buf), "%zd", resume_offset);
if (strcmp(offset_buf, resume_offset_field) != 0)
return false;
if (strcmp(no_console_suspend_field, "1") != 0)
return false;
return true;
2021-04-23 22:50:09 +03:00
}
2021-03-24 04:51:10 +03:00
static struct resume_swap_area get_swap_area(const struct swap_file *swap)
{
int fd = open(swap->path, O_RDONLY | O_CLOEXEC);
2021-03-24 04:51:10 +03:00
struct stat st;
if (fd < 0)
log_fatal("Could not open %s: %s", swap->path, strerror(errno));
if (fstat(fd, &st) < 0)
log_fatal("Could not stat %s: %s", swap->path, strerror(errno));
if (!S_ISREG(st.st_mode))
log_fatal("Swap file %s is not a regular file", swap->path);
uint32_t offset = get_swap_file_offset(fd);
if (offset == ~0u)
log_fatal("Could not determine file system block number for %s, or file isn't contiguous", swap->path);
close(fd);
log_info("Swap file %s is at device %ld, offset %d", swap->path, st.st_dev, offset);
return (struct resume_swap_area){
2021-03-24 04:51:10 +03:00
.offset = offset,
.dev = st.st_dev,
};
}
2021-04-14 00:19:50 +03:00
static const char *find_grub_cfg_path(void)
{
static const char *possible_paths[] = {"/boot/grub2/grub.cfg", "/boot/grub/grub.cfg", NULL};
2021-04-14 00:19:50 +03:00
for (int i = 0; possible_paths[i]; i++) {
if (!access(possible_paths[i], F_OK))
return possible_paths[i];
}
log_info("Could not find GRUB configuration file. Is /boot mounted?");
return NULL;
}
static bool is_directory_empty(const char *path)
{
DIR *dir = opendir(path);
struct dirent *ent;
bool has_file = false;
if (!dir)
return true;
while ((ent = readdir(dir))) {
if (!strcmp(ent->d_name, "."))
continue;
if (!strcmp(ent->d_name, ".."))
continue;
has_file = true;
break;
}
closedir(dir);
return !has_file;
}
static bool update_kernel_cmdline_params_for_grub(
const char *dev_uuid, const struct resume_swap_area swap_area, bool has_grubby, bool has_update_grub2, bool has_grub2_mkconfig)
2021-03-24 04:51:10 +03:00
{
2021-04-14 00:19:50 +03:00
bool ret_value = true;
/* Doc:
* https://docs.fedoraproject.org/en-US/fedora/rawhide/system-administrators-guide/kernel-module-driver-configuration/Working_with_the_GRUB_2_Boot_Loader/#sec-Making_Persistent_Changes_to_a_GRUB_2_Menu_Using_the_grubby_Tool
*/
2021-03-24 04:51:10 +03:00
log_info("Kernel command line is missing parameters to resume from hibernation. Trying to patch grub configuration file.");
char *args;
if (asprintf(&args, "resume=/dev/disk/by-uuid/%s resume_offset=%lld no_console_suspend=1", dev_uuid, swap_area.offset) < 0) {
log_info("Could not allocate memory for kernel argument");
return false;
}
2022-01-15 13:17:55 +03:00
/* Updating initramfs to include resume config changes in boot process, varies by distro. update-initramfs command works
* for Ubuntu, Debian whereas dracut is a tool to build initramfs archives on RHEL, CentOS */
2021-03-24 04:51:10 +03:00
if (is_exec_in_path("update-initramfs")) {
log_info("Updating initramfs to include resume stuff");
FILE *conf = fopen("/etc/initramfs-tools/conf.d/resume", "we");
if (!conf)
2021-03-25 01:03:10 +03:00
log_fatal("Could not open initramfs-tools configuration file: %s", strerror(errno));
2021-03-24 04:51:10 +03:00
2021-07-30 19:13:00 +03:00
fprintf(conf, "# Updated automatically by hibernation-setup-tool. Do not modify.\n");
2021-03-24 04:51:10 +03:00
fprintf(conf, "RESUME=UUID=%s\n", dev_uuid);
fclose(conf);
spawn_and_wait("update-initramfs", 1, "-u");
2022-01-15 07:09:41 +03:00
} else if (is_exec_in_path("dracut")) {
log_info("Updating initramfs(dracut) to include resume stuff");
FILE *conf = fopen("/etc/dracut.conf.d/resume.conf", "we");
if (!conf)
log_fatal("Could not open initramfs-tools configuration file: %s", strerror(errno));
fprintf(conf, "# Updated automatically by hibernation-setup-tool. Do not modify.\n");
fprintf(conf, "add_dracutmodules+=\" resume \"");
fclose(conf);
spawn_and_wait("dracut", 1, "-f");
2021-03-24 04:51:10 +03:00
}
2021-04-23 22:50:09 +03:00
if (has_grubby) {
2021-03-24 04:51:10 +03:00
log_info("Using grubby to patch GRUB configuration");
spawn_and_wait("grubby", 3, "--update-kernel=ALL", "--args", args);
2021-04-14 00:19:50 +03:00
} else if (has_update_grub2 || has_grub2_mkconfig) {
2021-03-24 04:51:10 +03:00
FILE *resume_cfg;
char *old_contents = NULL;
const char *grub_cfg_path;
2021-03-24 04:51:10 +03:00
size_t old_contents_len = 0;
char buffer[1024];
2021-04-14 00:19:50 +03:00
if (!is_directory_empty("/etc/default/grub.d")) {
/* If we find this directory, it might be possible that some of the configuration
* files there will override GRUB_CMDLINE_LINUX_DEFAULT. This seems to be the case
* in some Azure Marketplace images, with files such as "50-cloudimg-settings.cfg".
* If we find the directory, assume we can create our own with higher priority
* instead of modifying the global configuration file. */
grub_cfg_path = "/etc/default/grub.d/99-hibernate-settings.cfg";
2021-04-13 20:04:17 +03:00
} else if (!access("/etc/default/grub", F_OK)) {
grub_cfg_path = "/etc/default/grub";
} else {
log_fatal("Could not determine where the Grub configuration file is");
}
2021-03-24 04:51:10 +03:00
resume_cfg = fopen(grub_cfg_path, "re");
if (resume_cfg) {
bool in_az_hibernate_agent_block = false;
while (fgets(buffer, sizeof(buffer), resume_cfg)) {
if (in_az_hibernate_agent_block) {
2021-07-30 19:13:00 +03:00
if (strstr(buffer, "# hibernation-setup-tool:end"))
in_az_hibernate_agent_block = false;
continue;
}
2021-07-30 19:13:00 +03:00
if (strstr(buffer, "# hibernation-setup-tool:start")) {
in_az_hibernate_agent_block = true;
continue;
}
size_t buflen = strlen(buffer);
char *tmp = realloc(old_contents, old_contents_len + buflen + 1);
if (!tmp)
log_fatal("Could not allocate memory: %s", strerror(errno));
memcpy(tmp + old_contents_len, buffer, buflen + 1);
old_contents_len += buflen;
old_contents = tmp;
2021-03-24 04:51:10 +03:00
}
fclose(resume_cfg);
2021-03-24 04:51:10 +03:00
}
2021-04-23 22:50:09 +03:00
2021-03-24 04:51:10 +03:00
resume_cfg = fopen(grub_cfg_path, "we");
if (!resume_cfg)
log_fatal("Could not open %s for writing: %s", grub_cfg_path, strerror(errno));
if (old_contents) {
fwrite(old_contents, old_contents_len, 1, resume_cfg);
free(old_contents);
}
2021-07-30 19:13:00 +03:00
fprintf(resume_cfg, "\n# hibernation-setup-tool:start\n");
2021-03-24 04:51:10 +03:00
fprintf(resume_cfg, "GRUB_CMDLINE_LINUX_DEFAULT=\"$GRUB_CMDLINE_LINUX_DEFAULT %s\"\n", args);
fprintf(resume_cfg, "unset GRUB_FORCE_PARTUUID\n");
2021-07-30 19:13:00 +03:00
fprintf(resume_cfg, "# hibernation-setup-tool:end\n");
2021-03-24 04:51:10 +03:00
fclose(resume_cfg);
2021-04-14 00:19:50 +03:00
if (has_update_grub2) {
log_info("Using update-grub2 to patch GRUB configuration in %s", grub_cfg_path);
spawn_and_wait("update-grub2", 0);
} else if (has_grub2_mkconfig) {
const char *grub_cfg_path = find_grub_cfg_path();
if (!grub_cfg_path) {
ret_value = false;
} else {
log_info("Using grub2-mkconfig to patch GRUB configuration in %s", grub_cfg_path);
spawn_and_wait("grub2-mkconfig", 2, "-o", grub_cfg_path);
}
}
2021-03-24 04:51:10 +03:00
}
free(args);
2021-04-14 00:19:50 +03:00
return ret_value;
2021-03-24 04:51:10 +03:00
}
static bool update_swap_offset(const struct swap_file *swap)
{
bool ret = true;
log_info("Updating swap offset");
int fd = open("/dev/snapshot", O_RDONLY | O_CLOEXEC);
2021-03-24 04:51:10 +03:00
if (fd < 0) {
log_info("Could not open /dev/snapshot: %s", strerror(errno));
return false;
}
struct resume_swap_area swap_area = get_swap_area(swap);
if (ioctl(fd, SNAPSHOT_SET_SWAP_AREA, &swap_area) < 0) {
log_info("Could not set resume_swap_area parameters in /dev/snapshot: %s", strerror(errno));
close(fd);
return false;
}
close(fd);
char *dev_uuid = get_disk_uuid_for_file_path(swap->path);
if (!dev_uuid)
log_fatal("Could not determine device UUID for swap file %s", swap->path);
log_info("Swap file %s is in device UUID %s", swap->path, dev_uuid);
if (!is_kernel_cmdline_correct(dev_uuid, swap_area.offset)) {
log_info("Kernel command-line parameters need updating.");
bool has_grubby = is_exec_in_path("grubby");
bool has_update_grub2 = is_exec_in_path("update-grub2");
2021-04-14 00:19:50 +03:00
bool has_grub2_mkconfig = is_exec_in_path("grub2-mkconfig");
if (has_grubby || has_update_grub2 || has_grub2_mkconfig) {
ret = update_kernel_cmdline_params_for_grub(dev_uuid, swap_area, has_grubby, has_update_grub2, has_grub2_mkconfig);
2021-03-24 04:51:10 +03:00
} else {
log_info(
"Could not determine how system was booted to update kernel parameters for next boot. System won't be able to resume until you fix this.");
2021-03-24 04:51:10 +03:00
ret = false;
}
}
free(dev_uuid);
return ret;
}
static void ensure_swap_is_enabled(const struct swap_file *swap, bool created)
2021-03-24 04:51:10 +03:00
{
FILE *fstab;
char *old_contents = NULL;
size_t old_contents_len = 0;
char buffer[1024];
log_info("Ensuring swap file %s is enabled", swap->path);
if (chmod(swap->path, 0600) < 0)
log_fatal("Couldn't set correct permissions on %s: %s", swap->path, strerror(errno));
2021-03-24 04:51:10 +03:00
if (swapon(swap->path, 0) < 0) {
if (errno == EINVAL && !created)
log_fatal("%s exists but kernel isn't accepting it as a swap file. Try removing it and re-running the agent.", swap->path);
2021-03-24 04:51:10 +03:00
if (errno != EBUSY)
log_fatal("Could not enable swap file: %s", strerror(errno));
}
log_info("Updating /etc/fstab");
fstab = fopen("/etc/fstab", "re");
if (!fstab)
log_fatal("Could not open fstab: %s", strerror(errno));
while (fgets(buffer, sizeof(buffer), fstab)) {
if (strstr(buffer, swap->path))
continue;
size_t buflen = strlen(buffer);
char *tmp = realloc(old_contents, old_contents_len + buflen + 1);
if (!tmp)
log_fatal("Couldn't allocate memory");
memcpy(tmp + old_contents_len, buffer, buflen + 1);
old_contents_len += buflen;
old_contents = tmp;
}
fclose(fstab);
fstab = fopen("/etc/fstab", "we");
if (!fstab)
log_fatal("Could not open fstab for writing: %s", strerror(errno));
if (old_contents) {
fwrite(old_contents, old_contents_len, 1, fstab);
free(old_contents);
}
fprintf(fstab, "\n%s\tnone\tswap\tswap\t0\t0\n", swap->path);
2021-03-24 04:51:10 +03:00
fclose(fstab);
}
static void ensure_udev_rules_are_installed(void)
{
char systemctl_path_buf[PATH_MAX];
const char *udev_rule_path;
const char *systemctl_path;
systemctl_path = find_executable_in_path("systemctl", NULL, systemctl_path_buf);
if (!systemctl_path) {
log_info("systemctl not found or not executable, udev rule won't work");
return;
}
if (!is_exec_in_path("udevadm")) {
log_info("udevadm has not been found in $PATH; maybe system doesn't use systemd?");
return;
}
if (!access("/usr/lib/udev/rules.d", F_OK)) {
udev_rule_path = "/usr/lib/udev/rules.d/99-vm-hibernation.rules";
} else if (!access("/etc/udev/rules.d", F_OK)) {
udev_rule_path = "/etc/udev/rules.d/99-vm-hibernation.rules";
} else if (!access("/lib/udev/rules.d", F_OK)) {
udev_rule_path = "/lib/udev/rules.d/99-vm-hibernation.rules";
} else {
log_info("Couldn't find where udev stores the rules. VM may not hibernate.");
return;
}
FILE *rule_file = fopen(udev_rule_path, "we");
if (!rule_file)
log_fatal("Could not open '%s' for writing: %s", udev_rule_path, strerror(errno));
fprintf(rule_file,
"SUBSYSTEM==\"vmbus\", ACTION==\"change\", "
"DRIVER==\"hv_utils\", ENV{EVENT}==\"hibernate\", "
"RUN+=\"%s hibernate\"\n",
systemctl_path);
fclose(rule_file);
log_info("udev rule to hibernate with systemd set up in %s. Telling udev about it.", udev_rule_path);
/* This isn't strictly necessary, but do it anyway just in case. */
spawn_and_wait("udevadm", 2, "control", "--reload-rules");
spawn_and_wait("udevadm", 1, "trigger");
}
static const char *readlink0(const char *path, char buf[static PATH_MAX])
{
ssize_t len = readlink(path, buf, PATH_MAX - 1);
if (len < 0)
return NULL;
buf[len] = '\0';
return buf;
}
static bool is_cold_boot(void)
{
/* We detect cold boot by creating a file in a tmpfs filesystem right
* before asking system to hibernate. If this file is not there during
* agent startup, then it's a cold boot and resuming failed; if it's
* still there, then we successfully resumed. */
char lock_file_path_buf[PATH_MAX];
const char *lock_file_path;
lock_file_path = readlink0(hibernate_lock_file_name, lock_file_path_buf);
if (lock_file_path) {
unlink(hibernate_lock_file_name);
if (access(lock_file_path, F_OK) < 0)
return true;
unlink(lock_file_path);
}
return false;
}
static void notify_vm_host(enum host_vm_notification notification)
{
static const char *types[] = {
[HOST_VM_NOTIFY_COLD_BOOT] = "cold-boot",
[HOST_VM_NOTIFY_HIBERNATING] = "hibernating",
[HOST_VM_NOTIFY_RESUMED_FROM_HIBERNATION] = "resumed-from-hibernation",
[HOST_VM_NOTIFY_PRE_HIBERNATION_FAILED] = "pre-hibernation-failed",
};
log_notice("Changed hibernation state to: %s\n", types[notification]);
}
static int recursive_rmdir_cb(const char *fpath, const struct stat *st, int typeflag, struct FTW *ftwbuf)
{
(void)st;
(void)ftwbuf;
switch (typeflag) {
case FTW_SLN: /* symbolic link pointing to a non-existing file */
case FTW_F: /* regular file */
return unlink(fpath) == 0 ? FTW_CONTINUE : FTW_STOP;
case FTW_D: /* directory */
return rmdir(fpath) == 0 ? FTW_CONTINUE : FTW_STOP;
case FTW_DP:
log_info("Removing %s directory. Its subdirectories and subfiles have been visited.", fpath);
return rmdir(fpath) == 0 ? FTW_CONTINUE : FTW_STOP;
case FTW_SL:
/* We refuse to follow symbolic links as it might lead us to some directory outside
* the one we're trying to remove. The only symlinks we care about are those that
* point to a path that does not exist, but we delete those before doing anything
* anyway. */
log_info("%s is a symbolic link. Refusing to follow.", fpath);
return FTW_STOP;
case FTW_DNR:
log_info("Can't read directory %s", fpath);
return FTW_STOP;
case FTW_NS:
log_info("stat() failed on %s", fpath);
return FTW_STOP;
default:
log_info("nftw callback called with unknown typeflag %d, stopping", typeflag);
return FTW_STOP;
}
}
2022-08-03 02:09:39 +03:00
static bool recursive_rmdir(const char *path) { return nftw(path, recursive_rmdir_cb, 16, FTW_DEPTH | FTW_PHYS | FTW_ACTIONRETVAL) == 0; }
static int handle_pre_systemd_suspend_notification(const char *action)
{
log_needs_pre_hook_prefix = true;
if (!strcmp(action, "hibernate")) {
log_info("Running pre-hibernate hooks");
/* Creating this directory with the right permissions is racy as
* we're writing to tmp which is world-writable. So do our best
* here to ensure that if this for loop terminates normally, the
* directory is empty and readable/writable only by us. In normal
* scenarios (i.e. nobody else tried creating a directory with that
* name), this will succeed the first try. This directory will be
* removed when we resume. */
for (int try = 0;; try++) {
if (try > 10) {
notify_vm_host(HOST_VM_NOTIFY_PRE_HIBERNATION_FAILED);
2021-07-30 19:13:00 +03:00
log_fatal("Tried too many times to create /tmp/hibernation-setup-tool and failed. Giving up");
}
2021-07-30 19:13:00 +03:00
if (!mkdir("/tmp/hibernation-setup-tool", 0700))
break;
if (errno != EEXIST) {
notify_vm_host(HOST_VM_NOTIFY_PRE_HIBERNATION_FAILED);
log_fatal("Couldn't create location to store hibernation agent state: %s", strerror(errno));
}
struct stat st;
2021-07-30 19:13:00 +03:00
if (!stat("/tmp/hibernation-setup-tool", &st)) {
if (S_ISDIR(st.st_mode)) {
2021-07-30 19:13:00 +03:00
log_info("/tmp/hibernation-setup-tool exists, removing it");
2021-07-30 19:13:00 +03:00
if (umount2("/tmp/hibernation-setup-tool", UMOUNT_NOFOLLOW | MNT_DETACH) < 0) {
/* See comment related to the umount2() call in handle_post_systemd_suspend_notification(). */
if (errno != EINVAL)
2021-07-30 19:13:00 +03:00
log_fatal("Error while unmounting /tmp/hibernation-setup-tool: %s", strerror(errno));
}
2021-07-30 19:13:00 +03:00
if (recursive_rmdir("/tmp/hibernation-setup-tool"))
continue;
notify_vm_host(HOST_VM_NOTIFY_PRE_HIBERNATION_FAILED);
2021-07-30 19:13:00 +03:00
log_fatal("Couldn't remove /tmp/hibernation-setup-tool directory before proceeding");
}
2022-07-26 05:01:31 +03:00
log_info("/tmp/hibernation-setup-tool exists and isn't a directory! Removing it and trying again (try %d)", try);
2022-07-26 05:01:31 +03:00
if (!unlink("/tmp/hibernation-setup-tool"))
continue;
2022-07-26 05:01:31 +03:00
notify_vm_host(HOST_VM_NOTIFY_PRE_HIBERNATION_FAILED);
log_fatal("Couldn't remove the file: %s, giving up", strerror(errno));
}
2021-07-30 19:13:00 +03:00
log_info("/tmp/hibernation-setup-tool couldn't be found but mkdir() told us it exists, trying again (try %d)", try);
continue;
}
2021-07-30 19:13:00 +03:00
if (!is_file_on_fs("/tmp/hibernation-setup-tool", TMPFS_MAGIC)) {
log_info("/tmp isn't a tmpfs filesystem; trying to mount /tmp/hibernation-setup-tool as such");
2021-07-30 19:13:00 +03:00
if (mount("tmpfs", "/tmp/hibernation-setup-tool", "tmpfs", 0, NULL) < 0) {
notify_vm_host(HOST_VM_NOTIFY_PRE_HIBERNATION_FAILED);
log_fatal("Couldn't mount temporary filesystem: %s. We need this to detect cold boots!", strerror(errno));
}
}
2021-07-30 19:13:00 +03:00
char pattern[] = "/tmp/hibernation-setup-tool/hibernatedXXXXXX";
int fd = mkostemp(pattern, O_CLOEXEC);
if (fd < 0) {
notify_vm_host(HOST_VM_NOTIFY_PRE_HIBERNATION_FAILED);
log_fatal("Couldn't create a temporary file: %s", strerror(errno));
}
close(fd);
if (unlink(hibernate_lock_file_name) < 0) {
if (!access(hibernate_lock_file_name, F_OK)) {
notify_vm_host(HOST_VM_NOTIFY_PRE_HIBERNATION_FAILED);
log_fatal("Couldn't remove %s: %s", hibernate_lock_file_name, strerror(errno));
}
}
2022-07-20 02:39:39 +03:00
if (symlink(pattern, hibernate_lock_file_name) < 0) {
2022-08-03 00:45:03 +03:00
int symlink_errno = errno;
notify_vm_host(HOST_VM_NOTIFY_PRE_HIBERNATION_FAILED);
2022-08-03 00:45:03 +03:00
log_fatal("Couldn't symlink %s to %s: %s", pattern, hibernate_lock_file_name, strerror(symlink_errno));
}
notify_vm_host(HOST_VM_NOTIFY_HIBERNATING);
log_info("Pre-hibernation hooks executed successfully");
return 0;
}
log_fatal("Can't handle `pre %s' notifications", action);
return 1;
}
static int handle_post_systemd_suspend_notification(const char *action)
2021-03-24 04:51:10 +03:00
{
log_needs_post_hook_prefix = true;
if (!strcmp(action, "hibernate")) {
char real_path_buf[PATH_MAX];
const char *real_path;
log_info("Running post-hibernate hooks");
real_path = readlink0(hibernate_lock_file_name, real_path_buf);
if (!real_path) {
/* No need to notify host VM here: if link wasn't there, it's most likely that the
* pre-hibernation hooks failed to create it in the first place. Log for debugging
* but keep going. */
log_info("This is probably fine, but couldn't readlink(%s): %s", hibernate_lock_file_name, strerror(errno));
} else if (unlink(real_path) < 0) {
log_info("This is fine, but couldn't remove %s: %s", real_path, strerror(errno));
}
if (unlink(hibernate_lock_file_name) < 0)
log_info("This is fine, but couldn't remove %s: %s", hibernate_lock_file_name, strerror(errno));
2021-07-30 19:13:00 +03:00
if (umount2("/tmp/hibernation-setup-tool", UMOUNT_NOFOLLOW | MNT_DETACH) < 0) {
/* EINVAL is returned if this isn't a mount point. This is normal if /tmp was already
* a tmpfs and we didn't create and mounted this directory in the pre-hibernate hook.
* Calling umount2() is cheaper than checking if this directory is indeed mounted as
* a tmpfs directory. */
if (errno != EINVAL)
2021-07-30 19:13:00 +03:00
log_info("While unmounting /tmp/hibernation-setup-tool: %s", strerror(errno));
}
2021-07-30 19:13:00 +03:00
if (!recursive_rmdir("/tmp/hibernation-setup-tool"))
log_info("While removing /tmp/hibernation-setup-tool: %s", strerror(errno));
notify_vm_host(HOST_VM_NOTIFY_RESUMED_FROM_HIBERNATION);
log_info("Post-hibernation hooks executed successfully");
return 0;
}
log_fatal("Can't handle `post %s' notifications", action);
return 1;
}
static int handle_systemd_suspend_notification(const char *argv0, const char *when, const char *action)
{
// Uncomment to view hook logs in syslogs
// log_needs_syslog = true;
if (!strcmp(when, "pre"))
return handle_pre_systemd_suspend_notification(action);
2021-03-24 04:51:10 +03:00
if (!strcmp(when, "post"))
return handle_post_systemd_suspend_notification(action);
log_fatal("Invalid usage: %s %s %s", argv0, when, action);
return 1;
}
static void link_hook(const char *src, const char *dest)
{
if (link(src, dest) < 0) {
if (errno != EEXIST)
return log_fatal("Couldn't link %s to %s: %s", src, dest, strerror(errno));
}
log_info("Notifying systemd of new hooks");
spawn_and_wait("systemctl", 1, "daemon-reload");
}
int main(int argc, char *argv[])
{
2022-07-27 05:37:46 +03:00
char *dest_dir = NULL;
char *when = NULL;
char *action = NULL;
if (argc > 2) {
int opt;
while ((opt = getopt(argc, argv, "a:w:d:")) != -1)
{
switch (opt)
{
case 'a':
action = optarg;
break;
case 'w':
when = optarg;
break;
case 'd':
dest_dir = optarg;
break;
}
}
}
2021-07-01 21:04:02 +03:00
if (geteuid() != 0) {
log_fatal("This program has to be executed with superuser privileges.");
return 1;
}
2021-03-24 04:51:10 +03:00
if (!is_hibernation_enabled_for_vm()) {
log_fatal("Hibernation not enabled for this VM.");
return 1;
2021-03-24 04:51:10 +03:00
}
if (!is_hibernation_allowed_for_vm()) {
log_fatal("Hibernation not allowed for this VM. Please enable Hibernation during VM creation");
return 1;
}
if (is_hyperv()) {
/* We only handle these things here on Hyper-V VMs because it's the only
* hypervisor we know that might need these kinds of notifications. */
2022-08-08 22:49:05 +03:00
if (when && action)
return handle_systemd_suspend_notification(argv[0], when, action);
log_needs_tool_prefix = true;
if (is_cold_boot())
notify_vm_host(HOST_VM_NOTIFY_COLD_BOOT);
}
log_needs_tool_prefix = true;
2021-03-24 04:51:10 +03:00
size_t total_ram = physical_memory();
if (!total_ram)
log_fatal("Could not obtain memory total from this computer");
size_t needed_swap = swap_needed_size(total_ram);
log_info("System has %zu MB of RAM; needs a swap area of %zu MB", total_ram / MEGA_BYTES, needed_swap / MEGA_BYTES);
2021-03-24 04:51:10 +03:00
struct swap_file *swap = find_swap_file(needed_swap);
if (swap) {
log_info("Swap file found with size %zu MB at %s", swap->capacity / MEGA_BYTES, swap->path);
} else {
log_info("Swap file not found");
}
2022-07-27 05:27:04 +03:00
if (swap && swap->capacity != needed_swap) {
2021-03-24 04:51:10 +03:00
log_info("Swap file %s has capacity of %zu MB but needs %zu MB. Recreating. "
"System will run without a swap file while this is being set up.",
swap->path, swap->capacity / MEGA_BYTES, needed_swap / MEGA_BYTES);
if (swapoff(swap->path) < 0) {
2021-03-24 04:51:10 +03:00
if (errno == EINVAL) {
log_info("%s is not currently being used as a swap partition. That's OK.", swap->path);
} else {
log_fatal("Could not disable swap file %s: %s", swap->path, strerror(errno));
}
}
if (unlink(swap->path) < 0) {
/* If we're trying to remove the file but it's not there anymore,
* that's fine... no need to error out. */
if (!access(swap->path, F_OK))
log_fatal("Could not remove swap file %s: %s", swap->path, strerror(errno));
}
free(swap);
swap = NULL;
}
bool created = false;
2021-03-24 04:51:10 +03:00
if (!swap) {
log_info("Creating swap file with %zu MB", needed_swap / MEGA_BYTES);
size_t free_space = free_device_space();
if (free_space < needed_swap)
log_fatal("System needs a swap area of %zu MB; but only has %zu MB free space on device", needed_swap / MEGA_BYTES, free_space / MEGA_BYTES);
2021-03-24 04:51:10 +03:00
swap = create_swap_file(needed_swap);
if (!swap)
log_fatal("Could not create swap file");
created = true;
2021-03-24 04:51:10 +03:00
}
ensure_swap_is_enabled(swap, created);
2021-03-24 04:51:10 +03:00
if (!update_swap_offset(swap))
log_fatal("Could not update swap offset.");
if (is_hyperv()) {
ensure_udev_rules_are_installed();
}
2021-03-24 04:51:10 +03:00
log_info("Swap file for VM hibernation set up successfully");
free(swap);
return 0;
}