Merge feature/page-reporting/5.15 into v5.15
* commit 'ad427234defd6cdfdc0c21ca5b64ef589b82a421':
  hv_balloon: Add support for configurable order free page reporting
  mm/page_reporting: Add checks for page_reporting_order param
This commit is contained in:
Коммит
faffcc26ac
|
@ -468,12 +468,16 @@ static bool do_hot_add;
|
|||
* the specified number of seconds.
|
||||
*/
|
||||
static uint pressure_report_delay = 45;
|
||||
extern unsigned int page_reporting_order;
|
||||
#define HV_MAX_FAILURES 2
|
||||
|
||||
/*
|
||||
* The last time we posted a pressure report to host.
|
||||
*/
|
||||
static unsigned long last_post_time;
|
||||
|
||||
static int hv_hypercall_multi_failure;
|
||||
|
||||
module_param(hot_add, bool, (S_IRUGO | S_IWUSR));
|
||||
MODULE_PARM_DESC(hot_add, "If set attempt memory hot_add");
|
||||
|
||||
|
@ -573,6 +577,10 @@ static struct hv_dynmem_device dm_device;
|
|||
|
||||
static void post_status(struct hv_dynmem_device *dm);
|
||||
|
||||
static void enable_page_reporting(void);
|
||||
|
||||
static void disable_page_reporting(void);
|
||||
|
||||
#ifdef CONFIG_MEMORY_HOTPLUG
|
||||
static inline bool has_pfn_is_backed(struct hv_hotadd_state *has,
|
||||
unsigned long pfn)
|
||||
|
@ -1402,6 +1410,18 @@ static int dm_thread_func(void *dm_dev)
|
|||
*/
|
||||
reinit_completion(&dm_device.config_event);
|
||||
post_status(dm);
|
||||
/*
|
||||
* Disable free page reporting if the multiple hypercall
|
||||
* failure flag is set. It is not done in the page_reporting
|
||||
* callback context as that causes a deadlock between
|
||||
* page_reporting_process() and page_reporting_unregister()
|
||||
*/
|
||||
if (hv_hypercall_multi_failure >= HV_MAX_FAILURES) {
|
||||
pr_err("Multiple failures in cold memory discard hypercall, disabling page reporting\n");
|
||||
disable_page_reporting();
|
||||
/* Reset the flag after disabling reporting */
|
||||
hv_hypercall_multi_failure = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
@ -1577,20 +1597,20 @@ static void balloon_onchannelcallback(void *context)
|
|||
|
||||
}
|
||||
|
||||
/* Hyper-V only supports reporting 2MB pages or higher */
|
||||
#define HV_MIN_PAGE_REPORTING_ORDER 9
|
||||
#define HV_MIN_PAGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << HV_MIN_PAGE_REPORTING_ORDER)
|
||||
#define HV_LARGE_REPORTING_ORDER 9
|
||||
#define HV_LARGE_REPORTING_LEN (HV_HYP_PAGE_SIZE << \
|
||||
HV_LARGE_REPORTING_ORDER)
|
||||
static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
|
||||
struct scatterlist *sgl, unsigned int nents)
|
||||
{
|
||||
unsigned long flags;
|
||||
struct hv_memory_hint *hint;
|
||||
int i;
|
||||
int i, order;
|
||||
u64 status;
|
||||
struct scatterlist *sg;
|
||||
|
||||
WARN_ON_ONCE(nents > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES);
|
||||
WARN_ON_ONCE(sgl->length < HV_MIN_PAGE_REPORTING_LEN);
|
||||
WARN_ON_ONCE(sgl->length < (HV_HYP_PAGE_SIZE << page_reporting_order));
|
||||
local_irq_save(flags);
|
||||
hint = *(struct hv_memory_hint **)this_cpu_ptr(hyperv_pcpu_input_arg);
|
||||
if (!hint) {
|
||||
|
@ -1605,21 +1625,53 @@ static int hv_free_page_report(struct page_reporting_dev_info *pr_dev_info,
|
|||
|
||||
range = &hint->ranges[i];
|
||||
range->address_space = 0;
|
||||
/* page reporting only reports 2MB pages or higher */
|
||||
range->page.largepage = 1;
|
||||
range->page.additional_pages =
|
||||
(sg->length / HV_MIN_PAGE_REPORTING_LEN) - 1;
|
||||
range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB;
|
||||
range->base_large_pfn =
|
||||
page_to_hvpfn(sg_page(sg)) >> HV_MIN_PAGE_REPORTING_ORDER;
|
||||
order = get_order(sg->length);
|
||||
/*
|
||||
* Hyper-V expects the additional_pages field in the units
|
||||
* of one of these 3 sizes, 4Kbytes, 2Mbytes or 1Gbytes.
|
||||
* This is dictated by the values of the fields page.largepage
|
||||
* and page_size.
|
||||
* This code however, only uses 4Kbytes and 2Mbytes units
|
||||
* and not 1Gbytes unit.
|
||||
*/
|
||||
|
||||
/* page reporting for pages 2MB or higher */
|
||||
if (order >= HV_LARGE_REPORTING_ORDER ) {
|
||||
range->page.largepage = 1;
|
||||
range->page_size = HV_GPA_PAGE_RANGE_PAGE_SIZE_2MB;
|
||||
range->base_large_pfn = page_to_hvpfn(
|
||||
sg_page(sg)) >> HV_LARGE_REPORTING_ORDER;
|
||||
range->page.additional_pages =
|
||||
(sg->length / HV_LARGE_REPORTING_LEN) - 1;
|
||||
} else {
|
||||
/* Page reporting for pages below 2MB */
|
||||
range->page.basepfn = page_to_hvpfn(sg_page(sg));
|
||||
range->page.largepage = false;
|
||||
range->page.additional_pages =
|
||||
(sg->length / HV_HYP_PAGE_SIZE) - 1;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
status = hv_do_rep_hypercall(HV_EXT_CALL_MEMORY_HEAT_HINT, nents, 0,
|
||||
hint, NULL);
|
||||
local_irq_restore(flags);
|
||||
if ((status & HV_HYPERCALL_RESULT_MASK) != HV_STATUS_SUCCESS) {
|
||||
if (!hv_result_success(status)) {
|
||||
|
||||
pr_err("Cold memory discard hypercall failed with status %llx\n",
|
||||
status);
|
||||
status);
|
||||
if (hv_hypercall_multi_failure > 0)
|
||||
hv_hypercall_multi_failure++;
|
||||
|
||||
if (hv_result(status) == HV_STATUS_INVALID_PARAMETER) {
|
||||
pr_err("Underlying Hyper-V does not support order less than 9. Hypercall failed\n");
|
||||
pr_err("Defaulting to page_reporting_order %d\n",
|
||||
pageblock_order);
|
||||
page_reporting_order = pageblock_order;
|
||||
hv_hypercall_multi_failure++;
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
|
@ -1630,12 +1682,6 @@ static void enable_page_reporting(void)
|
|||
{
|
||||
int ret;
|
||||
|
||||
/* Essentially, validating 'PAGE_REPORTING_MIN_ORDER' is big enough. */
|
||||
if (pageblock_order < HV_MIN_PAGE_REPORTING_ORDER) {
|
||||
pr_debug("Cold memory discard is only supported on 2MB pages and above\n");
|
||||
return;
|
||||
}
|
||||
|
||||
if (!hv_query_ext_cap(HV_EXT_CAPABILITY_MEMORY_COLD_DISCARD_HINT)) {
|
||||
pr_debug("Cold memory discard hint not supported by Hyper-V\n");
|
||||
return;
|
||||
|
@ -1643,12 +1689,18 @@ static void enable_page_reporting(void)
|
|||
|
||||
BUILD_BUG_ON(PAGE_REPORTING_CAPACITY > HV_MEMORY_HINT_MAX_GPA_PAGE_RANGES);
|
||||
dm_device.pr_dev_info.report = hv_free_page_report;
|
||||
/*
|
||||
* We let the page_reporting_order parameter decide the order
|
||||
* in the page_reporting code
|
||||
*/
|
||||
dm_device.pr_dev_info.order = 0;
|
||||
ret = page_reporting_register(&dm_device.pr_dev_info);
|
||||
if (ret < 0) {
|
||||
dm_device.pr_dev_info.report = NULL;
|
||||
pr_err("Failed to enable cold memory discard: %d\n", ret);
|
||||
} else {
|
||||
pr_info("Cold memory discard hint enabled\n");
|
||||
pr_info("Cold memory discard hint enabled with order %d\n",
|
||||
page_reporting_order);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -11,10 +11,42 @@
|
|||
#include "page_reporting.h"
|
||||
#include "internal.h"
|
||||
|
||||
unsigned int page_reporting_order = MAX_ORDER;
|
||||
module_param(page_reporting_order, uint, 0644);
|
||||
/* Initialize to an unsupported value */
|
||||
unsigned int page_reporting_order = -1;
|
||||
|
||||
static int page_order_update_notify(const char *val, const struct kernel_param *kp)
|
||||
{
|
||||
/*
|
||||
* If param is set beyond this limit, order is set to default
|
||||
* pageblock_order value
|
||||
*/
|
||||
return param_set_uint_minmax(val, kp, 0, MAX_ORDER-1);
|
||||
}
|
||||
|
||||
static const struct kernel_param_ops page_reporting_param_ops = {
|
||||
.set = &page_order_update_notify,
|
||||
/*
|
||||
* For the get op, use param_get_int instead of param_get_uint.
|
||||
* This is to make sure that when unset the initialized value of
|
||||
* -1 is shown correctly
|
||||
*/
|
||||
.get = &param_get_int,
|
||||
};
|
||||
|
||||
module_param_cb(page_reporting_order, &page_reporting_param_ops,
|
||||
&page_reporting_order, 0644);
|
||||
MODULE_PARM_DESC(page_reporting_order, "Set page reporting order");
|
||||
|
||||
/*
|
||||
* This symbol is also a kernel parameter. Export the page_reporting_order
|
||||
* symbol so that other drivers can access it to control order values without
|
||||
* having to introduce another configurable parameter. Only one driver can
|
||||
* register with the page_reporting driver for the service, so we have just
|
||||
* one control parameter for the use case (which can be accessed in both
|
||||
* drivers)
|
||||
*/
|
||||
EXPORT_SYMBOL_GPL(page_reporting_order);
|
||||
|
||||
#define PAGE_REPORTING_DELAY (2 * HZ)
|
||||
static struct page_reporting_dev_info __rcu *pr_dev_info __read_mostly;
|
||||
|
||||
|
@ -330,10 +362,18 @@ int page_reporting_register(struct page_reporting_dev_info *prdev)
|
|||
}
|
||||
|
||||
/*
|
||||
* Update the page reporting order if it's specified by driver.
|
||||
* Otherwise, it falls back to @pageblock_order.
|
||||
* If the page_reporting_order value is not set, we check if
|
||||
* an order is provided from the driver that is performing the
|
||||
* registration. If that is not provided either, we default to
|
||||
* pageblock_order.
|
||||
*/
|
||||
page_reporting_order = prdev->order ? : pageblock_order;
|
||||
|
||||
if (page_reporting_order == -1) {
|
||||
if (prdev->order > 0 && prdev->order <= MAX_ORDER)
|
||||
page_reporting_order = prdev->order;
|
||||
else
|
||||
page_reporting_order = pageblock_order;
|
||||
}
|
||||
|
||||
/* initialize state and work structures */
|
||||
atomic_set(&prdev->state, PAGE_REPORTING_IDLE);
|
||||
|
|
Загрузка…
Ссылка в новой задаче